From 45e7ed0fcbc357562230cff9451062901aa4110e Mon Sep 17 00:00:00 2001
From: Frostebite
Date: Mon, 29 Dec 2025 18:26:09 +0000
Subject: [PATCH] pr feedback - fix taint removal syntax

---
 .github/workflows/cloud-runner-integrity.yml | 40 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/cloud-runner-integrity.yml b/.github/workflows/cloud-runner-integrity.yml
index 85aaf21f..4bfb54b9 100644
--- a/.github/workflows/cloud-runner-integrity.yml
+++ b/.github/workflows/cloud-runner-integrity.yml
@@ -238,13 +238,20 @@ jobs:
             fi
           done
           # If disk pressure taint is still present after cleanup, manually remove it (CI only)
+          # Try multiple times as Kubernetes may re-add it if condition persists
           if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then
             echo "WARNING: Disk pressure taint still present after cleanup. Manually removing taint for CI..."
             NODE_NAMES=$(kubectl get nodes -o name 2>/dev/null | sed 's/node\///' || echo "")
             for node in $NODE_NAMES; do
+              # Try removing with NoSchedule effect (most common)
+              kubectl taint nodes "$node" node.kubernetes.io/disk-pressure:NoSchedule- 2>/dev/null || true
+              # Also try without effect specifier
               kubectl taint nodes "$node" node.kubernetes.io/disk-pressure- 2>/dev/null || true
+              # Use patch as fallback
+              kubectl patch node "$node" -p '{"spec":{"taints":[]}}' 2>/dev/null || true
             done
-            echo "Taint removed. Checking nodes..."
+            sleep 2
+            echo "Taint removal attempted. Checking nodes..."
             kubectl describe nodes | grep -i taint || echo "No taints found"
           fi
           # Wait for disk pressure condition to clear (not just taint)
@@ -264,9 +271,20 @@ jobs:
             echo "WARNING: Disk pressure condition still exists. Removing taint and waiting 10 seconds..."
             NODE_NAMES=$(kubectl get nodes -o name 2>/dev/null | sed 's/node\///' || echo "")
             for node in $NODE_NAMES; do
+              # Try removing with NoSchedule effect (most common)
+              kubectl taint nodes "$node" node.kubernetes.io/disk-pressure:NoSchedule- 2>/dev/null || true
+              # Also try without effect specifier
               kubectl taint nodes "$node" node.kubernetes.io/disk-pressure- 2>/dev/null || true
+              # Use patch as fallback to remove all taints
+              kubectl patch node "$node" -p '{"spec":{"taints":[]}}' 2>/dev/null || true
             done
             sleep 10
+            # Verify taint is actually removed
+            if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then
+              echo "ERROR: Taint still present after removal attempts. This may cause pod scheduling issues."
+            else
+              echo "Taint successfully removed."
+            fi
           fi
       - name: Run cloud-runner-image test (validate image creation)
         timeout-minutes: 10
@@ -488,13 +506,20 @@ jobs:
             fi
           done
           # If disk pressure taint is still present after cleanup, manually remove it (CI only)
+          # Try multiple times as Kubernetes may re-add it if condition persists
           if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then
             echo "WARNING: Disk pressure taint still present after cleanup. Manually removing taint for CI..."
             NODE_NAMES=$(kubectl get nodes -o name 2>/dev/null | sed 's/node\///' || echo "")
             for node in $NODE_NAMES; do
+              # Try removing with NoSchedule effect (most common)
+              kubectl taint nodes "$node" node.kubernetes.io/disk-pressure:NoSchedule- 2>/dev/null || true
+              # Also try without effect specifier
               kubectl taint nodes "$node" node.kubernetes.io/disk-pressure- 2>/dev/null || true
+              # Use patch as fallback
+              kubectl patch node "$node" -p '{"spec":{"taints":[]}}' 2>/dev/null || true
             done
-            echo "Taint removed. Checking nodes..."
+            sleep 2
+            echo "Taint removal attempted. Checking nodes..."
             kubectl describe nodes | grep -i taint || echo "No taints found"
           fi
           # Wait for disk pressure condition to clear (not just taint)
@@ -514,9 +539,20 @@ jobs:
             echo "WARNING: Disk pressure condition still exists. Removing taint and waiting 10 seconds..."
             NODE_NAMES=$(kubectl get nodes -o name 2>/dev/null | sed 's/node\///' || echo "")
             for node in $NODE_NAMES; do
+              # Try removing with NoSchedule effect (most common)
+              kubectl taint nodes "$node" node.kubernetes.io/disk-pressure:NoSchedule- 2>/dev/null || true
+              # Also try without effect specifier
               kubectl taint nodes "$node" node.kubernetes.io/disk-pressure- 2>/dev/null || true
+              # Use patch as fallback to remove all taints
+              kubectl patch node "$node" -p '{"spec":{"taints":[]}}' 2>/dev/null || true
             done
             sleep 10
+            # Verify taint is actually removed
+            if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then
+              echo "ERROR: Taint still present after removal attempts. This may cause pod scheduling issues."
+            else
+              echo "Taint successfully removed."
+            fi
           fi
       - name: Run cloud-runner-s3-steps test (validate S3 operations with K8s)
         timeout-minutes: 30