pr feedback

cloud-runner-develop
Frostebite 2025-12-29 23:43:22 +00:00
commit 9dc0888c46
1 changed files with 131 additions and 41 deletions

View File

@ -29,7 +29,7 @@ jobs:
name: Cloud Runner Tests (K8s) name: Cloud Runner Tests (K8s)
runs-on: ubuntu-latest runs-on: ubuntu-latest
env: env:
K3D_NODE_CONTAINERS: "k3d-unity-builder-server-0 k3d-unity-builder-agent-0" K3D_NODE_CONTAINERS: "k3d-unity-builder-agent-0"
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
with: with:
@ -95,18 +95,28 @@ jobs:
# Clean up disk space on the k3d node to prevent evictions and disk pressure # Clean up disk space on the k3d node to prevent evictions and disk pressure
echo "Cleaning up disk space on k3d nodes..." echo "Cleaning up disk space on k3d nodes..."
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}" K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}"
cleanup_k3d_nodes() {
for NODE in $K3D_NODE_CONTAINERS; do for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "df -h && docker system prune -af --volumes || true" || true docker exec "$NODE" sh -c "
crictl rmi --prune 2>/dev/null || true
crictl rmp --all 2>/dev/null || true
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
find /var/log -type f -name '*.log' -delete 2>/dev/null || true
find /tmp -type f -delete 2>/dev/null || true
df -h /
" || true
done done
}
cleanup_k3d_nodes
docker system prune -af --volumes || true docker system prune -af --volumes || true
# Wait for disk pressure taints to clear (with timeout) # Wait for disk pressure taints to clear (with timeout)
echo "Checking for disk pressure taints on nodes..." echo "Checking for disk pressure taints on nodes..."
for i in {1..30}; do for i in {1..30}; do
if kubectl describe nodes | grep -q "node.kubernetes.io/disk-pressure"; then if kubectl describe nodes | grep -q "node.kubernetes.io/disk-pressure"; then
echo "Disk pressure detected, waiting for it to clear... ($i/30)" echo "Disk pressure detected, waiting for it to clear... ($i/30)"
for NODE in $K3D_NODE_CONTAINERS; do cleanup_k3d_nodes
docker exec "$NODE" sh -c "docker system prune -af --volumes 2>/dev/null || true" || true
done
docker system prune -af --volumes || true docker system prune -af --volumes || true
sleep 2 sleep 2
else else
@ -196,28 +206,37 @@ jobs:
echo "Cleaning up disk space on k3d node..." echo "Cleaning up disk space on k3d node..."
# Use containerd/crictl commands (docker not available in k3d nodes) # Use containerd/crictl commands (docker not available in k3d nodes)
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}" K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}"
cleanup_k3d_nodes() {
for NODE in $K3D_NODE_CONTAINERS; do for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "crictl rmi --prune 2>/dev/null || true" || true docker exec "$NODE" sh -c "
docker exec "$NODE" sh -c "crictl rmp --all 2>/dev/null || true" || true crictl rmi --prune 2>/dev/null || true
crictl rmp --all 2>/dev/null || true
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
find /var/log -type f -name '*.log' -delete 2>/dev/null || true
find /tmp -type f -delete 2>/dev/null || true
find /var/lib/rancher/k3s -type f -name '*.log' -delete 2>/dev/null || true
df -h /
" || true
done done
}
cleanup_k3d_nodes
# Clean up containerd snapshots and images more aggressively # Clean up containerd snapshots and images more aggressively
for NODE in $K3D_NODE_CONTAINERS; do cleanup_k3d_nodes
docker exec "$NODE" sh -c "find /var/lib/rancher/k3s/agent/containerd -type d -name 'snapshots' -exec rm -rf {}/* 2>/dev/null \; || true" || true
done
# Clean up old logs and temporary files
for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "find /var/log -type f -name '*.log' -delete 2>/dev/null || true" || true
docker exec "$NODE" sh -c "find /tmp -type f -delete 2>/dev/null || true" || true
docker exec "$NODE" sh -c "find /var/lib/rancher/k3s -type f -name '*.log' -delete 2>/dev/null || true" || true
docker exec "$NODE" sh -c "df -h" 2>/dev/null || true
done
# Wait for disk pressure taints to clear before proceeding # Wait for disk pressure taints to clear before proceeding
echo "Checking for disk pressure taints..." echo "Checking for disk pressure taints..."
for i in {1..20}; do for i in {1..20}; do
if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then
echo "Disk pressure detected, cleaning up and waiting... ($i/20)" echo "Disk pressure detected, cleaning up and waiting... ($i/20)"
for NODE in $K3D_NODE_CONTAINERS; do for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "docker system prune -af --volumes 2>/dev/null || true" || true docker exec "$NODE" sh -c "
crictl rmi --prune 2>/dev/null || true
crictl rmp --all 2>/dev/null || true
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
" || true
done done
docker system prune -af --volumes || true docker system prune -af --volumes || true
sleep 3 sleep 3
@ -398,14 +417,26 @@ jobs:
# Clean up disk space on k3d node # Clean up disk space on k3d node
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}" K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}"
for NODE in $K3D_NODE_CONTAINERS; do for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "docker system prune -af --volumes 2>/dev/null || true" || true docker exec "$NODE" sh -c "
crictl rmi --prune 2>/dev/null || true
crictl rmp --all 2>/dev/null || true
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
" || true
done done
# Wait for disk pressure to clear # Wait for disk pressure to clear
for i in {1..15}; do for i in {1..15}; do
if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then
echo "Disk pressure detected, cleaning up... ($i/15)" echo "Disk pressure detected, cleaning up... ($i/15)"
for NODE in $K3D_NODE_CONTAINERS; do for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "docker system prune -af --volumes 2>/dev/null || true" || true docker exec "$NODE" sh -c "
crictl rmi --prune 2>/dev/null || true
crictl rmp --all 2>/dev/null || true
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
" || true
done done
docker system prune -af --volumes || true docker system prune -af --volumes || true
sleep 2 sleep 2
@ -421,17 +452,29 @@ jobs:
docker system prune -af --volumes || true docker system prune -af --volumes || true
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}" K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}"
for NODE in $K3D_NODE_CONTAINERS; do for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "docker system prune -af --volumes 2>/dev/null || true" || true docker exec "$NODE" sh -c "
crictl rmi --prune 2>/dev/null || true
crictl rmp --all 2>/dev/null || true
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
" || true
done done
for i in {1..30}; do for i in {1..30}; do
if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then
echo "Disk pressure detected, cleaning up aggressively... ($i/30)" echo "Disk pressure detected, cleaning up aggressively... ($i/30)"
for NODE in $K3D_NODE_CONTAINERS; do for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "docker system prune -af --volumes 2>/dev/null || true" || true docker exec "$NODE" sh -c "
crictl rmi --prune 2>/dev/null || true
crictl rmp --all 2>/dev/null || true
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
" || true
done done
docker system prune -af --volumes || true docker system prune -af --volumes || true
for NODE in $K3D_NODE_CONTAINERS; do for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "docker images -q | xargs -r docker rmi -f 2>/dev/null || true" || true docker exec "$NODE" sh -c "crictl images -q | xargs -r crictl rmi 2>/dev/null || true" || true
done done
sleep 3 sleep 3
else else
@ -506,14 +549,26 @@ jobs:
# Clean up disk space on k3d node # Clean up disk space on k3d node
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}" K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}"
for NODE in $K3D_NODE_CONTAINERS; do for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "docker system prune -af --volumes 2>/dev/null || true" || true docker exec "$NODE" sh -c "
crictl rmi --prune 2>/dev/null || true
crictl rmp --all 2>/dev/null || true
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
" || true
done done
# Wait for disk pressure to clear # Wait for disk pressure to clear
for i in {1..15}; do for i in {1..15}; do
if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then
echo "Disk pressure detected, cleaning up... ($i/15)" echo "Disk pressure detected, cleaning up... ($i/15)"
for NODE in $K3D_NODE_CONTAINERS; do for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "docker system prune -af --volumes 2>/dev/null || true" || true docker exec "$NODE" sh -c "
crictl rmi --prune 2>/dev/null || true
crictl rmp --all 2>/dev/null || true
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
" || true
done done
docker system prune -af --volumes || true docker system prune -af --volumes || true
sleep 2 sleep 2
@ -529,7 +584,13 @@ jobs:
docker system prune -af --volumes || true docker system prune -af --volumes || true
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}" K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}"
for NODE in $K3D_NODE_CONTAINERS; do for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "docker system prune -af --volumes 2>/dev/null || true" || true docker exec "$NODE" sh -c "
crictl rmi --prune 2>/dev/null || true
crictl rmp --all 2>/dev/null || true
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
" || true
done done
# Wait for disk pressure taints to clear (with aggressive cleanup) # Wait for disk pressure taints to clear (with aggressive cleanup)
# Limit to 10 attempts to avoid timeout - if cleanup doesn't work, just remove the taint # Limit to 10 attempts to avoid timeout - if cleanup doesn't work, just remove the taint
@ -546,8 +607,11 @@ jobs:
# Use k3s/containerd commands instead of docker (docker not available in k3d nodes) # Use k3s/containerd commands instead of docker (docker not available in k3d nodes)
# Clean up k3s containerd snapshots and images # Clean up k3s containerd snapshots and images
for NODE in $K3D_NODE_CONTAINERS; do for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "crictl rmi --prune 2>/dev/null || true" || true docker exec "$NODE" sh -c "
docker exec "$NODE" sh -c "crictl rmp --all 2>/dev/null || true" || true crictl rmi --prune 2>/dev/null || true
crictl rmp --all 2>/dev/null || true
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
" || true
done done
# Clean up old containerd snapshots # Clean up old containerd snapshots
for NODE in $K3D_NODE_CONTAINERS; do for NODE in $K3D_NODE_CONTAINERS; do
@ -555,9 +619,11 @@ jobs:
done done
# Clean up k3s logs and temp files # Clean up k3s logs and temp files
for NODE in $K3D_NODE_CONTAINERS; do for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "find /var/lib/rancher/k3s -type f -name '*.log' -delete 2>/dev/null || true" || true docker exec "$NODE" sh -c "
docker exec "$NODE" sh -c "find /tmp -type f -mtime +0 -delete 2>/dev/null || true" || true find /var/lib/rancher/k3s -type f -name '*.log' -delete 2>/dev/null || true
docker exec "$NODE" sh -c "find /var/log -type f -name '*.log' -mtime +0 -delete 2>/dev/null || true" || true find /tmp -type f -mtime +0 -delete 2>/dev/null || true
find /var/log -type f -name '*.log' -mtime +0 -delete 2>/dev/null || true
" || true
done done
# Clean up host docker # Clean up host docker
docker system prune -af --volumes || true docker system prune -af --volumes || true
@ -716,17 +782,29 @@ jobs:
docker system prune -af --volumes || true docker system prune -af --volumes || true
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}" K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}"
for NODE in $K3D_NODE_CONTAINERS; do for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "docker system prune -af --volumes 2>/dev/null || true" || true docker exec "$NODE" sh -c "
crictl rmi --prune 2>/dev/null || true
crictl rmp --all 2>/dev/null || true
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
" || true
done done
for i in {1..30}; do for i in {1..30}; do
if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then
echo "Disk pressure detected, cleaning up aggressively... ($i/30)" echo "Disk pressure detected, cleaning up aggressively... ($i/30)"
for NODE in $K3D_NODE_CONTAINERS; do for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "docker system prune -af --volumes 2>/dev/null || true" || true docker exec "$NODE" sh -c "
crictl rmi --prune 2>/dev/null || true
crictl rmp --all 2>/dev/null || true
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
" || true
done done
docker system prune -af --volumes || true docker system prune -af --volumes || true
for NODE in $K3D_NODE_CONTAINERS; do for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "docker images -q | xargs -r docker rmi -f 2>/dev/null || true" || true docker exec "$NODE" sh -c "crictl images -q | xargs -r crictl rmi 2>/dev/null || true" || true
done done
sleep 3 sleep 3
else else
@ -824,17 +902,29 @@ jobs:
docker system prune -af --volumes || true docker system prune -af --volumes || true
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}" K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}"
for NODE in $K3D_NODE_CONTAINERS; do for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "docker system prune -af --volumes 2>/dev/null || true" || true docker exec "$NODE" sh -c "
crictl rmi --prune 2>/dev/null || true
crictl rmp --all 2>/dev/null || true
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
" || true
done done
for i in {1..30}; do for i in {1..30}; do
if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then
echo "Disk pressure detected, cleaning up aggressively... ($i/30)" echo "Disk pressure detected, cleaning up aggressively... ($i/30)"
for NODE in $K3D_NODE_CONTAINERS; do for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "docker system prune -af --volumes 2>/dev/null || true" || true docker exec "$NODE" sh -c "
crictl rmi --prune 2>/dev/null || true
crictl rmp --all 2>/dev/null || true
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
" || true
done done
docker system prune -af --volumes || true docker system prune -af --volumes || true
for NODE in $K3D_NODE_CONTAINERS; do for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "docker images -q | xargs -r docker rmi -f 2>/dev/null || true" || true docker exec "$NODE" sh -c "crictl images -q | xargs -r crictl rmi 2>/dev/null || true" || true
done done
sleep 3 sleep 3
else else