Harden k3d cleanup to avoid disk exhaustion
parent
9eb6e27272
commit
9f60a75602
|
|
@ -28,6 +28,8 @@ jobs:
|
|||
k8s:
|
||||
name: Cloud Runner Tests (K8s)
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
K3D_NODE_CONTAINERS: "k3d-unity-builder-agent-0"
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
|
|
@ -92,14 +94,29 @@ jobs:
|
|||
echo "Cluster connectivity test - if this fails, LocalStack may not be accessible from k3d"
|
||||
# Clean up disk space on the k3d node to prevent evictions and disk pressure
|
||||
echo "Cleaning up disk space on k3d nodes..."
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "df -h && docker system prune -af --volumes || true" || true
|
||||
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}"
|
||||
# Best-effort disk cleanup inside every k3d node container.
#
# Globals:
#   K3D_NODE_CONTAINERS (read) - whitespace-separated node container names,
#                                used when no arguments are given.
# Arguments:
#   $@ - optional node container names; when present they are used instead
#        of K3D_NODE_CONTAINERS for this call.
# Outputs:
#   stdout: `df -h /` from each node after the cleanup commands ran.
#   stderr: one warning per node whose cleanup script did not exit 0, and a
#           notice when there is no node to clean.
# Returns:
#   Always 0 - cleanup is best effort and must never fail the workflow step.
cleanup_k3d_nodes() {
  # Locals keep the loop variable from leaking into the calling step script.
  local node rc
  local -a nodes=("$@")

  if [ "${#nodes[@]}" -eq 0 ]; then
    # Word splitting is intended: the variable is a space-separated list.
    # shellcheck disable=SC2206
    nodes=(${K3D_NODE_CONTAINERS:-})
  fi
  if [ "${#nodes[@]}" -eq 0 ]; then
    echo "cleanup_k3d_nodes: no k3d node containers configured, nothing to clean" >&2
    return 0
  fi

  for node in "${nodes[@]}"; do
    rc=0
    # Every command inside the node is individually best effort (|| true), so
    # one failing step does not prevent the following ones from running.
    # Order: crictl image/pod cleanup, containerd overlayfs snapshot dirs,
    # k3s storage dirs, log files, temp files, then report remaining space.
    # NOTE(review): the snapshot directories are removed directly on disk
    # rather than through containerd - confirm this is acceptable while pods
    # are still running on the node.
    docker exec "$node" sh -c "
      crictl rmi --prune 2>/dev/null || true
      crictl rmp --all 2>/dev/null || true
      crictl images -q | xargs -r crictl rmi 2>/dev/null || true
      find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
      find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
      find /var/log -type f -name '*.log' -delete 2>/dev/null || true
      find /tmp -type f -delete 2>/dev/null || true
      find /var/lib/rancher/k3s -type f -name '*.log' -delete 2>/dev/null || true
      df -h /
    " || rc=$?
    if [ "$rc" -ne 0 ]; then
      # Report which node failed instead of swallowing the error silently.
      echo "cleanup_k3d_nodes: cleanup on '$node' exited with status $rc (ignored)" >&2
    fi
  done
  return 0
}
|
||||
cleanup_k3d_nodes
|
||||
docker system prune -af --volumes || true
|
||||
# Wait for disk pressure taints to clear (with timeout)
|
||||
echo "Checking for disk pressure taints on nodes..."
|
||||
for i in {1..30}; do
|
||||
if kubectl describe nodes | grep -q "node.kubernetes.io/disk-pressure"; then
|
||||
echo "Disk pressure detected, waiting for it to clear... ($i/30)"
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker system prune -af --volumes 2>/dev/null || true" || true
|
||||
cleanup_k3d_nodes
|
||||
docker system prune -af --volumes || true
|
||||
sleep 2
|
||||
else
|
||||
|
|
@ -113,14 +130,21 @@ jobs:
|
|||
run: |
|
||||
echo "Pre-pulling Unity image into k3d node to avoid evictions during tests..."
|
||||
# Clean up old images first to make space
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "crictl rmi --prune 2>/dev/null || true" || true
|
||||
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}"
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "crictl rmi --prune 2>/dev/null || true" || true
|
||||
done
|
||||
# Pre-pull the Unity image that will be used in tests
|
||||
# This ensures it's cached and doesn't need to be pulled during test execution
|
||||
UNITY_IMAGE="unityci/editor:ubuntu-2021.3.45f1-base-3"
|
||||
echo "Pulling ${UNITY_IMAGE} into k3d node..."
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "crictl pull ${UNITY_IMAGE} 2>&1 || echo 'Image pull failed or already exists'" || true
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "crictl pull ${UNITY_IMAGE} 2>&1 || echo 'Image pull failed or already exists'" || true
|
||||
done
|
||||
echo "Image pre-pull completed. Checking disk space..."
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "df -h / | tail -1" || true
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "df -h / | tail -1" || true
|
||||
done
|
||||
- uses: actions/setup-node@v4
|
||||
with:
|
||||
node-version: 20
|
||||
|
|
@ -181,21 +205,39 @@ jobs:
|
|||
# Clean up disk space on k3d node to prevent ephemeral-storage evictions and disk pressure
|
||||
echo "Cleaning up disk space on k3d node..."
|
||||
# Use containerd/crictl commands (docker not available in k3d nodes)
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "crictl rmi --prune 2>/dev/null || true" || true
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "crictl rmp --all 2>/dev/null || true" || true
|
||||
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}"
|
||||
# Best-effort disk cleanup inside every k3d node container.
#
# Globals:
#   K3D_NODE_CONTAINERS (read) - whitespace-separated node container names,
#                                used when no arguments are given.
# Arguments:
#   $@ - optional node container names; when present they are used instead
#        of K3D_NODE_CONTAINERS for this call.
# Outputs:
#   stdout: `df -h /` from each node after the cleanup commands ran.
#   stderr: one warning per node whose cleanup script did not exit 0, and a
#           notice when there is no node to clean.
# Returns:
#   Always 0 - cleanup is best effort and must never fail the workflow step.
cleanup_k3d_nodes() {
  # Locals keep the loop variable from leaking into the calling step script.
  local node rc
  local -a nodes=("$@")

  if [ "${#nodes[@]}" -eq 0 ]; then
    # Word splitting is intended: the variable is a space-separated list.
    # shellcheck disable=SC2206
    nodes=(${K3D_NODE_CONTAINERS:-})
  fi
  if [ "${#nodes[@]}" -eq 0 ]; then
    echo "cleanup_k3d_nodes: no k3d node containers configured, nothing to clean" >&2
    return 0
  fi

  for node in "${nodes[@]}"; do
    rc=0
    # Every command inside the node is individually best effort (|| true), so
    # one failing step does not prevent the following ones from running.
    # Order: crictl image/pod cleanup, containerd overlayfs snapshot dirs,
    # k3s storage dirs, log files, temp files, then report remaining space.
    # NOTE(review): the snapshot directories are removed directly on disk
    # rather than through containerd - confirm this is acceptable while pods
    # are still running on the node.
    docker exec "$node" sh -c "
      crictl rmi --prune 2>/dev/null || true
      crictl rmp --all 2>/dev/null || true
      crictl images -q | xargs -r crictl rmi 2>/dev/null || true
      find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
      find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
      find /var/log -type f -name '*.log' -delete 2>/dev/null || true
      find /tmp -type f -delete 2>/dev/null || true
      find /var/lib/rancher/k3s -type f -name '*.log' -delete 2>/dev/null || true
      df -h /
    " || rc=$?
    if [ "$rc" -ne 0 ]; then
      # Report which node failed instead of swallowing the error silently.
      echo "cleanup_k3d_nodes: cleanup on '$node' exited with status $rc (ignored)" >&2
    fi
  done
  return 0
}
|
||||
cleanup_k3d_nodes
|
||||
# Clean up containerd snapshots and images more aggressively
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "find /var/lib/rancher/k3s/agent/containerd -type d -name 'snapshots' -exec rm -rf {}/* 2>/dev/null \; || true" || true
|
||||
# Clean up old logs and temporary files
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "find /var/log -type f -name '*.log' -delete 2>/dev/null || true" || true
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "find /tmp -type f -delete 2>/dev/null || true" || true
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "find /var/lib/rancher/k3s -type f -name '*.log' -delete 2>/dev/null || true" || true
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "df -h" 2>/dev/null || true
|
||||
cleanup_k3d_nodes
|
||||
# Wait for disk pressure taints to clear before proceeding
|
||||
echo "Checking for disk pressure taints..."
|
||||
for i in {1..20}; do
|
||||
if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then
|
||||
echo "Disk pressure detected, cleaning up and waiting... ($i/20)"
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker system prune -af --volumes 2>/dev/null || true" || true
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "
|
||||
crictl rmi --prune 2>/dev/null || true
|
||||
crictl rmp --all 2>/dev/null || true
|
||||
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
" || true
|
||||
done
|
||||
docker system prune -af --volumes || true
|
||||
sleep 3
|
||||
else
|
||||
|
|
@ -209,33 +251,43 @@ jobs:
|
|||
echo "Ensuring disk pressure is cleared before test..."
|
||||
rm -rf ./cloud-runner-cache/* || true
|
||||
docker system prune -af --volumes || true
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker system prune -af --volumes 2>/dev/null || true" || true
|
||||
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}"
|
||||
# The docker CLI is not available inside k3d node containers, so running
# "docker system prune" there always failed silently. Prune unused images
# through crictl instead. Word splitting of the node list is intentional.
for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "crictl rmi --prune 2>/dev/null || true" || true
done
|
||||
# Wait for disk pressure taints to clear (with aggressive cleanup)
|
||||
# Limit to 10 attempts to avoid timeout - if cleanup doesn't work, just remove the taint
|
||||
PREVIOUS_DISK_USAGE=100
|
||||
for i in {1..10}; do
|
||||
HAS_DISK_PRESSURE=$(kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure" && echo "true" || echo "false")
|
||||
if [ "$HAS_DISK_PRESSURE" = "true" ]; then
|
||||
echo "Disk pressure detected, cleaning up aggressively... ($i/10)"
|
||||
# Check actual disk usage on the node
|
||||
DISK_USAGE=$(docker exec k3d-unity-builder-agent-0 sh -c "df -h / 2>/dev/null | tail -1 | awk '{print \$5}' | sed 's/%//'" || echo "unknown")
|
||||
echo "Current disk usage on k3d node: ${DISK_USAGE}%"
|
||||
|
||||
# Use k3s/containerd commands instead of docker (docker not available in k3d nodes)
|
||||
# Clean up k3s containerd snapshots and images
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "crictl rmi --prune 2>/dev/null || true" || true
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "crictl rmp --all 2>/dev/null || true" || true
|
||||
# Clean up old containerd snapshots
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "find /var/lib/rancher/k3s/agent/containerd -type d -name 'snapshots' -exec rm -rf {}/* 2>/dev/null \; || true" || true
|
||||
# Clean up k3s logs and temp files
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "find /var/lib/rancher/k3s -type f -name '*.log' -delete 2>/dev/null || true" || true
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "find /tmp -type f -mtime +0 -delete 2>/dev/null || true" || true
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "find /var/log -type f -name '*.log' -mtime +0 -delete 2>/dev/null || true" || true
|
||||
# Clean up host docker
|
||||
docker system prune -af --volumes || true
|
||||
|
||||
# Check if disk usage improved
|
||||
NEW_DISK_USAGE=$(docker exec k3d-unity-builder-agent-0 sh -c "df -h / 2>/dev/null | tail -1 | awk '{print \$5}' | sed 's/%//'" || echo "unknown")
|
||||
if [ "$HAS_DISK_PRESSURE" = "true" ]; then
|
||||
echo "Disk pressure detected, cleaning up aggressively... ($i/10)"
|
||||
# Check actual disk usage on the node
|
||||
PRIMARY_NODE=$(echo "$K3D_NODE_CONTAINERS" | awk '{print $1}')
|
||||
DISK_USAGE=$(docker exec "$PRIMARY_NODE" sh -c "df -h / 2>/dev/null | tail -1 | awk '{print \$5}' | sed 's/%//'" || echo "unknown")
|
||||
echo "Current disk usage on k3d node: ${DISK_USAGE}%"
|
||||
|
||||
# Use k3s/containerd commands instead of docker (docker not available in k3d nodes)
|
||||
# Clean up k3s containerd snapshots and images
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "crictl rmi --prune 2>/dev/null || true" || true
|
||||
docker exec "$NODE" sh -c "crictl rmp --all 2>/dev/null || true" || true
|
||||
done
|
||||
# Clean up old containerd snapshots
|
||||
# Empty every directory named "snapshots" below the containerd root on each node.
# find -exec runs its command without a shell, so a literal "{}/*" argument is
# never glob-expanded and rm removed nothing. Pass the directory to an inner sh
# so that the "/*" glob is expanded there.
# NOTE(review): this deletes snapshot data directly on disk - confirm that is
# acceptable while pods are still running on the node.
for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "find /var/lib/rancher/k3s/agent/containerd -type d -name 'snapshots' -exec sh -c 'rm -rf \"\$1\"/*' sh {} \; 2>/dev/null || true" || true
done
|
||||
# Clean up k3s logs and temp files
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "find /var/lib/rancher/k3s -type f -name '*.log' -delete 2>/dev/null || true" || true
|
||||
docker exec "$NODE" sh -c "find /tmp -type f -mtime +0 -delete 2>/dev/null || true" || true
|
||||
docker exec "$NODE" sh -c "find /var/log -type f -name '*.log' -mtime +0 -delete 2>/dev/null || true" || true
|
||||
done
|
||||
# Clean up host docker
|
||||
docker system prune -af --volumes || true
|
||||
|
||||
# Check if disk usage improved
|
||||
NEW_DISK_USAGE=$(docker exec "$PRIMARY_NODE" sh -c "df -h / 2>/dev/null | tail -1 | awk '{print \$5}' | sed 's/%//'" || echo "unknown")
|
||||
if [ "$NEW_DISK_USAGE" != "unknown" ] && [ "$PREVIOUS_DISK_USAGE" != "unknown" ]; then
|
||||
if [ "$NEW_DISK_USAGE" -ge "$PREVIOUS_DISK_USAGE" ] && [ "$i" -ge 3 ]; then
|
||||
echo "Disk usage not improving (${PREVIOUS_DISK_USAGE}% -> ${NEW_DISK_USAGE}%), breaking cleanup loop and removing taint manually"
|
||||
|
|
@ -363,12 +415,29 @@ jobs:
|
|||
rm -rf ./cloud-runner-cache/* || true
|
||||
docker system prune -af --volumes || true
|
||||
# Clean up disk space on k3d node
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker system prune -af --volumes 2>/dev/null || true" || true
|
||||
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}"
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "
|
||||
crictl rmi --prune 2>/dev/null || true
|
||||
crictl rmp --all 2>/dev/null || true
|
||||
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
" || true
|
||||
done
|
||||
# Wait for disk pressure to clear
|
||||
for i in {1..15}; do
|
||||
if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then
|
||||
echo "Disk pressure detected, cleaning up... ($i/15)"
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker system prune -af --volumes 2>/dev/null || true" || true
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "
|
||||
crictl rmi --prune 2>/dev/null || true
|
||||
crictl rmp --all 2>/dev/null || true
|
||||
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
" || true
|
||||
done
|
||||
docker system prune -af --volumes || true
|
||||
sleep 2
|
||||
else
|
||||
|
|
@ -381,13 +450,32 @@ jobs:
|
|||
echo "Ensuring disk pressure is cleared before test..."
|
||||
rm -rf ./cloud-runner-cache/* || true
|
||||
docker system prune -af --volumes || true
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker system prune -af --volumes 2>/dev/null || true" || true
|
||||
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}"
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "
|
||||
crictl rmi --prune 2>/dev/null || true
|
||||
crictl rmp --all 2>/dev/null || true
|
||||
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
" || true
|
||||
done
|
||||
for i in {1..30}; do
|
||||
if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then
|
||||
echo "Disk pressure detected, cleaning up aggressively... ($i/30)"
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker system prune -af --volumes 2>/dev/null || true" || true
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "
|
||||
crictl rmi --prune 2>/dev/null || true
|
||||
crictl rmp --all 2>/dev/null || true
|
||||
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
" || true
|
||||
done
|
||||
docker system prune -af --volumes || true
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker images -q | xargs -r docker rmi -f 2>/dev/null || true" || true
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "crictl images -q | xargs -r crictl rmi 2>/dev/null || true" || true
|
||||
done
|
||||
sleep 3
|
||||
else
|
||||
echo "No disk pressure taints found, proceeding with test"
|
||||
|
|
@ -459,12 +547,29 @@ jobs:
|
|||
rm -rf ./cloud-runner-cache/* || true
|
||||
docker system prune -af --volumes || true
|
||||
# Clean up disk space on k3d node
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker system prune -af --volumes 2>/dev/null || true" || true
|
||||
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}"
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "
|
||||
crictl rmi --prune 2>/dev/null || true
|
||||
crictl rmp --all 2>/dev/null || true
|
||||
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
" || true
|
||||
done
|
||||
# Wait for disk pressure to clear
|
||||
for i in {1..15}; do
|
||||
if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then
|
||||
echo "Disk pressure detected, cleaning up... ($i/15)"
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker system prune -af --volumes 2>/dev/null || true" || true
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "
|
||||
crictl rmi --prune 2>/dev/null || true
|
||||
crictl rmp --all 2>/dev/null || true
|
||||
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
" || true
|
||||
done
|
||||
docker system prune -af --volumes || true
|
||||
sleep 2
|
||||
else
|
||||
|
|
@ -477,7 +582,16 @@ jobs:
|
|||
echo "Ensuring disk pressure is cleared before test..."
|
||||
rm -rf ./cloud-runner-cache/* || true
|
||||
docker system prune -af --volumes || true
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker system prune -af --volumes 2>/dev/null || true" || true
|
||||
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}"
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "
|
||||
crictl rmi --prune 2>/dev/null || true
|
||||
crictl rmp --all 2>/dev/null || true
|
||||
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
" || true
|
||||
done
|
||||
# Wait for disk pressure taints to clear (with aggressive cleanup)
|
||||
# Limit to 10 attempts to avoid timeout - if cleanup doesn't work, just remove the taint
|
||||
PREVIOUS_DISK_USAGE=100
|
||||
|
|
@ -486,24 +600,36 @@ jobs:
|
|||
if [ "$HAS_DISK_PRESSURE" = "true" ]; then
|
||||
echo "Disk pressure detected, cleaning up aggressively... ($i/10)"
|
||||
# Check actual disk usage on the node
|
||||
DISK_USAGE=$(docker exec k3d-unity-builder-agent-0 sh -c "df -h / 2>/dev/null | tail -1 | awk '{print \$5}' | sed 's/%//'" || echo "unknown")
|
||||
PRIMARY_NODE=$(echo "$K3D_NODE_CONTAINERS" | awk '{print $1}')
|
||||
DISK_USAGE=$(docker exec "$PRIMARY_NODE" sh -c "df -h / 2>/dev/null | tail -1 | awk '{print \$5}' | sed 's/%//'" || echo "unknown")
|
||||
echo "Current disk usage on k3d node: ${DISK_USAGE}%"
|
||||
|
||||
# Use k3s/containerd commands instead of docker (docker not available in k3d nodes)
|
||||
# Clean up k3s containerd snapshots and images
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "crictl rmi --prune 2>/dev/null || true" || true
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "crictl rmp --all 2>/dev/null || true" || true
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "
|
||||
crictl rmi --prune 2>/dev/null || true
|
||||
crictl rmp --all 2>/dev/null || true
|
||||
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
|
||||
" || true
|
||||
done
|
||||
# Clean up old containerd snapshots
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "find /var/lib/rancher/k3s/agent/containerd -type d -name 'snapshots' -exec rm -rf {}/* 2>/dev/null \; || true" || true
|
||||
# Empty every directory named "snapshots" below the containerd root on each node.
# find -exec runs its command without a shell, so a literal "{}/*" argument is
# never glob-expanded and rm removed nothing. Pass the directory to an inner sh
# so that the "/*" glob is expanded there.
# NOTE(review): this deletes snapshot data directly on disk - confirm that is
# acceptable while pods are still running on the node.
for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "find /var/lib/rancher/k3s/agent/containerd -type d -name 'snapshots' -exec sh -c 'rm -rf \"\$1\"/*' sh {} \; 2>/dev/null || true" || true
done
|
||||
# Clean up k3s logs and temp files
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "find /var/lib/rancher/k3s -type f -name '*.log' -delete 2>/dev/null || true" || true
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "find /tmp -type f -mtime +0 -delete 2>/dev/null || true" || true
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "find /var/log -type f -name '*.log' -mtime +0 -delete 2>/dev/null || true" || true
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "
|
||||
find /var/lib/rancher/k3s -type f -name '*.log' -delete 2>/dev/null || true
|
||||
find /tmp -type f -mtime +0 -delete 2>/dev/null || true
|
||||
find /var/log -type f -name '*.log' -mtime +0 -delete 2>/dev/null || true
|
||||
" || true
|
||||
done
|
||||
# Clean up host docker
|
||||
docker system prune -af --volumes || true
|
||||
|
||||
# Check if disk usage improved
|
||||
NEW_DISK_USAGE=$(docker exec k3d-unity-builder-agent-0 sh -c "df -h / 2>/dev/null | tail -1 | awk '{print \$5}' | sed 's/%//'" || echo "unknown")
|
||||
NEW_DISK_USAGE=$(docker exec "$PRIMARY_NODE" sh -c "df -h / 2>/dev/null | tail -1 | awk '{print \$5}' | sed 's/%//'" || echo "unknown")
|
||||
if [ "$NEW_DISK_USAGE" != "unknown" ] && [ "$PREVIOUS_DISK_USAGE" != "unknown" ]; then
|
||||
if [ "$NEW_DISK_USAGE" -ge "$PREVIOUS_DISK_USAGE" ] && [ "$i" -ge 3 ]; then
|
||||
echo "Disk usage not improving (${PREVIOUS_DISK_USAGE}% -> ${NEW_DISK_USAGE}%), breaking cleanup loop and removing taint manually"
|
||||
|
|
@ -631,12 +757,17 @@ jobs:
|
|||
rm -rf ./cloud-runner-cache/* || true
|
||||
docker system prune -af --volumes || true
|
||||
# Clean up disk space on k3d node
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker system prune -af --volumes 2>/dev/null || true" || true
|
||||
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}"
|
||||
# The docker CLI is not available inside k3d node containers, so running
# "docker system prune" there always failed silently. Prune unused images
# through crictl instead. Word splitting of the node list is intentional.
for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "crictl rmi --prune 2>/dev/null || true" || true
done
|
||||
# Wait for disk pressure to clear
|
||||
for i in {1..15}; do
|
||||
if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then
|
||||
echo "Disk pressure detected, cleaning up... ($i/15)"
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker system prune -af --volumes 2>/dev/null || true" || true
|
||||
# The docker CLI is not available inside k3d node containers, so running
# "docker system prune" there always failed silently. Prune unused images
# through crictl instead. Word splitting of the node list is intentional.
for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "crictl rmi --prune 2>/dev/null || true" || true
done
|
||||
docker system prune -af --volumes || true
|
||||
sleep 2
|
||||
else
|
||||
|
|
@ -649,13 +780,32 @@ jobs:
|
|||
echo "Ensuring disk pressure is cleared before test..."
|
||||
rm -rf ./cloud-runner-cache/* || true
|
||||
docker system prune -af --volumes || true
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker system prune -af --volumes 2>/dev/null || true" || true
|
||||
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}"
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "
|
||||
crictl rmi --prune 2>/dev/null || true
|
||||
crictl rmp --all 2>/dev/null || true
|
||||
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
" || true
|
||||
done
|
||||
for i in {1..30}; do
|
||||
if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then
|
||||
echo "Disk pressure detected, cleaning up aggressively... ($i/30)"
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker system prune -af --volumes 2>/dev/null || true" || true
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "
|
||||
crictl rmi --prune 2>/dev/null || true
|
||||
crictl rmp --all 2>/dev/null || true
|
||||
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
" || true
|
||||
done
|
||||
docker system prune -af --volumes || true
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker images -q | xargs -r docker rmi -f 2>/dev/null || true" || true
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "crictl images -q | xargs -r crictl rmi 2>/dev/null || true" || true
|
||||
done
|
||||
sleep 3
|
||||
else
|
||||
echo "No disk pressure taints found, proceeding with test"
|
||||
|
|
@ -727,12 +877,17 @@ jobs:
|
|||
rm -rf ./cloud-runner-cache/* || true
|
||||
docker system prune -af --volumes || true
|
||||
# Clean up disk space on k3d node
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker system prune -af --volumes 2>/dev/null || true" || true
|
||||
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}"
|
||||
# The docker CLI is not available inside k3d node containers, so running
# "docker system prune" there always failed silently. Prune unused images
# through crictl instead. Word splitting of the node list is intentional.
for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "crictl rmi --prune 2>/dev/null || true" || true
done
|
||||
# Wait for disk pressure to clear
|
||||
for i in {1..15}; do
|
||||
if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then
|
||||
echo "Disk pressure detected, cleaning up... ($i/15)"
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker system prune -af --volumes 2>/dev/null || true" || true
|
||||
# The docker CLI is not available inside k3d node containers, so running
# "docker system prune" there always failed silently. Prune unused images
# through crictl instead. Word splitting of the node list is intentional.
for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "crictl rmi --prune 2>/dev/null || true" || true
done
|
||||
docker system prune -af --volumes || true
|
||||
sleep 2
|
||||
else
|
||||
|
|
@ -745,13 +900,32 @@ jobs:
|
|||
echo "Ensuring disk pressure is cleared before test..."
|
||||
rm -rf ./cloud-runner-cache/* || true
|
||||
docker system prune -af --volumes || true
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker system prune -af --volumes 2>/dev/null || true" || true
|
||||
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}"
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "
|
||||
crictl rmi --prune 2>/dev/null || true
|
||||
crictl rmp --all 2>/dev/null || true
|
||||
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
" || true
|
||||
done
|
||||
for i in {1..30}; do
|
||||
if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then
|
||||
echo "Disk pressure detected, cleaning up aggressively... ($i/30)"
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker system prune -af --volumes 2>/dev/null || true" || true
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "
|
||||
crictl rmi --prune 2>/dev/null || true
|
||||
crictl rmp --all 2>/dev/null || true
|
||||
crictl images -q | xargs -r crictl rmi 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/agent/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
find /var/lib/rancher/k3s/storage -mindepth 1 -maxdepth 1 -type d -exec rm -rf {} + 2>/dev/null || true
|
||||
" || true
|
||||
done
|
||||
docker system prune -af --volumes || true
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker images -q | xargs -r docker rmi -f 2>/dev/null || true" || true
|
||||
for NODE in $K3D_NODE_CONTAINERS; do
|
||||
docker exec "$NODE" sh -c "crictl images -q | xargs -r crictl rmi 2>/dev/null || true" || true
|
||||
done
|
||||
sleep 3
|
||||
else
|
||||
echo "No disk pressure taints found, proceeding with test"
|
||||
|
|
@ -822,12 +996,17 @@ jobs:
|
|||
rm -rf ./cloud-runner-cache/* || true
|
||||
docker system prune -af --volumes || true
|
||||
# Clean up disk space on k3d node
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker system prune -af --volumes 2>/dev/null || true" || true
|
||||
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0}"
|
||||
# The docker CLI is not available inside k3d node containers, so running
# "docker system prune" there always failed silently. Prune unused images
# through crictl instead. Word splitting of the node list is intentional.
for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "crictl rmi --prune 2>/dev/null || true" || true
done
|
||||
# Wait for disk pressure to clear
|
||||
for i in {1..15}; do
|
||||
if kubectl describe nodes 2>/dev/null | grep -q "node.kubernetes.io/disk-pressure"; then
|
||||
echo "Disk pressure detected, cleaning up... ($i/15)"
|
||||
docker exec k3d-unity-builder-agent-0 sh -c "docker system prune -af --volumes 2>/dev/null || true" || true
|
||||
# The docker CLI is not available inside k3d node containers, so running
# "docker system prune" there always failed silently. Prune unused images
# through crictl instead. Word splitting of the node list is intentional.
for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "crictl rmi --prune 2>/dev/null || true" || true
done
|
||||
docker system prune -af --volumes || true
|
||||
sleep 2
|
||||
else
|
||||
|
|
|
|||
Loading…
Reference in New Issue