cloud-runner-develop
Frostebite 2026-01-20 02:23:23 +00:00
parent 4b09fe3615
commit ad5dd3b9c1
1 changed files with 29 additions and 40 deletions

View File

@ -242,49 +242,38 @@ jobs:
# Free disk space on every k3d node before the Unity image is pre-pulled.
# Every step is best-effort (|| true): a node that cannot be cleaned must not
# fail the job.
echo "Cleaning up before pre-pulling image..."
# $K3D_NODE_CONTAINERS is a whitespace-separated list of node container names,
# so it is intentionally left unquoted to get one loop iteration per node.
for NODE in $K3D_NODE_CONTAINERS; do
  # Remove containers first so that their images are no longer referenced.
  docker exec "$NODE" sh -c "crictl rm --all 2>/dev/null || true" || true
  # Only remove non-Unity images to preserve space while keeping Unity image if already cached.
  # A blanket 'crictl rmi --prune' is deliberately NOT run here: it deletes every
  # unused image, which includes the cached Unity image this step must keep.
  # Image names are read from repoTags/repoDigests through crictl's go-template
  # output; an image whose inspect call fails is skipped rather than deleted.
  docker exec "$NODE" sh -c "
    for img in \$(crictl images -q 2>/dev/null); do
      refs=\$(crictl inspecti -o go-template --template '{{range .status.repoTags}}{{println .}}{{end}}{{range .status.repoDigests}}{{println .}}{{end}}' \"\$img\" 2>/dev/null) || continue
      if ! printf '%s\n' \"\$refs\" | grep -qE 'unityci/editor|unity'; then
        crictl rmi \"\$img\" 2>/dev/null || true
      fi
    done
  " || true
done || true
# Pre-pull the Unity image by scheduling a throwaway pod that references it,
# wait (up to MAX_WAIT seconds) for the pod to leave the pull stage, then
# delete the pod and wait until it is really gone. All kubectl calls are
# best-effort so that a failed pre-pull never aborts the job.
echo "Creating a temporary pod to pull and cache the Unity image..."
kubectl run image-puller --image="$UNITY_IMAGE" --restart=Never --command -- sleep 1 || true
echo "Waiting for image pull to complete (this may take several minutes for 3.9GB image)..."
# Wait for pod to be scheduled and image to be pulled
MAX_WAIT=600
ELAPSED=0
POD_STATUS="NotFound"
while [ "$ELAPSED" -lt "$MAX_WAIT" ]; do
  # A missing pod (or unreachable API server) is reported as "NotFound".
  POD_STATUS=$(kubectl get pod image-puller -o jsonpath='{.status.phase}' 2>/dev/null || echo "NotFound")
  case "$POD_STATUS" in
    Running | Succeeded)
      # The container started, so the image has been pulled.
      echo "Image pull pod is $POD_STATUS"
      break
      ;;
    Failed)
      # Evicted pods also report phase "Failed"; "Evicted" is a reason, not a phase.
      echo "Warning: Image pull pod status is $POD_STATUS. Checking events..."
      kubectl describe pod image-puller 2>/dev/null | tail -20 || true
      # Try to continue anyway - image might be partially cached
      break
      ;;
  esac
  sleep 5
  ELAPSED=$((ELAPSED + 5))
  echo "Waiting for image pull... ($ELAPSED/$MAX_WAIT seconds)"
done
# The loop only reaches MAX_WAIT when no terminal/running phase was seen;
# surface that instead of continuing silently.
if [ "$ELAPSED" -ge "$MAX_WAIT" ]; then
  echo "Warning: Image pull pod did not start within $MAX_WAIT seconds (last status: $POD_STATUS). Checking events..."
  kubectl describe pod image-puller 2>/dev/null | tail -20 || true
fi
sleep 10
kubectl delete pod image-puller --ignore-not-found=true || true
# Poll until the pod object no longer exists (max 30 attempts, 2s apart).
for i in {1..30}; do
  # Output is discarded: only the exit status matters here.
  if ! kubectl get pod image-puller >/dev/null 2>&1; then
    echo "Pre-pull pod fully deleted (attempt $i/30)"
    break
  fi
  sleep 2
done
sleep 5
# Explicitly pull the image on BOTH nodes to ensure it's cached wherever pods might be scheduled
# This prevents "no space left" errors when pods are scheduled on nodes without the cached image
# Per node: clean up leftover pod runtime state (images are kept), skip the
# pull when the requested image is already present, otherwise pull it with a
# 10 minute limit. Failures only produce warnings; the job continues.
echo "Pulling Unity image directly on each node to ensure it's cached..."
# $K3D_NODE_CONTAINERS is a whitespace-separated list; unquoted on purpose.
for NODE in $K3D_NODE_CONTAINERS; do
  # Clean up the pod runtime but keep the image
  docker exec "$NODE" sh -c "
    crictl rmp --all 2>/dev/null || true
    find /var/lib/rancher/k3s/agent/containerd/io.containerd.runtime.v2.task/default -name '*image-puller*' -exec rm -rf {} + 2>/dev/null || true
  " || true
  echo "Checking if image already exists on $NODE..."
  # Check for the exact requested image, not just any unityci/editor tag:
  # 'crictl inspecti' fails when the reference is not present on the node.
  # If the check itself errors, we fall through to the pull below.
  if docker exec "$NODE" crictl inspecti "$UNITY_IMAGE" >/dev/null 2>&1; then
    IMAGE_EXISTS="yes"
  else
    IMAGE_EXISTS="no"
  fi
  if [ "$IMAGE_EXISTS" = "yes" ]; then
    echo "Unity image already cached on $NODE, skipping pull"
    continue
  fi
  echo "Pulling Unity image on $NODE (this may take several minutes for 3.9GB image)..."
  # Use crictl pull directly in the node's containerd
  # This ensures the image is cached in the node's local storage
  # Use timeout to prevent hanging indefinitely (10 minutes max)
  # The image reference is passed as its own argument (no 'sh -c' string), so
  # it is never re-parsed by a shell inside the node.
  PULL_EXIT_CODE=0
  timeout 600 docker exec "$NODE" crictl pull "$UNITY_IMAGE" 2>&1 || PULL_EXIT_CODE=$?
  if [ "$PULL_EXIT_CODE" -eq 0 ]; then
    echo "Successfully pulled image on $NODE"
    # Verify it's cached
    docker exec "$NODE" sh -c "crictl images | grep unityci/editor || echo 'Warning: Image not found after pull'" || true
  else
    # timeout(1) exits with 124 when the time limit was hit.
    if [ "$PULL_EXIT_CODE" -eq 124 ]; then
      echo "Warning: Image pull on $NODE timed out after 10 minutes. Checking if partially cached..."
    else
      echo "Warning: Image pull on $NODE failed (exit code: $PULL_EXIT_CODE). Checking if partially cached..."
    fi
    docker exec "$NODE" sh -c "crictl images | grep unityci/editor || echo 'Image not found on $NODE'" || true
    echo "Note: Pods scheduled on $NODE will attempt to pull the image during runtime, which may fail if disk space is insufficient."
  fi
done
# Verify image is cached