pr feedback

cloud-runner-develop
Frostebite 2026-01-17 03:52:38 +00:00
parent 100e542566
commit a60739249f
4 changed files with 148 additions and 24 deletions


@@ -229,12 +229,46 @@ jobs:
# Pre-pull the Unity image into the k3d cluster before running tests
# This ensures it's cached in the k3d node's containerd and won't need to be pulled during test execution
UNITY_IMAGE="unityci/editor:ubuntu-2021.3.45f1-base-3"
# Check disk space before pulling
echo "Checking disk space before pre-pulling Unity image..."
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0 k3d-unity-builder-server-0}"
for NODE in $K3D_NODE_CONTAINERS; do
echo "Disk space in $NODE:"
docker exec "$NODE" sh -c "df -h /var/lib/rancher/k3s 2>/dev/null || df -h / 2>/dev/null || true" || true
done
# Clean up before pulling to ensure we have space
echo "Cleaning up before pre-pulling image..."
for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "crictl rm --all 2>/dev/null || true" || true
docker exec "$NODE" sh -c "crictl rmi --prune 2>/dev/null || true" || true
done || true
echo "Creating a temporary pod to pull and cache the Unity image..." echo "Creating a temporary pod to pull and cache the Unity image..."
kubectl run image-puller --image="$UNITY_IMAGE" --restart=Never --command -- sleep 1 || true kubectl run image-puller --image="$UNITY_IMAGE" --restart=Never --command -- sleep 1 || true
echo "Waiting for image pull to complete (this may take several minutes for 3.9GB image)..." echo "Waiting for image pull to complete (this may take several minutes for 3.9GB image)..."
timeout 600 kubectl wait --for=condition=Ready pod/image-puller --timeout=600s 2>/dev/null || \
timeout 600 kubectl wait --for=condition=PodScheduled pod/image-puller --timeout=600s 2>/dev/null || true # Wait for pod to be scheduled and image to be pulled
sleep 30 MAX_WAIT=600
ELAPSED=0
while [ $ELAPSED -lt $MAX_WAIT ]; do
POD_STATUS=$(kubectl get pod image-puller -o jsonpath='{.status.phase}' 2>/dev/null || echo "NotFound")
if [ "$POD_STATUS" = "Running" ] || [ "$POD_STATUS" = "Succeeded" ]; then
echo "Image pull pod is $POD_STATUS"
break
elif [ "$POD_STATUS" = "Failed" ] || [ "$POD_STATUS" = "Evicted" ]; then
echo "Warning: Image pull pod status is $POD_STATUS. Checking events..."
kubectl describe pod image-puller 2>/dev/null | tail -20 || true
# Try to continue anyway - image might be partially cached
break
fi
sleep 5
ELAPSED=$((ELAPSED + 5))
echo "Waiting for image pull... ($ELAPSED/$MAX_WAIT seconds)"
done
sleep 10
kubectl delete pod image-puller --ignore-not-found=true || true
for i in {1..30}; do
if ! kubectl get pod image-puller 2>/dev/null; then
@@ -244,11 +278,22 @@ jobs:
sleep 2
done
sleep 5
# Clean up the pod runtime but keep the image
for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "
crictl rmp --all 2>/dev/null || true
find /var/lib/rancher/k3s/agent/containerd/io.containerd.runtime.v2.task/default -name '*image-puller*' -exec rm -rf {} + 2>/dev/null || true
" || true
done
# Verify image is cached
echo "Checking if Unity image is cached..."
for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "crictl images | grep unityci/editor || echo 'Image not found in $NODE'" || true
done
echo "Image pre-pull completed. Image should be cached in k3d node."
- name: Clean up K8s test resources before tests
run: |
echo "Cleaning up K8s test resources..."
@@ -374,6 +419,24 @@ jobs:
AWS_EC2_METADATA_DISABLED: 'true'
GIT_PRIVATE_TOKEN: ${{ secrets.GIT_PRIVATE_TOKEN }}
GITHUB_TOKEN: ${{ secrets.GIT_PRIVATE_TOKEN }}
- name: Clean up disk space before end2end-retaining test
run: |
echo "Cleaning up disk space before end2end-retaining test..."
kubectl delete jobs --all --ignore-not-found=true -n default || true
kubectl get pods -n default -o name 2>/dev/null | grep -E "(unity-builder-job-|helper-pod-)" | while read pod; do
kubectl delete "$pod" --ignore-not-found=true || true
done || true
# Aggressive cleanup in k3d nodes
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0 k3d-unity-builder-server-0}"
for NODE in $K3D_NODE_CONTAINERS; do
docker exec "$NODE" sh -c "crictl rm --all 2>/dev/null || true" || true
docker exec "$NODE" sh -c "crictl rmi --prune 2>/dev/null || true" || true
docker exec "$NODE" sh -c "crictl images -q | xargs -r crictl rmi 2>/dev/null || true" || true
done || true
rm -rf ./cloud-runner-cache/* || true
docker system prune -f || true
echo "Disk usage before end2end-retaining test:"
df -h
- name: Run cloud-runner-end2end-retaining test (K8s)
timeout-minutes: 60
run: yarn run test "cloud-runner-end2end-retaining" --detectOpenHandles --forceExit --runInBand
@@ -428,9 +491,20 @@ jobs:
done || true
rm -rf ./cloud-runner-cache/* || true
docker system prune -af --volumes || true
# Aggressive cleanup in k3d nodes to free ephemeral storage
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0 k3d-unity-builder-server-0}"
for NODE in $K3D_NODE_CONTAINERS; do
echo "Cleaning up $NODE..."
# Remove all stopped containers
docker exec "$NODE" sh -c "crictl rm --all 2>/dev/null || true" || true
# Remove all unused images
docker exec "$NODE" sh -c "crictl rmi --prune 2>/dev/null || true" || true
# Remove all images (more aggressive)
docker exec "$NODE" sh -c "crictl images -q | xargs -r crictl rmi 2>/dev/null || true" || true
# Clean up containerd content store
docker exec "$NODE" sh -c "crictl images --prune 2>/dev/null || true" || true
# Check disk space
docker exec "$NODE" sh -c "df -h /var/lib/rancher/k3s 2>/dev/null || df -h / 2>/dev/null || true" || true
done
echo "Disk usage after K8s cleanup:"
df -h
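
For local debugging outside the workflow, the same docker exec + crictl pattern used in the steps above can be wrapped into a small standalone script. This is only an illustrative sketch, not part of this diff; it assumes the default k3d node container names shown above.

#!/usr/bin/env bash
# Illustrative helper (not part of this PR): list cached images and disk usage
# inside each k3d node container, reusing the workflow's docker exec + crictl pattern.
K3D_NODE_CONTAINERS="${K3D_NODE_CONTAINERS:-k3d-unity-builder-agent-0 k3d-unity-builder-server-0}"
for NODE in $K3D_NODE_CONTAINERS; do
  echo "=== $NODE ==="
  docker exec "$NODE" sh -c "crictl images" || true
  docker exec "$NODE" sh -c "df -h /var/lib/rancher/k3s 2>/dev/null || df -h /" || true
done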

dist/index.js vendored

@@ -3756,9 +3756,36 @@ class Kubernetes {
try {
cloud_runner_logger_1.default.log('Cleaning up old images in k3d node before pulling new image...');
const { CloudRunnerSystem } = await Promise.resolve().then(() => __importStar(__nccwpck_require__(4197)));
// More aggressive cleanup: remove all stopped containers, unused images, and containerd snapshots
const cleanupCommands = [
// Remove all stopped containers
'docker exec k3d-unity-builder-agent-0 sh -c "crictl rm --all 2>/dev/null || true" || true',
// Remove all unused images (more aggressive)
'docker exec k3d-unity-builder-agent-0 sh -c "crictl rmi --prune 2>/dev/null || true" || true',
// Remove all images except the one we might need (if any)
'docker exec k3d-unity-builder-agent-0 sh -c "crictl images -q | xargs -r crictl rmi 2>/dev/null || true" || true',
// Clean up containerd snapshots and layers
'docker exec k3d-unity-builder-agent-0 sh -c "crictl rmi --prune --all 2>/dev/null || true" || true',
// Clean up containerd content store (removes unused layers)
'docker exec k3d-unity-builder-agent-0 sh -c "crictl images --prune 2>/dev/null || true" || true',
];
for (const cmd of cleanupCommands) {
try {
await CloudRunnerSystem.Run(cmd, true, true);
}
catch (cmdError) {
// Ignore individual command failures
cloud_runner_logger_1.default.log(`Cleanup command failed (non-fatal): ${cmdError}`);
}
}
// Check disk space after cleanup
try {
const diskCheck = await CloudRunnerSystem.Run('docker exec k3d-unity-builder-agent-0 sh -c "df -h /var/lib/rancher/k3s 2>/dev/null || df -h / 2>/dev/null || true" || true', true, true);
cloud_runner_logger_1.default.log(`Disk space in k3d node after cleanup:\n${diskCheck}`);
}
catch {
// Ignore disk check failures
}
}
catch (cleanupError) {
cloud_runner_logger_1.default.logWarning(`Failed to cleanup images before job creation: ${cleanupError}`);

dist/index.js.map vendored

File diff suppressed because one or more lines are too long


@@ -161,17 +161,40 @@ class Kubernetes implements ProviderInterface {
try {
CloudRunnerLogger.log('Cleaning up old images in k3d node before pulling new image...');
const { CloudRunnerSystem } = await import('../../services/core/cloud-runner-system');
// More aggressive cleanup: remove all stopped containers, unused images, and containerd snapshots
const cleanupCommands = [
// Remove all stopped containers
'docker exec k3d-unity-builder-agent-0 sh -c "crictl rm --all 2>/dev/null || true" || true',
// Remove all unused images (more aggressive)
'docker exec k3d-unity-builder-agent-0 sh -c "crictl rmi --prune 2>/dev/null || true" || true', 'docker exec k3d-unity-builder-agent-0 sh -c "crictl rmi --prune 2>/dev/null || true" || true',
// Remove all images except the one we might need (if any)
'docker exec k3d-unity-builder-agent-0 sh -c "crictl images -q | xargs -r crictl rmi 2>/dev/null || true" || true',
// Clean up containerd snapshots and layers
'docker exec k3d-unity-builder-agent-0 sh -c "crictl rmi --prune --all 2>/dev/null || true" || true',
// Clean up containerd content store (removes unused layers)
'docker exec k3d-unity-builder-agent-0 sh -c "crictl images --prune 2>/dev/null || true" || true',
];
for (const cmd of cleanupCommands) {
try {
await CloudRunnerSystem.Run(cmd, true, true);
} catch (cmdError) {
// Ignore individual command failures
CloudRunnerLogger.log(`Cleanup command failed (non-fatal): ${cmdError}`);
}
}
// Check disk space after cleanup
try {
const diskCheck = await CloudRunnerSystem.Run(
'docker exec k3d-unity-builder-agent-0 sh -c "df -h /var/lib/rancher/k3s 2>/dev/null || df -h / 2>/dev/null || true" || true',
true,
true,
);
CloudRunnerLogger.log(`Disk space in k3d node after cleanup:\n${diskCheck}`);
} catch {
// Ignore disk check failures
}
} catch (cleanupError) {
CloudRunnerLogger.logWarning(`Failed to cleanup images before job creation: ${cleanupError}`);
// Continue anyway - image might already be cached