pr feedback

cloud-runner-develop
Frostebite 2026-01-17 05:48:22 +00:00
parent 0497076eba
commit 5f552f2bc2
10 changed files with 76 additions and 60 deletions

dist/index.js vendored

@@ -3762,17 +3762,17 @@ class Kubernetes {
const imageTag = image.split(':')[1] || 'latest';
// More targeted cleanup: remove stopped containers and non-Unity images
// IMPORTANT: Preserve Unity images to avoid re-pulling the 3.9GB image
// Strategy: Only remove containers, don't prune images (which might remove Unity image)
const cleanupCommands = [
// Remove all stopped containers (this frees runtime space but keeps images)
'docker exec k3d-unity-builder-agent-0 sh -c "crictl rm --all 2>/dev/null || true" || true',
'docker exec k3d-unity-builder-server-0 sh -c "crictl rm --all 2>/dev/null || true" || true',
// Remove non-Unity images only (preserve unityci/editor images)
// List all images, filter out Unity images, then remove the rest
'docker exec k3d-unity-builder-agent-0 sh -c "crictl images --format \\"table {{.ID}}\\t{{.Repository}}\\" 2>/dev/null | grep -vE \\"unityci/editor|unity|IMAGE\\" | awk \\"{print \\$1}\\" | xargs -r crictl rmi 2>/dev/null || true" || true',
'docker exec k3d-unity-builder-server-0 sh -c "crictl images --format \\"table {{.ID}}\\t{{.Repository}}\\" 2>/dev/null | grep -vE \\"unityci/editor|unity|IMAGE\\" | awk \\"{print \\$1}\\" | xargs -r crictl rmi 2>/dev/null || true" || true',
// Clean up unused layers/snapshots (prune should preserve referenced images)
'docker exec k3d-unity-builder-agent-0 sh -c "crictl rmi --prune 2>/dev/null || true" || true',
'docker exec k3d-unity-builder-server-0 sh -c "crictl rmi --prune 2>/dev/null || true" || true',
// Remove specific non-Unity images by name (safer than filtering)
// Only remove known system images, preserve everything else including Unity
'docker exec k3d-unity-builder-agent-0 sh -c "crictl images --format \\"{{.Repository}}:{{.Tag}}\\" 2>/dev/null | grep -vE \\"unityci/editor|unity\\" | grep -E \\"rancher/|curlimages/|amazon/aws-cli|rclone/rclone|steamcmd/steamcmd|ubuntu:|alpine:\\" | xargs -r -I {} crictl rmi {} 2>/dev/null || true" || true',
'docker exec k3d-unity-builder-server-0 sh -c "crictl images --format \\"{{.Repository}}:{{.Tag}}\\" 2>/dev/null | grep -vE \\"unityci/editor|unity\\" | grep -E \\"rancher/|curlimages/|amazon/aws-cli|rclone/rclone|steamcmd/steamcmd|ubuntu:|alpine:\\" | xargs -r -I {} crictl rmi {} 2>/dev/null || true" || true',
// DO NOT use --prune as it might remove the Unity image if no containers are using it
// Only clean up if we have very little space left
];
for (const cmd of cleanupCommands) {
try {

dist/index.js.map vendored

File diff suppressed because one or more lines are too long

@@ -91,10 +91,14 @@ class CloudRunner {
let provider = CloudRunner.buildParameters.providerStrategy;
if (provider === 'aws' && isLocalStack && !forceAwsProvider) {
CloudRunnerLogger.log('LocalStack endpoints detected; routing provider to local-docker for this run');
CloudRunnerLogger.log('Note: Set AWS_FORCE_PROVIDER=aws to force AWS provider with LocalStack for AWS functionality tests');
CloudRunnerLogger.log(
'Note: Set AWS_FORCE_PROVIDER=aws to force AWS provider with LocalStack for AWS functionality tests',
);
provider = 'local-docker';
} else if (provider === 'aws' && isLocalStack && forceAwsProvider) {
CloudRunnerLogger.log('LocalStack endpoints detected but AWS_FORCE_PROVIDER is set; using AWS provider to validate AWS functionality');
CloudRunnerLogger.log(
'LocalStack endpoints detected but AWS_FORCE_PROVIDER is set; using AWS provider to validate AWS functionality',
);
}
switch (provider) {
@@ -107,7 +111,9 @@ class CloudRunner {
if (isLocalStack && forceAwsProvider) {
CloudRunnerLogger.log('✓ AWS provider initialized with LocalStack - AWS functionality will be validated');
} else if (isLocalStack && !forceAwsProvider) {
CloudRunnerLogger.log('⚠ WARNING: AWS provider was requested but LocalStack detected without AWS_FORCE_PROVIDER');
CloudRunnerLogger.log(
'⚠ WARNING: AWS provider was requested but LocalStack detected without AWS_FORCE_PROVIDER',
);
CloudRunnerLogger.log('⚠ This may cause AWS functionality tests to fail validation');
}
break;
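The provider-routing change above is the core of this commit's LocalStack handling. As a minimal sketch of the same decision table, assuming `isLocalStack` is detected from an AWS endpoint override and `forceAwsProvider` from the `AWS_FORCE_PROVIDER` environment variable (the detection code itself is outside this diff):

// Sketch only: mirrors the routing branches shown above; the env-based
// detection of LocalStack here is an assumption, not part of this diff.
function resolveProvider(requested: string, env: Record<string, string | undefined>): string {
  const endpoint = env['AWS_ENDPOINT'] ?? '';
  const isLocalStack = endpoint.includes('localhost') || endpoint.includes('localstack');
  const forceAwsProvider = env['AWS_FORCE_PROVIDER'] === 'aws';
  if (requested === 'aws' && isLocalStack && !forceAwsProvider) {
    return 'local-docker'; // LocalStack without the override: route to local-docker for this run
  }
  return requested; // real AWS, or LocalStack with AWS_FORCE_PROVIDER=aws set
}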


@@ -169,17 +169,17 @@ class Kubernetes implements ProviderInterface {
// More targeted cleanup: remove stopped containers and non-Unity images
// IMPORTANT: Preserve Unity images to avoid re-pulling the 3.9GB image
// Strategy: Only remove containers, don't prune images (which might remove Unity image)
const cleanupCommands = [
// Remove all stopped containers (this frees runtime space but keeps images)
'docker exec k3d-unity-builder-agent-0 sh -c "crictl rm --all 2>/dev/null || true" || true',
'docker exec k3d-unity-builder-server-0 sh -c "crictl rm --all 2>/dev/null || true" || true',
// Remove non-Unity images only (preserve unityci/editor images)
// List all images, filter out Unity images, then remove the rest
'docker exec k3d-unity-builder-agent-0 sh -c "crictl images --format \\"table {{.ID}}\\t{{.Repository}}\\" 2>/dev/null | grep -vE \\"unityci/editor|unity|IMAGE\\" | awk \\"{print \\$1}\\" | xargs -r crictl rmi 2>/dev/null || true" || true',
'docker exec k3d-unity-builder-server-0 sh -c "crictl images --format \\"table {{.ID}}\\t{{.Repository}}\\" 2>/dev/null | grep -vE \\"unityci/editor|unity|IMAGE\\" | awk \\"{print \\$1}\\" | xargs -r crictl rmi 2>/dev/null || true" || true',
// Clean up unused layers/snapshots (prune should preserve referenced images)
'docker exec k3d-unity-builder-agent-0 sh -c "crictl rmi --prune 2>/dev/null || true" || true',
'docker exec k3d-unity-builder-server-0 sh -c "crictl rmi --prune 2>/dev/null || true" || true',
// Remove specific non-Unity images by name (safer than filtering)
// Only remove known system images, preserve everything else including Unity
'docker exec k3d-unity-builder-agent-0 sh -c "crictl images --format \\"{{.Repository}}:{{.Tag}}\\" 2>/dev/null | grep -vE \\"unityci/editor|unity\\" | grep -E \\"rancher/|curlimages/|amazon/aws-cli|rclone/rclone|steamcmd/steamcmd|ubuntu:|alpine:\\" | xargs -r -I {} crictl rmi {} 2>/dev/null || true" || true',
'docker exec k3d-unity-builder-server-0 sh -c "crictl images --format \\"{{.Repository}}:{{.Tag}}\\" 2>/dev/null | grep -vE \\"unityci/editor|unity\\" | grep -E \\"rancher/|curlimages/|amazon/aws-cli|rclone/rclone|steamcmd/steamcmd|ubuntu:|alpine:\\" | xargs -r -I {} crictl rmi {} 2>/dev/null || true" || true',
// DO NOT use --prune as it might remove the Unity image if no containers are using it
// Only clean up if we have very little space left
];
for (const cmd of cleanupCommands) {
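The hunk cuts off at the top of the command loop. As a hedged sketch of how these commands are likely consumed (CloudRunnerSystem.Run and CloudRunnerLogger are the project's existing helpers and appear elsewhere in this diff; the exact loop body and the meaning of the boolean flags are assumptions here):

// Sketch of the cleanup loop; the (true, true) suppress flags mirror the
// retain-workspace test below and are an assumption for this provider code.
async function runCleanupCommands(cleanupCommands: string[]): Promise<void> {
  for (const cmd of cleanupCommands) {
    try {
      // Each command already ends in `|| true`, so failures inside the container
      // are swallowed; this try/catch only guards the docker exec wrapper itself.
      await CloudRunnerSystem.Run(cmd, true, true);
    } catch {
      CloudRunnerLogger.logWarning(`Cleanup command failed (non-fatal): ${cmd}`);
    }
  }
}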


@@ -148,15 +148,17 @@ class KubernetesPods {
}
// Check if pod was evicted due to disk pressure - this is an infrastructure issue
const wasEvicted = errorDetails.some((detail) =>
detail.toLowerCase().includes('evicted') || detail.toLowerCase().includes('diskpressure'),
const wasEvicted = errorDetails.some(
(detail) => detail.toLowerCase().includes('evicted') || detail.toLowerCase().includes('diskpressure'),
);
if (wasEvicted) {
const evictionMessage = `Pod ${podName} was evicted due to disk pressure. This is a test infrastructure issue - the cluster doesn't have enough disk space.`;
CloudRunnerLogger.logWarning(evictionMessage);
CloudRunnerLogger.log(`Pod details: ${errorDetails.join('\n')}`);
throw new Error(
`${evictionMessage}\nThis indicates the test environment needs more disk space or better cleanup.\n${errorDetails.join('\n')}`,
`${evictionMessage}\nThis indicates the test environment needs more disk space or better cleanup.\n${errorDetails.join(
'\n',
)}`,
);
}


@@ -519,7 +519,9 @@ class KubernetesTaskRunner {
// If actively pulling image, reset pending count to allow more time
// Large images (like Unity 3.9GB) can take 3-5 minutes to pull
if (isPullingImage && consecutivePendingCount > 4) {
CloudRunnerLogger.log(`Pod ${podName} is pulling image (check ${consecutivePendingCount}). This may take several minutes for large images.`);
CloudRunnerLogger.log(
`Pod ${podName} is pulling image (check ${consecutivePendingCount}). This may take several minutes for large images.`,
);
// Don't increment consecutivePendingCount if we're actively pulling
consecutivePendingCount = Math.max(4, consecutivePendingCount - 1);
}
@@ -530,9 +532,10 @@ class KubernetesTaskRunner {
// For tests, allow more time if image is being pulled (large images need 5+ minutes)
// Otherwise fail faster if stuck in Pending (2 minutes = 8 checks at 15s interval)
const isTest = process.env['cloudRunnerTests'] === 'true';
const isPullingImage = containerStatuses.some(
(cs: any) => cs.state?.waiting?.reason === 'ImagePull' || cs.state?.waiting?.reason === 'ErrImagePull',
) || conditions.some((c: any) => c.reason?.includes('Pulling'));
const isPullingImage =
containerStatuses.some(
(cs: any) => cs.state?.waiting?.reason === 'ImagePull' || cs.state?.waiting?.reason === 'ErrImagePull',
) || conditions.some((c: any) => c.reason?.includes('Pulling'));
// Allow up to 20 minutes for image pulls in tests (80 checks), 2 minutes otherwise
const maxPendingChecks = isTest && isPullingImage ? 80 : isTest ? 8 : 80;
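For reference, the cadence implied by the comments works out as follows, assuming the 15-second status-check interval stated above:

// 80 checks * 15 s = 1200 s = 20 minutes (test runs actively pulling a large image)
//  8 checks * 15 s =  120 s =  2 minutes (test runs stuck in Pending without a pull)
const checkIntervalSeconds = 15;
const maxWaitWhilePullingMinutes = (80 * checkIntervalSeconds) / 60; // 20
const maxWaitWhilePendingMinutes = (8 * checkIntervalSeconds) / 60; // 2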
@@ -559,7 +562,9 @@ class KubernetesTaskRunner {
// Check container resource requests
if (podSpec?.containers?.[0]?.resources?.requests) {
const requests = podSpec.containers[0].resources.requests;
message += `\n\nContainer Resource Requests:\n CPU: ${requests.cpu || 'not set'}\n Memory: ${requests.memory || 'not set'}\n Ephemeral Storage: ${requests['ephemeral-storage'] || 'not set'}`;
message += `\n\nContainer Resource Requests:\n CPU: ${requests.cpu || 'not set'}\n Memory: ${
requests.memory || 'not set'
}\n Ephemeral Storage: ${requests['ephemeral-storage'] || 'not set'}`;
}
// Check node selector and tolerations
@@ -572,9 +577,13 @@ class KubernetesTaskRunner {
// Check pod conditions for scheduling issues
if (podStatusDetails?.conditions) {
const unschedulable = podStatusDetails.conditions.find((c: any) => c.type === 'PodScheduled' && c.status === 'False');
const unschedulable = podStatusDetails.conditions.find(
(c: any) => c.type === 'PodScheduled' && c.status === 'False',
);
if (unschedulable) {
message += `\n\nScheduling Issue: ${unschedulable.reason || 'Unknown'} - ${unschedulable.message || 'No message'}`;
message += `\n\nScheduling Issue: ${unschedulable.reason || 'Unknown'} - ${
unschedulable.message || 'No message'
}`;
}
}
} catch (podStatusError) {


@@ -194,7 +194,9 @@ export class ContainerHookService {
ENDPOINT_ARGS=""
if [ -n "$AWS_S3_ENDPOINT" ]; then ENDPOINT_ARGS="--endpoint-url $AWS_S3_ENDPOINT"; fi
aws $ENDPOINT_ARGS s3 ls ${CloudRunner.buildParameters.awsStackName}/cloud-runner-cache/ 2>/dev/null || true
aws $ENDPOINT_ARGS s3 ls ${CloudRunner.buildParameters.awsStackName}/cloud-runner-cache/$CACHE_KEY/ 2>/dev/null || true
aws $ENDPOINT_ARGS s3 ls ${
CloudRunner.buildParameters.awsStackName
}/cloud-runner-cache/$CACHE_KEY/ 2>/dev/null || true
BUCKET1="${CloudRunner.buildParameters.awsStackName}/cloud-runner-cache/$CACHE_KEY/Library/"
OBJECT1=""
LS_OUTPUT1="$(aws $ENDPOINT_ARGS s3 ls $BUCKET1 2>/dev/null || echo '')"
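The ENDPOINT_ARGS indirection above is what lets the same generated script target either real S3 or LocalStack. A small sketch of the equivalent command construction, assuming AWS_S3_ENDPOINT is only set for LocalStack runs (e.g. http://localhost:4566, the usual LocalStack default; helper name is illustrative):

// Sketch: builds the `aws s3 ls` line emitted into the container hook script.
function s3CacheListCommand(stackName: string, cacheKey: string, s3Endpoint?: string): string {
  const endpointArgs = s3Endpoint ? `--endpoint-url ${s3Endpoint}` : '';
  return `aws ${endpointArgs} s3 ls ${stackName}/cloud-runner-cache/${cacheKey}/ 2>/dev/null || true`;
}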


@@ -60,15 +60,10 @@ describe('Cloud Runner Retain Workspace', () => {
true,
true,
);
// Remove non-Unity images only (preserve unityci/editor images)
// Only remove specific known system images, preserve Unity and everything else
// DO NOT use --prune as it might remove Unity image
await CloudRunnerSystem.Run(
`docker exec ${NODE} sh -c "crictl images --format 'table {{.ID}}\\t{{.Repository}}' 2>/dev/null | grep -vE 'unityci/editor|unity|IMAGE' | awk '{print \\$1}' | xargs -r crictl rmi 2>/dev/null || true" || true`,
true,
true,
);
// Clean up unused layers
await CloudRunnerSystem.Run(
`docker exec ${NODE} sh -c "crictl rmi --prune 2>/dev/null || true" || true`,
`docker exec ${NODE} sh -c "crictl images --format '{{.Repository}}:{{.Tag}}' 2>/dev/null | grep -vE 'unityci/editor|unity' | grep -E 'rancher/|curlimages/|amazon/aws-cli|rclone/rclone|steamcmd/steamcmd|ubuntu:|alpine:' | xargs -r -I {} crictl rmi {} 2>/dev/null || true" || true`,
true,
true,
);


@@ -55,13 +55,15 @@ describe('Cloud Runner Kubernetes', () => {
// Check if pod was evicted due to resource constraints - this is a test infrastructure failure
// Evictions indicate the cluster doesn't have enough resources, which is a test environment issue
if (results.includes('The node was low on resource: ephemeral-storage') ||
results.includes('TerminationByKubelet') ||
results.includes('Evicted')) {
if (
results.includes('The node was low on resource: ephemeral-storage') ||
results.includes('TerminationByKubelet') ||
results.includes('Evicted')
) {
throw new Error(
`Test failed: Pod was evicted due to resource constraints (ephemeral-storage). ` +
`This indicates the test environment doesn't have enough disk space. ` +
`Results: ${results.substring(0, 500)}`
`This indicates the test environment doesn't have enough disk space. ` +
`Results: ${results.substring(0, 500)}`,
);
}
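The string checks above could be read as a single predicate; a hypothetical helper (not part of this diff) that captures the same intent:

// Treat evictions and ephemeral-storage pressure as test-infrastructure failures,
// not build failures, exactly as the inline check above does.
function isInfrastructureEviction(results: string): boolean {
  return (
    results.includes('The node was low on resource: ephemeral-storage') ||
    results.includes('TerminationByKubelet') ||
    results.includes('Evicted')
  );
}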