PR feedback

cloud-runner-develop
Frostebite 2025-12-05 23:07:08 +00:00
parent 956b2e4324
commit bea818fb9c
4 changed files with 104 additions and 5 deletions

49
dist/index.js vendored
View File

@ -4002,6 +4002,9 @@ class KubernetesPods {
}
// Check if only PreStopHook failed but container succeeded
const hasPreStopHookFailure = events.some((e) => e.reason === 'FailedPreStopHook');
const wasKilled = events.some((e) => e.reason === 'Killing');
// If container succeeded (exit code 0), PreStopHook failure is non-critical
// Also check if pod was killed but container might have succeeded
if (containerSucceeded && containerExitCode === 0) {
// Container succeeded - PreStopHook failure is non-critical
if (hasPreStopHookFailure) {
@ -4014,6 +4017,13 @@ class KubernetesPods {
// Don't throw error - container succeeded, PreStopHook failure is non-critical
return false; // Pod is not running, but we don't treat it as a failure
}
// If pod was killed and we have PreStopHook failure but no container status yet, log extra context (no waiting happens here)
// The container might have succeeded but its status hasn't been updated yet
if (wasKilled && hasPreStopHookFailure && containerExitCode === undefined) {
cloud_runner_logger_1.default.log(`Pod ${podName} was killed with PreStopHook failure, but container status not yet available. This may be non-fatal if container succeeded.`);
// Still throw error for now, but with more context
// The task runner will retry and get the actual container status
}
const errorMessage = `K8s pod failed\n${errorDetails.join('\n')}`;
cloud_runner_logger_1.default.log(errorMessage);
throw new Error(errorMessage);
@ -5202,7 +5212,44 @@ class Caching {
process.chdir(`${startPath}`);
return;
}
await cloud_runner_system_1.CloudRunnerSystem.Run(`tar -cf ${cacheArtifactName}.tar${compressionSuffix} "${node_path_1.default.basename(sourceFolder)}"`);
// Check disk space before creating tar archive
try {
const diskCheckOutput = await cloud_runner_system_1.CloudRunnerSystem.Run(`df -h . 2>/dev/null || df -h /data 2>/dev/null || true`);
cloud_runner_logger_1.default.log(`Disk space before tar: ${diskCheckOutput}`);
}
catch (error) {
// Ignore disk check errors
}
// Clean up any existing incomplete tar files
try {
await cloud_runner_system_1.CloudRunnerSystem.Run(`rm -f ${cacheArtifactName}.tar${compressionSuffix} 2>/dev/null || true`);
}
catch (error) {
// Ignore cleanup errors
}
try {
await cloud_runner_system_1.CloudRunnerSystem.Run(`tar -cf ${cacheArtifactName}.tar${compressionSuffix} "${node_path_1.default.basename(sourceFolder)}"`);
}
catch (error) {
// Check if error is due to disk space
const errorMessage = error?.message || error?.toString() || '';
if (errorMessage.includes('No space left') || errorMessage.includes('Wrote only')) {
cloud_runner_logger_1.default.log(`Disk space error detected. Attempting cleanup...`);
// Try to clean up old cache files
try {
const cacheParent = node_path_1.default.dirname(cacheFolder);
if (await fileExists(cacheParent)) {
// Find and remove stale cache archives (those matched by -mtime +1); newer archives are left in place
await cloud_runner_system_1.CloudRunnerSystem.Run(`find ${cacheParent} -name "*.tar*" -type f -mtime +1 -delete 2>/dev/null || true`);
}
}
catch (cleanupError) {
cloud_runner_logger_1.default.log(`Cleanup attempt failed: ${cleanupError}`);
}
throw new Error(`Failed to create cache archive due to insufficient disk space. Error: ${errorMessage}. Please free up disk space and retry.`);
}
throw error;
}
await cloud_runner_system_1.CloudRunnerSystem.Run(`du ${cacheArtifactName}.tar${compressionSuffix}`);
(0, node_console_1.assert)(await fileExists(`${cacheArtifactName}.tar${compressionSuffix}`), 'cache archive exists');
(0, node_console_1.assert)(await fileExists(node_path_1.default.basename(sourceFolder)), 'source folder exists');

2
dist/index.js.map vendored

File diff suppressed because one or more lines are too long

View File

@ -63,7 +63,10 @@ class KubernetesPods {
// Check if only PreStopHook failed but container succeeded
const hasPreStopHookFailure = events.some((e) => e.reason === 'FailedPreStopHook');
const wasKilled = events.some((e) => e.reason === 'Killing');
// If container succeeded (exit code 0), PreStopHook failure is non-critical
// Also check if pod was killed but container might have succeeded
if (containerSucceeded && containerExitCode === 0) {
// Container succeeded - PreStopHook failure is non-critical
if (hasPreStopHookFailure) {
@ -80,6 +83,16 @@ class KubernetesPods {
return false; // Pod is not running, but we don't treat it as a failure
}
// If pod was killed and we have PreStopHook failure but no container status yet, log extra context (no waiting happens here)
// The container might have succeeded but its status hasn't been updated yet
if (wasKilled && hasPreStopHookFailure && containerExitCode === undefined) {
CloudRunnerLogger.log(
`Pod ${podName} was killed with PreStopHook failure, but container status not yet available. This may be non-fatal if container succeeded.`,
);
// Still throw error for now, but with more context
// The task runner will retry and get the actual container status
}
const errorMessage = `K8s pod failed\n${errorDetails.join('\n')}`;
CloudRunnerLogger.log(errorMessage);
throw new Error(errorMessage);

View File

@ -79,9 +79,48 @@ export class Caching {
return;
}
await CloudRunnerSystem.Run(
`tar -cf ${cacheArtifactName}.tar${compressionSuffix} "${path.basename(sourceFolder)}"`,
);
// Check disk space before creating tar archive
try {
const diskCheckOutput = await CloudRunnerSystem.Run(`df -h . 2>/dev/null || df -h /data 2>/dev/null || true`);
CloudRunnerLogger.log(`Disk space before tar: ${diskCheckOutput}`);
} catch (error) {
// Ignore disk check errors
}
// Clean up any existing incomplete tar files
try {
await CloudRunnerSystem.Run(`rm -f ${cacheArtifactName}.tar${compressionSuffix} 2>/dev/null || true`);
} catch (error) {
// Ignore cleanup errors
}
try {
await CloudRunnerSystem.Run(
`tar -cf ${cacheArtifactName}.tar${compressionSuffix} "${path.basename(sourceFolder)}"`,
);
} catch (error: any) {
// Check if error is due to disk space
const errorMessage = error?.message || error?.toString() || '';
if (errorMessage.includes('No space left') || errorMessage.includes('Wrote only')) {
CloudRunnerLogger.log(`Disk space error detected. Attempting cleanup...`);
// Try to clean up old cache files
try {
const cacheParent = path.dirname(cacheFolder);
if (await fileExists(cacheParent)) {
// Find and remove stale cache archives (those matched by -mtime +1); newer archives are left in place
await CloudRunnerSystem.Run(
`find ${cacheParent} -name "*.tar*" -type f -mtime +1 -delete 2>/dev/null || true`,
);
}
} catch (cleanupError) {
CloudRunnerLogger.log(`Cleanup attempt failed: ${cleanupError}`);
}
throw new Error(
`Failed to create cache archive due to insufficient disk space. Error: ${errorMessage}. Please free up disk space and retry.`,
);
}
throw error;
}
await CloudRunnerSystem.Run(`du ${cacheArtifactName}.tar${compressionSuffix}`);
assert(await fileExists(`${cacheArtifactName}.tar${compressionSuffix}`), 'cache archive exists');
assert(await fileExists(path.basename(sourceFolder)), 'source folder exists');