PR feedback
parent
956b2e4324
commit
bea818fb9c
|
|
@ -4002,6 +4002,9 @@ class KubernetesPods {
|
|||
}
|
||||
// Check if only PreStopHook failed but container succeeded
|
||||
const hasPreStopHookFailure = events.some((e) => e.reason === 'FailedPreStopHook');
|
||||
const wasKilled = events.some((e) => e.reason === 'Killing');
|
||||
// If container succeeded (exit code 0), PreStopHook failure is non-critical
|
||||
// Also check if pod was killed but container might have succeeded
|
||||
if (containerSucceeded && containerExitCode === 0) {
|
||||
// Container succeeded - PreStopHook failure is non-critical
|
||||
if (hasPreStopHookFailure) {
|
||||
|
|
@ -4014,6 +4017,13 @@ class KubernetesPods {
|
|||
// Don't throw error - container succeeded, PreStopHook failure is non-critical
|
||||
return false; // Pod is not running, but we don't treat it as a failure
|
||||
}
|
||||
// If pod was killed and we have PreStopHook failure but no container status yet, log extra context (no waiting happens here)
|
||||
// The container might have succeeded but status hasn't been updated yet
|
||||
if (wasKilled && hasPreStopHookFailure && containerExitCode === undefined) {
|
||||
cloud_runner_logger_1.default.log(`Pod ${podName} was killed with PreStopHook failure, but container status not yet available. This may be non-fatal if container succeeded.`);
|
||||
// Still throw error for now, but with more context
|
||||
// The task runner will retry and get the actual container status
|
||||
}
|
||||
const errorMessage = `K8s pod failed\n${errorDetails.join('\n')}`;
|
||||
cloud_runner_logger_1.default.log(errorMessage);
|
||||
throw new Error(errorMessage);
|
||||
|
|
@ -5202,7 +5212,44 @@ class Caching {
|
|||
process.chdir(`${startPath}`);
|
||||
return;
|
||||
}
|
||||
await cloud_runner_system_1.CloudRunnerSystem.Run(`tar -cf ${cacheArtifactName}.tar${compressionSuffix} "${node_path_1.default.basename(sourceFolder)}"`);
|
||||
// Log disk usage before creating tar archive (informational only; nothing is enforced)
|
||||
try {
|
||||
const diskCheckOutput = await cloud_runner_system_1.CloudRunnerSystem.Run(`df -h . 2>/dev/null || df -h /data 2>/dev/null || true`);
|
||||
cloud_runner_logger_1.default.log(`Disk space before tar: ${diskCheckOutput}`);
|
||||
}
|
||||
catch (error) {
|
||||
// Ignore disk check errors
|
||||
}
|
||||
// Clean up any existing incomplete tar files
|
||||
try {
|
||||
await cloud_runner_system_1.CloudRunnerSystem.Run(`rm -f ${cacheArtifactName}.tar${compressionSuffix} 2>/dev/null || true`);
|
||||
}
|
||||
catch (error) {
|
||||
// Ignore cleanup errors
|
||||
}
|
||||
try {
|
||||
await cloud_runner_system_1.CloudRunnerSystem.Run(`tar -cf ${cacheArtifactName}.tar${compressionSuffix} "${node_path_1.default.basename(sourceFolder)}"`);
|
||||
}
|
||||
catch (error) {
|
||||
// Check if error is due to disk space
|
||||
const errorMessage = error?.message || error?.toString() || '';
|
||||
if (errorMessage.includes('No space left') || errorMessage.includes('Wrote only')) {
|
||||
cloud_runner_logger_1.default.log(`Disk space error detected. Attempting cleanup...`);
|
||||
// Try to clean up old cache files
|
||||
try {
|
||||
const cacheParent = node_path_1.default.dirname(cacheFolder);
|
||||
if (await fileExists(cacheParent)) {
|
||||
// Find and remove old *.tar* cache files selected by age (find -mtime +1, i.e. more than a day old); newer files are kept
|
||||
await cloud_runner_system_1.CloudRunnerSystem.Run(`find ${cacheParent} -name "*.tar*" -type f -mtime +1 -delete 2>/dev/null || true`);
|
||||
}
|
||||
}
|
||||
catch (cleanupError) {
|
||||
cloud_runner_logger_1.default.log(`Cleanup attempt failed: ${cleanupError}`);
|
||||
}
|
||||
throw new Error(`Failed to create cache archive due to insufficient disk space. Error: ${errorMessage}. Please free up disk space and retry.`);
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
await cloud_runner_system_1.CloudRunnerSystem.Run(`du ${cacheArtifactName}.tar${compressionSuffix}`);
|
||||
(0, node_console_1.assert)(await fileExists(`${cacheArtifactName}.tar${compressionSuffix}`), 'cache archive exists');
|
||||
(0, node_console_1.assert)(await fileExists(node_path_1.default.basename(sourceFolder)), 'source folder exists');
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -63,7 +63,10 @@ class KubernetesPods {
|
|||
|
||||
// Check if only PreStopHook failed but container succeeded
|
||||
const hasPreStopHookFailure = events.some((e) => e.reason === 'FailedPreStopHook');
|
||||
const wasKilled = events.some((e) => e.reason === 'Killing');
|
||||
|
||||
// If container succeeded (exit code 0), PreStopHook failure is non-critical
|
||||
// Also check if pod was killed but container might have succeeded
|
||||
if (containerSucceeded && containerExitCode === 0) {
|
||||
// Container succeeded - PreStopHook failure is non-critical
|
||||
if (hasPreStopHookFailure) {
|
||||
|
|
@ -80,6 +83,16 @@ class KubernetesPods {
|
|||
return false; // Pod is not running, but we don't treat it as a failure
|
||||
}
|
||||
|
||||
// If pod was killed and we have PreStopHook failure but no container status yet, log extra context (no waiting happens here)
|
||||
// The container might have succeeded but status hasn't been updated yet
|
||||
if (wasKilled && hasPreStopHookFailure && containerExitCode === undefined) {
|
||||
CloudRunnerLogger.log(
|
||||
`Pod ${podName} was killed with PreStopHook failure, but container status not yet available. This may be non-fatal if container succeeded.`,
|
||||
);
|
||||
// Still throw error for now, but with more context
|
||||
// The task runner will retry and get the actual container status
|
||||
}
|
||||
|
||||
const errorMessage = `K8s pod failed\n${errorDetails.join('\n')}`;
|
||||
CloudRunnerLogger.log(errorMessage);
|
||||
throw new Error(errorMessage);
|
||||
|
|
|
|||
|
|
@ -79,9 +79,48 @@ export class Caching {
|
|||
return;
|
||||
}
|
||||
|
||||
await CloudRunnerSystem.Run(
|
||||
`tar -cf ${cacheArtifactName}.tar${compressionSuffix} "${path.basename(sourceFolder)}"`,
|
||||
);
|
||||
// Log disk usage before creating tar archive (informational only; nothing is enforced)
|
||||
try {
|
||||
const diskCheckOutput = await CloudRunnerSystem.Run(`df -h . 2>/dev/null || df -h /data 2>/dev/null || true`);
|
||||
CloudRunnerLogger.log(`Disk space before tar: ${diskCheckOutput}`);
|
||||
} catch (error) {
|
||||
// Ignore disk check errors
|
||||
}
|
||||
|
||||
// Clean up any existing incomplete tar files
|
||||
try {
|
||||
await CloudRunnerSystem.Run(`rm -f ${cacheArtifactName}.tar${compressionSuffix} 2>/dev/null || true`);
|
||||
} catch (error) {
|
||||
// Ignore cleanup errors
|
||||
}
|
||||
|
||||
try {
|
||||
await CloudRunnerSystem.Run(
|
||||
`tar -cf ${cacheArtifactName}.tar${compressionSuffix} "${path.basename(sourceFolder)}"`,
|
||||
);
|
||||
} catch (error: any) {
|
||||
// Check if error is due to disk space
|
||||
const errorMessage = error?.message || error?.toString() || '';
|
||||
if (errorMessage.includes('No space left') || errorMessage.includes('Wrote only')) {
|
||||
CloudRunnerLogger.log(`Disk space error detected. Attempting cleanup...`);
|
||||
// Try to clean up old cache files
|
||||
try {
|
||||
const cacheParent = path.dirname(cacheFolder);
|
||||
if (await fileExists(cacheParent)) {
|
||||
// Find and remove old *.tar* cache files selected by age (find -mtime +1, i.e. more than a day old); newer files are kept
|
||||
await CloudRunnerSystem.Run(
|
||||
`find ${cacheParent} -name "*.tar*" -type f -mtime +1 -delete 2>/dev/null || true`,
|
||||
);
|
||||
}
|
||||
} catch (cleanupError) {
|
||||
CloudRunnerLogger.log(`Cleanup attempt failed: ${cleanupError}`);
|
||||
}
|
||||
throw new Error(
|
||||
`Failed to create cache archive due to insufficient disk space. Error: ${errorMessage}. Please free up disk space and retry.`,
|
||||
);
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
await CloudRunnerSystem.Run(`du ${cacheArtifactName}.tar${compressionSuffix}`);
|
||||
assert(await fileExists(`${cacheArtifactName}.tar${compressionSuffix}`), 'cache archive exists');
|
||||
assert(await fileExists(path.basename(sourceFolder)), 'source folder exists');
|
||||
|
|
|
|||
Loading…
Reference in New Issue