PR feedback
parent
2c3cb006c0
commit
c216e3bb41
|
|
@ -5212,14 +5212,39 @@ class Caching {
|
||||||
process.chdir(`${startPath}`);
|
process.chdir(`${startPath}`);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Check disk space before creating tar archive
|
// Check disk space before creating tar archive and clean up if needed
|
||||||
|
let diskUsagePercent = 0;
|
||||||
try {
|
try {
|
||||||
const diskCheckOutput = await cloud_runner_system_1.CloudRunnerSystem.Run(`df -h . 2>/dev/null || df -h /data 2>/dev/null || true`);
|
const diskCheckOutput = await cloud_runner_system_1.CloudRunnerSystem.Run(`df . 2>/dev/null || df /data 2>/dev/null || true`);
|
||||||
cloud_runner_logger_1.default.log(`Disk space before tar: ${diskCheckOutput}`);
|
cloud_runner_logger_1.default.log(`Disk space before tar: ${diskCheckOutput}`);
|
||||||
|
// Parse the first "NN%" figure in the df output (the usage column, e.g. "100%")
|
||||||
|
const usageMatch = diskCheckOutput.match(/(\d+)%/);
|
||||||
|
if (usageMatch) {
|
||||||
|
diskUsagePercent = parseInt(usageMatch[1], 10);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
catch (error) {
|
catch (error) {
|
||||||
// Ignore disk check errors
|
// Ignore disk check errors
|
||||||
}
|
}
|
||||||
|
// If disk usage is high (>90%), proactively clean up old cache files
|
||||||
|
if (diskUsagePercent > 90) {
|
||||||
|
cloud_runner_logger_1.default.log(`Disk usage is ${diskUsagePercent}% - cleaning up old cache files before tar operation`);
|
||||||
|
try {
|
||||||
|
const cacheParent = node_path_1.default.dirname(cacheFolder);
|
||||||
|
if (await fileExists(cacheParent)) {
|
||||||
|
// Remove cache files older than 6 hours (more aggressive than 1 day)
|
||||||
|
await cloud_runner_system_1.CloudRunnerSystem.Run(`find ${cacheParent} -name "*.tar*" -type f -mmin +360 -delete 2>/dev/null || true`);
|
||||||
|
// Also remove any empty directories under the cache parent
|
||||||
|
await cloud_runner_system_1.CloudRunnerSystem.Run(`find ${cacheParent} -type d -empty -delete 2>/dev/null || true`);
|
||||||
|
cloud_runner_logger_1.default.log(`Cleanup completed. Checking disk space again...`);
|
||||||
|
const diskCheckAfter = await cloud_runner_system_1.CloudRunnerSystem.Run(`df . 2>/dev/null || df /data 2>/dev/null || true`);
|
||||||
|
cloud_runner_logger_1.default.log(`Disk space after cleanup: ${diskCheckAfter}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (cleanupError) {
|
||||||
|
cloud_runner_logger_1.default.log(`Proactive cleanup failed: ${cleanupError}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
// Clean up any existing incomplete tar files
|
// Clean up any existing incomplete tar files
|
||||||
try {
|
try {
|
||||||
await cloud_runner_system_1.CloudRunnerSystem.Run(`rm -f ${cacheArtifactName}.tar${compressionSuffix} 2>/dev/null || true`);
|
await cloud_runner_system_1.CloudRunnerSystem.Run(`rm -f ${cacheArtifactName}.tar${compressionSuffix} 2>/dev/null || true`);
|
||||||
|
|
@ -5234,22 +5259,51 @@ class Caching {
|
||||||
// Check if error is due to disk space
|
// Check if error is due to disk space
|
||||||
const errorMessage = error?.message || error?.toString() || '';
|
const errorMessage = error?.message || error?.toString() || '';
|
||||||
if (errorMessage.includes('No space left') || errorMessage.includes('Wrote only')) {
|
if (errorMessage.includes('No space left') || errorMessage.includes('Wrote only')) {
|
||||||
cloud_runner_logger_1.default.log(`Disk space error detected. Attempting cleanup...`);
|
cloud_runner_logger_1.default.log(`Disk space error detected. Attempting aggressive cleanup...`);
|
||||||
// Try to clean up old cache files
|
// Try to clean up old cache files more aggressively
|
||||||
try {
|
try {
|
||||||
const cacheParent = node_path_1.default.dirname(cacheFolder);
|
const cacheParent = node_path_1.default.dirname(cacheFolder);
|
||||||
if (await fileExists(cacheParent)) {
|
if (await fileExists(cacheParent)) {
|
||||||
// Find and remove old cache entries (keep only the most recent)
|
// Remove cache files older than 1 hour (very aggressive)
|
||||||
await cloud_runner_system_1.CloudRunnerSystem.Run(`find ${cacheParent} -name "*.tar*" -type f -mtime +1 -delete 2>/dev/null || true`);
|
await cloud_runner_system_1.CloudRunnerSystem.Run(`find ${cacheParent} -name "*.tar*" -type f -mmin +60 -delete 2>/dev/null || true`);
|
||||||
|
// Remove empty cache directories
|
||||||
|
await cloud_runner_system_1.CloudRunnerSystem.Run(`find ${cacheParent} -type d -empty -delete 2>/dev/null || true`);
|
||||||
|
// Also sweep the directory one level above cacheParent for stale archives (no size check is performed)
|
||||||
|
const cacheRoot = node_path_1.default.resolve(cacheParent, '..');
|
||||||
|
if (await fileExists(cacheRoot)) {
|
||||||
|
// Remove cache entries older than 30 minutes
|
||||||
|
await cloud_runner_system_1.CloudRunnerSystem.Run(`find ${cacheRoot} -name "*.tar*" -type f -mmin +30 -delete 2>/dev/null || true`);
|
||||||
|
}
|
||||||
|
cloud_runner_logger_1.default.log(`Aggressive cleanup completed. Retrying tar operation...`);
|
||||||
|
// Retry the tar operation once after cleanup
|
||||||
|
let retrySucceeded = false;
|
||||||
|
try {
|
||||||
|
await cloud_runner_system_1.CloudRunnerSystem.Run(`tar -cf ${cacheArtifactName}.tar${compressionSuffix} "${node_path_1.default.basename(sourceFolder)}"`);
|
||||||
|
// If retry succeeds, mark it - we'll continue normally without throwing
|
||||||
|
retrySucceeded = true;
|
||||||
|
}
|
||||||
|
catch (retryError) {
|
||||||
|
throw new Error(`Failed to create cache archive after cleanup. Original error: ${errorMessage}. Retry error: ${retryError?.message || retryError}`);
|
||||||
|
}
|
||||||
|
// If retry succeeded, don't throw the original error - let execution continue after catch block
|
||||||
|
if (!retrySucceeded) {
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
// If we get here, retry succeeded - execution will continue after the catch block
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
throw new Error(`Failed to create cache archive due to insufficient disk space. Error: ${errorMessage}. Cleanup not possible - cache folder missing.`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (cleanupError) {
|
catch (cleanupError) {
|
||||||
cloud_runner_logger_1.default.log(`Cleanup attempt failed: ${cleanupError}`);
|
cloud_runner_logger_1.default.log(`Cleanup attempt failed: ${cleanupError}`);
|
||||||
|
throw new Error(`Failed to create cache archive due to insufficient disk space. Error: ${errorMessage}. Cleanup failed: ${cleanupError?.message || cleanupError}`);
|
||||||
}
|
}
|
||||||
throw new Error(`Failed to create cache archive due to insufficient disk space. Error: ${errorMessage}. Please free up disk space and retry.`);
|
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
await cloud_runner_system_1.CloudRunnerSystem.Run(`du ${cacheArtifactName}.tar${compressionSuffix}`);
|
await cloud_runner_system_1.CloudRunnerSystem.Run(`du ${cacheArtifactName}.tar${compressionSuffix}`);
|
||||||
(0, node_console_1.assert)(await fileExists(`${cacheArtifactName}.tar${compressionSuffix}`), 'cache archive exists');
|
(0, node_console_1.assert)(await fileExists(`${cacheArtifactName}.tar${compressionSuffix}`), 'cache archive exists');
|
||||||
(0, node_console_1.assert)(await fileExists(node_path_1.default.basename(sourceFolder)), 'source folder exists');
|
(0, node_console_1.assert)(await fileExists(node_path_1.default.basename(sourceFolder)), 'source folder exists');
|
||||||
|
|
|
||||||
File diff suppressed because one or more lines are too long
|
|
@ -79,14 +79,45 @@ export class Caching {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check disk space before creating tar archive
|
// Check disk space before creating tar archive and clean up if needed
|
||||||
|
let diskUsagePercent = 0;
|
||||||
try {
|
try {
|
||||||
const diskCheckOutput = await CloudRunnerSystem.Run(`df -h . 2>/dev/null || df -h /data 2>/dev/null || true`);
|
const diskCheckOutput = await CloudRunnerSystem.Run(`df . 2>/dev/null || df /data 2>/dev/null || true`);
|
||||||
CloudRunnerLogger.log(`Disk space before tar: ${diskCheckOutput}`);
|
CloudRunnerLogger.log(`Disk space before tar: ${diskCheckOutput}`);
|
||||||
|
// Parse the first "NN%" figure in the df output (the usage column, e.g. "100%")
|
||||||
|
const usageMatch = diskCheckOutput.match(/(\d+)%/);
|
||||||
|
if (usageMatch) {
|
||||||
|
diskUsagePercent = parseInt(usageMatch[1], 10);
|
||||||
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
// Ignore disk check errors
|
// Ignore disk check errors
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If disk usage is high (>90%), proactively clean up old cache files
|
||||||
|
if (diskUsagePercent > 90) {
|
||||||
|
CloudRunnerLogger.log(
|
||||||
|
`Disk usage is ${diskUsagePercent}% - cleaning up old cache files before tar operation`,
|
||||||
|
);
|
||||||
|
try {
|
||||||
|
const cacheParent = path.dirname(cacheFolder);
|
||||||
|
if (await fileExists(cacheParent)) {
|
||||||
|
// Remove cache files older than 6 hours (more aggressive than 1 day)
|
||||||
|
await CloudRunnerSystem.Run(
|
||||||
|
`find ${cacheParent} -name "*.tar*" -type f -mmin +360 -delete 2>/dev/null || true`,
|
||||||
|
);
|
||||||
|
// Also remove any empty directories under the cache parent
|
||||||
|
await CloudRunnerSystem.Run(
|
||||||
|
`find ${cacheParent} -type d -empty -delete 2>/dev/null || true`,
|
||||||
|
);
|
||||||
|
CloudRunnerLogger.log(`Cleanup completed. Checking disk space again...`);
|
||||||
|
const diskCheckAfter = await CloudRunnerSystem.Run(`df . 2>/dev/null || df /data 2>/dev/null || true`);
|
||||||
|
CloudRunnerLogger.log(`Disk space after cleanup: ${diskCheckAfter}`);
|
||||||
|
}
|
||||||
|
} catch (cleanupError) {
|
||||||
|
CloudRunnerLogger.log(`Proactive cleanup failed: ${cleanupError}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Clean up any existing incomplete tar files
|
// Clean up any existing incomplete tar files
|
||||||
try {
|
try {
|
||||||
await CloudRunnerSystem.Run(`rm -f ${cacheArtifactName}.tar${compressionSuffix} 2>/dev/null || true`);
|
await CloudRunnerSystem.Run(`rm -f ${cacheArtifactName}.tar${compressionSuffix} 2>/dev/null || true`);
|
||||||
|
|
@ -102,25 +133,61 @@ export class Caching {
|
||||||
// Check if error is due to disk space
|
// Check if error is due to disk space
|
||||||
const errorMessage = error?.message || error?.toString() || '';
|
const errorMessage = error?.message || error?.toString() || '';
|
||||||
if (errorMessage.includes('No space left') || errorMessage.includes('Wrote only')) {
|
if (errorMessage.includes('No space left') || errorMessage.includes('Wrote only')) {
|
||||||
CloudRunnerLogger.log(`Disk space error detected. Attempting cleanup...`);
|
CloudRunnerLogger.log(`Disk space error detected. Attempting aggressive cleanup...`);
|
||||||
// Try to clean up old cache files
|
// Try to clean up old cache files more aggressively
|
||||||
try {
|
try {
|
||||||
const cacheParent = path.dirname(cacheFolder);
|
const cacheParent = path.dirname(cacheFolder);
|
||||||
if (await fileExists(cacheParent)) {
|
if (await fileExists(cacheParent)) {
|
||||||
// Find and remove old cache entries (keep only the most recent)
|
// Remove cache files older than 1 hour (very aggressive)
|
||||||
await CloudRunnerSystem.Run(
|
await CloudRunnerSystem.Run(
|
||||||
`find ${cacheParent} -name "*.tar*" -type f -mtime +1 -delete 2>/dev/null || true`,
|
`find ${cacheParent} -name "*.tar*" -type f -mmin +60 -delete 2>/dev/null || true`,
|
||||||
|
);
|
||||||
|
// Remove empty cache directories
|
||||||
|
await CloudRunnerSystem.Run(
|
||||||
|
`find ${cacheParent} -type d -empty -delete 2>/dev/null || true`,
|
||||||
|
);
|
||||||
|
// Also sweep the directory one level above cacheParent for stale archives (no size check is performed)
|
||||||
|
const cacheRoot = path.resolve(cacheParent, '..');
|
||||||
|
if (await fileExists(cacheRoot)) {
|
||||||
|
// Remove cache entries older than 30 minutes
|
||||||
|
await CloudRunnerSystem.Run(
|
||||||
|
`find ${cacheRoot} -name "*.tar*" -type f -mmin +30 -delete 2>/dev/null || true`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
} catch (cleanupError) {
|
CloudRunnerLogger.log(`Aggressive cleanup completed. Retrying tar operation...`);
|
||||||
CloudRunnerLogger.log(`Cleanup attempt failed: ${cleanupError}`);
|
// Retry the tar operation once after cleanup
|
||||||
}
|
let retrySucceeded = false;
|
||||||
|
try {
|
||||||
|
await CloudRunnerSystem.Run(
|
||||||
|
`tar -cf ${cacheArtifactName}.tar${compressionSuffix} "${path.basename(sourceFolder)}"`,
|
||||||
|
);
|
||||||
|
// If retry succeeds, mark it - we'll continue normally without throwing
|
||||||
|
retrySucceeded = true;
|
||||||
|
} catch (retryError: any) {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
`Failed to create cache archive due to insufficient disk space. Error: ${errorMessage}. Please free up disk space and retry.`,
|
`Failed to create cache archive after cleanup. Original error: ${errorMessage}. Retry error: ${retryError?.message || retryError}`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
// If retry succeeded, don't throw the original error - let execution continue after catch block
|
||||||
|
if (!retrySucceeded) {
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
|
// If we get here, retry succeeded - execution will continue after the catch block
|
||||||
|
} else {
|
||||||
|
throw new Error(
|
||||||
|
`Failed to create cache archive due to insufficient disk space. Error: ${errorMessage}. Cleanup not possible - cache folder missing.`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
} catch (cleanupError: any) {
|
||||||
|
CloudRunnerLogger.log(`Cleanup attempt failed: ${cleanupError}`);
|
||||||
|
throw new Error(
|
||||||
|
`Failed to create cache archive due to insufficient disk space. Error: ${errorMessage}. Cleanup failed: ${cleanupError?.message || cleanupError}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
await CloudRunnerSystem.Run(`du ${cacheArtifactName}.tar${compressionSuffix}`);
|
await CloudRunnerSystem.Run(`du ${cacheArtifactName}.tar${compressionSuffix}`);
|
||||||
assert(await fileExists(`${cacheArtifactName}.tar${compressionSuffix}`), 'cache archive exists');
|
assert(await fileExists(`${cacheArtifactName}.tar${compressionSuffix}`), 'cache archive exists');
|
||||||
assert(await fileExists(path.basename(sourceFolder)), 'source folder exists');
|
assert(await fileExists(path.basename(sourceFolder)), 'source folder exists');
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue