pr feedback

pull/767/head
Frostebite 2025-12-13 06:01:59 +00:00
parent d12244db60
commit 7f133d8cc7
4 changed files with 190 additions and 11 deletions

dist/index.js

@@ -4594,6 +4594,47 @@ class KubernetesTaskRunner {
break;
}
}
// After kubectl logs loop ends, read log file as fallback to capture any messages
// written after kubectl stopped reading (e.g., "Collected Logs" from post-build)
// This ensures all log messages are included in BuildResults for test assertions
try {
const isPodStillRunning = await kubernetes_pods_1.default.IsPodRunning(podName, namespace, kubeClient);
if (!isPodStillRunning) {
cloud_runner_logger_1.default.log('Pod is terminated, reading log file as fallback to capture post-build messages...');
try {
// Try to read the log file from the terminated pod.
// Note: kubectl exec has no --previous flag (only kubectl logs does), so the
// first command below is expected to fail and fall through to the plain exec.
const logFileContent = await cloud_runner_system_1.CloudRunnerSystem.Run(`kubectl exec ${podName} -c ${containerName} -n ${namespace} --previous -- cat /home/job-log.txt 2>/dev/null || kubectl exec ${podName} -c ${containerName} -n ${namespace} -- cat /home/job-log.txt 2>/dev/null || echo ""`, true, true);
if (logFileContent && logFileContent.trim()) {
cloud_runner_logger_1.default.log(`Read log file from pod as fallback (${logFileContent.length} chars) to capture missing messages`);
// Get the lines we already have in output to avoid duplicates
const existingLines = new Set(output.split('\n').map((line) => line.trim()));
// Process the log file content line by line and add missing lines
for (const line of logFileContent.split(`\n`)) {
const trimmedLine = line.trim();
const lowerLine = trimmedLine.toLowerCase();
// Skip empty lines, kubectl errors, and lines we already have
if (trimmedLine &&
!lowerLine.includes('unable to retrieve container logs') &&
!existingLines.has(trimmedLine)) {
// Add missing line to output
output += `${line}\n`;
// Process through FollowLogStreamService to ensure proper handling
({ shouldReadLogs, shouldCleanup, output } = follow_log_stream_service_1.FollowLogStreamService.handleIteration(line, shouldReadLogs, shouldCleanup, output));
}
}
}
}
catch (logFileError) {
cloud_runner_logger_1.default.logWarning(`Could not read log file from pod as fallback: ${logFileError?.message || logFileError}`);
// Continue with existing output - this is a best-effort fallback
}
}
}
catch (fallbackError) {
cloud_runner_logger_1.default.logWarning(`Error checking pod status for log file fallback: ${fallbackError?.message || fallbackError}`);
// Continue with existing output - this is a best-effort fallback
}
// Filter out kubectl error messages from the final output
// These errors can leak into the output via stderr when kubectl fails
// We filter them out so they don't pollute the BuildResults
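
The fallback command in this hunk chains several shell invocations with `||`; a minimal standalone sketch of that pattern, using a hypothetical buildLogFileReadCommand helper (kubectl exec has no --previous flag, so the first candidate is expected to fail and fall through):

// Hypothetical sketch of the fallback chain above: each candidate suppresses
// its own stderr, `||` advances to the next candidate on failure, and the
// trailing `echo ""` guarantees the chain exits successfully with empty
// output instead of throwing.
function buildLogFileReadCommand(pod: string, container: string, namespace: string): string {
  const exec = `kubectl exec ${pod} -c ${container} -n ${namespace}`;
  const candidates = [
    `${exec} --previous -- cat /home/job-log.txt 2>/dev/null`, // exec has no --previous; fails
    `${exec} -- cat /home/job-log.txt 2>/dev/null`,
  ];
  return `${candidates.join(' || ')} || echo ""`;
}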
@@ -5452,9 +5493,28 @@ class Caching {
cloud_runner_logger_1.default.log(`Cleanup completed. Checking disk space again...`);
const diskCheckAfter = await cloud_runner_system_1.CloudRunnerSystem.Run(`df . 2>/dev/null || df /data 2>/dev/null || true`);
cloud_runner_logger_1.default.log(`Disk space after cleanup: ${diskCheckAfter}`);
// Check disk usage again after cleanup
let diskUsageAfterCleanup = 0;
try {
const usageMatchAfter = diskCheckAfter.match(/(\d+)%/);
if (usageMatchAfter) {
diskUsageAfterCleanup = Number.parseInt(usageMatchAfter[1], 10);
}
}
catch {
// Ignore parsing errors
}
// If disk is still at 100% after cleanup, skip tar operation to prevent hang
if (diskUsageAfterCleanup >= 100) {
throw new Error(`Cannot create cache archive: disk is still at ${diskUsageAfterCleanup}% after cleanup. Tar operation would hang. Please free up disk space manually.`);
}
}
}
catch (cleanupError) {
// If cleanupError is our disk space error, rethrow it
if (cleanupError instanceof Error && cleanupError.message.includes('Cannot create cache archive')) {
throw cleanupError;
}
cloud_runner_logger_1.default.log(`Proactive cleanup failed: ${cleanupError}`);
}
}
@@ -5466,12 +5526,31 @@ class Caching {
// Ignore cleanup errors
}
try {
-await cloud_runner_system_1.CloudRunnerSystem.Run(`tar -cf ${cacheArtifactName}.tar${compressionSuffix} "${node_path_1.default.basename(sourceFolder)}"`);
+// Add timeout to tar command to prevent hanging when disk is full
+// Use timeout command with 10 minute limit (600 seconds) if available
+// Check if timeout command exists, otherwise use regular tar
+const tarCommand = `tar -cf ${cacheArtifactName}.tar${compressionSuffix} "${node_path_1.default.basename(sourceFolder)}"`;
+let tarCommandToRun = tarCommand;
+try {
+// Check if timeout command is available
+await cloud_runner_system_1.CloudRunnerSystem.Run(`which timeout > /dev/null 2>&1`, true, true);
+// Use timeout if available (600 seconds = 10 minutes)
+tarCommandToRun = `timeout 600 ${tarCommand}`;
+}
+catch {
+// timeout command not available, use regular tar
+// Note: This could still hang if disk is full, but the disk space check above should prevent this
+tarCommandToRun = tarCommand;
+}
+await cloud_runner_system_1.CloudRunnerSystem.Run(tarCommandToRun);
}
catch (error) {
-// Check if error is due to disk space
+// Check if error is due to disk space or timeout
const errorMessage = error?.message || error?.toString() || '';
-if (errorMessage.includes('No space left') || errorMessage.includes('Wrote only')) {
+if (errorMessage.includes('No space left') ||
+errorMessage.includes('Wrote only') ||
+errorMessage.includes('timeout') ||
+errorMessage.includes('Terminated')) {
cloud_runner_logger_1.default.log(`Disk space error detected. Attempting aggressive cleanup...`);
// Try to clean up old cache files more aggressively
try {

dist/index.js.map

File diff suppressed because one or more lines are too long

@@ -76,8 +76,9 @@ class KubernetesTaskRunner {
// Filter out kubectl error messages from the error output
const errorMessage = error?.message || error?.toString() || '';
-const isKubectlLogsError = errorMessage.includes('unable to retrieve container logs for containerd://') ||
-errorMessage.toLowerCase().includes('unable to retrieve container logs');
+const isKubectlLogsError =
+errorMessage.includes('unable to retrieve container logs for containerd://') ||
+errorMessage.toLowerCase().includes('unable to retrieve container logs');
if (isKubectlLogsError) {
CloudRunnerLogger.log(`Kubectl unable to retrieve logs, attempt ${kubectlLogsFailedCount}/${maxKubectlLogsFailures}`);
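
Since the case-insensitive substring check already matches the longer containerd:// variant, the predicate above can be collapsed to a single condition; a minimal sketch with a hypothetical isKubectlLogsRetrievalError name:

// Hypothetical helper equivalent to the predicate above: lowering the message
// first makes one 'unable to retrieve container logs' check cover both the
// generic and the containerd://-specific forms.
function isKubectlLogsRetrievalError(error: unknown): boolean {
  const message = error instanceof Error ? error.message : String(error ?? '');
  return message.toLowerCase().includes('unable to retrieve container logs');
}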
@@ -208,6 +209,64 @@ class KubernetesTaskRunner {
}
}
// After kubectl logs loop ends, read log file as fallback to capture any messages
// written after kubectl stopped reading (e.g., "Collected Logs" from post-build)
// This ensures all log messages are included in BuildResults for test assertions
try {
const isPodStillRunning = await KubernetesPods.IsPodRunning(podName, namespace, kubeClient);
if (!isPodStillRunning) {
CloudRunnerLogger.log('Pod is terminated, reading log file as fallback to capture post-build messages...');
try {
// Try to read the log file from the terminated pod.
// Note: kubectl exec has no --previous flag (only kubectl logs does), so the
// first command below is expected to fail and fall through to the plain exec.
const logFileContent = await CloudRunnerSystem.Run(
`kubectl exec ${podName} -c ${containerName} -n ${namespace} --previous -- cat /home/job-log.txt 2>/dev/null || kubectl exec ${podName} -c ${containerName} -n ${namespace} -- cat /home/job-log.txt 2>/dev/null || echo ""`,
true,
true,
);
if (logFileContent && logFileContent.trim()) {
CloudRunnerLogger.log(
`Read log file from pod as fallback (${logFileContent.length} chars) to capture missing messages`,
);
// Get the lines we already have in output to avoid duplicates
const existingLines = new Set(output.split('\n').map((line) => line.trim()));
// Process the log file content line by line and add missing lines
for (const line of logFileContent.split(`\n`)) {
const trimmedLine = line.trim();
const lowerLine = trimmedLine.toLowerCase();
// Skip empty lines, kubectl errors, and lines we already have
if (
trimmedLine &&
!lowerLine.includes('unable to retrieve container logs') &&
!existingLines.has(trimmedLine)
) {
// Add missing line to output
output += `${line}\n`;
// Process through FollowLogStreamService to ensure proper handling
({ shouldReadLogs, shouldCleanup, output } = FollowLogStreamService.handleIteration(
line,
shouldReadLogs,
shouldCleanup,
output,
));
}
}
}
} catch (logFileError: any) {
CloudRunnerLogger.logWarning(
`Could not read log file from pod as fallback: ${logFileError?.message || logFileError}`,
);
// Continue with existing output - this is a best-effort fallback
}
}
} catch (fallbackError: any) {
CloudRunnerLogger.logWarning(
`Error checking pod status for log file fallback: ${fallbackError?.message || fallbackError}`,
);
// Continue with existing output - this is a best-effort fallback
}
// Filter out kubectl error messages from the final output
// These errors can leak into the output via stderr when kubectl fails
// We filter them out so they don't pollute the BuildResults
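
The merge step in this hunk can be read as a standalone function; a minimal sketch with a hypothetical mergeMissingLogLines name and one deliberate variation: appended lines are also recorded in the set, so a line that repeats inside the log file is added only once.

// Hypothetical standalone version of the merge above: append only those file
// lines whose trimmed form is absent from the streamed output, skipping
// blanks and kubectl noise. Recording appended lines in the set collapses
// duplicates that occur within the log file itself.
function mergeMissingLogLines(output: string, logFileContent: string): string {
  const existingLines = new Set(output.split('\n').map((line) => line.trim()));
  let merged = output;
  for (const line of logFileContent.split('\n')) {
    const trimmedLine = line.trim();
    if (
      trimmedLine &&
      !trimmedLine.toLowerCase().includes('unable to retrieve container logs') &&
      !existingLines.has(trimmedLine)
    ) {
      merged += `${line}\n`;
      existingLines.add(trimmedLine);
    }
  }
  return merged;
}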


@@ -110,8 +110,30 @@ export class Caching {
CloudRunnerLogger.log(`Cleanup completed. Checking disk space again...`);
const diskCheckAfter = await CloudRunnerSystem.Run(`df . 2>/dev/null || df /data 2>/dev/null || true`);
CloudRunnerLogger.log(`Disk space after cleanup: ${diskCheckAfter}`);
// Check disk usage again after cleanup
let diskUsageAfterCleanup = 0;
try {
const usageMatchAfter = diskCheckAfter.match(/(\d+)%/);
if (usageMatchAfter) {
diskUsageAfterCleanup = Number.parseInt(usageMatchAfter[1], 10);
}
} catch {
// Ignore parsing errors
}
// If disk is still at 100% after cleanup, skip tar operation to prevent hang
if (diskUsageAfterCleanup >= 100) {
throw new Error(
`Cannot create cache archive: disk is still at ${diskUsageAfterCleanup}% after cleanup. Tar operation would hang. Please free up disk space manually.`,
);
}
}
} catch (cleanupError) {
// If cleanupError is our disk space error, rethrow it
if (cleanupError instanceof Error && cleanupError.message.includes('Cannot create cache archive')) {
throw cleanupError;
}
CloudRunnerLogger.log(`Proactive cleanup failed: ${cleanupError}`);
}
}
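
The guard above hinges on parsing a percentage out of df output; a minimal sketch, assuming POSIX-style df columns and a hypothetical parseDiskUsagePercent name:

// Hypothetical parser for the guard above: extract the first "NN%" token
// from df output, defaulting to 0 when nothing matches, which mirrors how
// diskUsageAfterCleanup is initialised in the diff.
function parseDiskUsagePercent(dfOutput: string): number {
  const match = dfOutput.match(/(\d+)%/);
  return match ? Number.parseInt(match[1], 10) : 0;
}

// Example with typical df output: the header's "Use%" has no digits before
// the percent sign, so the first match is the data row's 100%.
const sample = 'Filesystem 1K-blocks Used Available Use% Mounted on\n/dev/sda1 104857600 104857600 0 100% /data';
const usage = parseDiskUsagePercent(sample); // 100 — the guard above would throw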
@@ -124,13 +146,32 @@ export class Caching {
}
try {
-await CloudRunnerSystem.Run(
-`tar -cf ${cacheArtifactName}.tar${compressionSuffix} "${path.basename(sourceFolder)}"`,
-);
+// Add timeout to tar command to prevent hanging when disk is full
+// Use timeout command with 10 minute limit (600 seconds) if available
+// Check if timeout command exists, otherwise use regular tar
+const tarCommand = `tar -cf ${cacheArtifactName}.tar${compressionSuffix} "${path.basename(sourceFolder)}"`;
+let tarCommandToRun = tarCommand;
+try {
+// Check if timeout command is available
+await CloudRunnerSystem.Run(`which timeout > /dev/null 2>&1`, true, true);
+// Use timeout if available (600 seconds = 10 minutes)
+tarCommandToRun = `timeout 600 ${tarCommand}`;
+} catch {
+// timeout command not available, use regular tar
+// Note: This could still hang if disk is full, but the disk space check above should prevent this
+tarCommandToRun = tarCommand;
+}
+await CloudRunnerSystem.Run(tarCommandToRun);
} catch (error: any) {
-// Check if error is due to disk space
+// Check if error is due to disk space or timeout
const errorMessage = error?.message || error?.toString() || '';
-if (errorMessage.includes('No space left') || errorMessage.includes('Wrote only')) {
+if (
+errorMessage.includes('No space left') ||
+errorMessage.includes('Wrote only') ||
+errorMessage.includes('timeout') ||
+errorMessage.includes('Terminated')
+) {
CloudRunnerLogger.log(`Disk space error detected. Attempting aggressive cleanup...`);
// Try to clean up old cache files more aggressively
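
Taken together, the timeout probe and the broadened error check above form a reusable pattern; a minimal sketch assuming GNU coreutils timeout semantics (SIGTERM on expiry, which shells report as "Terminated"), with hypothetical helper names:

import { execSync } from 'node:child_process';

// Hypothetical sketch of the probe above: wrap a long-running command in
// `timeout <seconds> ...` when the coreutils timeout binary is on PATH,
// otherwise run it bare and rely on the disk-space guard instead.
function withTimeoutIfAvailable(command: string, seconds = 600): string {
  try {
    execSync('which timeout > /dev/null 2>&1');
    return `timeout ${seconds} ${command}`;
  } catch {
    return command; // timeout not installed; the command runs unguarded
  }
}

// Mirrors the broadened check in the diff: tar's own disk-space messages plus
// the strings a timeout-killed process tends to surface.
function isDiskSpaceOrTimeoutError(message: string): boolean {
  return (
    message.includes('No space left') ||
    message.includes('Wrote only') ||
    message.includes('timeout') ||
    message.includes('Terminated')
  );
}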