pr feedback
parent d12244db60
commit 7f133d8cc7
@@ -4594,6 +4594,47 @@ class KubernetesTaskRunner {
                 break;
             }
         }
+        // After kubectl logs loop ends, read log file as fallback to capture any messages
+        // written after kubectl stopped reading (e.g., "Collected Logs" from post-build)
+        // This ensures all log messages are included in BuildResults for test assertions
+        try {
+            const isPodStillRunning = await kubernetes_pods_1.default.IsPodRunning(podName, namespace, kubeClient);
+            if (!isPodStillRunning) {
+                cloud_runner_logger_1.default.log('Pod is terminated, reading log file as fallback to capture post-build messages...');
+                try {
+                    // Try to read the log file from the terminated pod
+                    // Use kubectl exec with --previous flag or try to access via PVC
+                    const logFileContent = await cloud_runner_system_1.CloudRunnerSystem.Run(`kubectl exec ${podName} -c ${containerName} -n ${namespace} --previous -- cat /home/job-log.txt 2>/dev/null || kubectl exec ${podName} -c ${containerName} -n ${namespace} -- cat /home/job-log.txt 2>/dev/null || echo ""`, true, true);
+                    if (logFileContent && logFileContent.trim()) {
+                        cloud_runner_logger_1.default.log(`Read log file from pod as fallback (${logFileContent.length} chars) to capture missing messages`);
+                        // Get the lines we already have in output to avoid duplicates
+                        const existingLines = new Set(output.split('\n').map((line) => line.trim()));
+                        // Process the log file content line by line and add missing lines
+                        for (const line of logFileContent.split(`\n`)) {
+                            const trimmedLine = line.trim();
+                            const lowerLine = trimmedLine.toLowerCase();
+                            // Skip empty lines, kubectl errors, and lines we already have
+                            if (trimmedLine &&
+                                !lowerLine.includes('unable to retrieve container logs') &&
+                                !existingLines.has(trimmedLine)) {
+                                // Add missing line to output
+                                output += `${line}\n`;
+                                // Process through FollowLogStreamService to ensure proper handling
+                                ({ shouldReadLogs, shouldCleanup, output } = follow_log_stream_service_1.FollowLogStreamService.handleIteration(line, shouldReadLogs, shouldCleanup, output));
+                            }
+                        }
+                    }
+                }
+                catch (logFileError) {
+                    cloud_runner_logger_1.default.logWarning(`Could not read log file from pod as fallback: ${logFileError?.message || logFileError}`);
+                    // Continue with existing output - this is a best-effort fallback
+                }
+            }
+        }
+        catch (fallbackError) {
+            cloud_runner_logger_1.default.logWarning(`Error checking pod status for log file fallback: ${fallbackError?.message || fallbackError}`);
+            // Continue with existing output - this is a best-effort fallback
+        }
         // Filter out kubectl error messages from the final output
         // These errors can be added via stderr even when kubectl fails
         // We filter them out so they don't pollute the BuildResults
@@ -5452,9 +5493,28 @@ class Caching {
             cloud_runner_logger_1.default.log(`Cleanup completed. Checking disk space again...`);
             const diskCheckAfter = await cloud_runner_system_1.CloudRunnerSystem.Run(`df . 2>/dev/null || df /data 2>/dev/null || true`);
             cloud_runner_logger_1.default.log(`Disk space after cleanup: ${diskCheckAfter}`);
+            // Check disk usage again after cleanup
+            let diskUsageAfterCleanup = 0;
+            try {
+                const usageMatchAfter = diskCheckAfter.match(/(\d+)%/);
+                if (usageMatchAfter) {
+                    diskUsageAfterCleanup = Number.parseInt(usageMatchAfter[1], 10);
+                }
+            }
+            catch {
+                // Ignore parsing errors
+            }
+            // If disk is still at 100% after cleanup, skip tar operation to prevent hang
+            if (diskUsageAfterCleanup >= 100) {
+                throw new Error(`Cannot create cache archive: disk is still at ${diskUsageAfterCleanup}% after cleanup. Tar operation would hang. Please free up disk space manually.`);
+            }
         }
     }
     catch (cleanupError) {
+        // If cleanupError is our disk space error, rethrow it
+        if (cleanupError instanceof Error && cleanupError.message.includes('Cannot create cache archive')) {
+            throw cleanupError;
+        }
         cloud_runner_logger_1.default.log(`Proactive cleanup failed: ${cleanupError}`);
     }
 }
@@ -5466,12 +5526,31 @@ class Caching {
             // Ignore cleanup errors
         }
         try {
-            await cloud_runner_system_1.CloudRunnerSystem.Run(`tar -cf ${cacheArtifactName}.tar${compressionSuffix} "${node_path_1.default.basename(sourceFolder)}"`);
+            // Add timeout to tar command to prevent hanging when disk is full
+            // Use timeout command with 10 minute limit (600 seconds) if available
+            // Check if timeout command exists, otherwise use regular tar
+            const tarCommand = `tar -cf ${cacheArtifactName}.tar${compressionSuffix} "${node_path_1.default.basename(sourceFolder)}"`;
+            let tarCommandToRun = tarCommand;
+            try {
+                // Check if timeout command is available
+                await cloud_runner_system_1.CloudRunnerSystem.Run(`which timeout > /dev/null 2>&1`, true, true);
+                // Use timeout if available (600 seconds = 10 minutes)
+                tarCommandToRun = `timeout 600 ${tarCommand}`;
+            }
+            catch {
+                // timeout command not available, use regular tar
+                // Note: This could still hang if disk is full, but the disk space check above should prevent this
+                tarCommandToRun = tarCommand;
+            }
+            await cloud_runner_system_1.CloudRunnerSystem.Run(tarCommandToRun);
         }
         catch (error) {
-            // Check if error is due to disk space
+            // Check if error is due to disk space or timeout
            const errorMessage = error?.message || error?.toString() || '';
-            if (errorMessage.includes('No space left') || errorMessage.includes('Wrote only')) {
+            if (errorMessage.includes('No space left') ||
+                errorMessage.includes('Wrote only') ||
+                errorMessage.includes('timeout') ||
+                errorMessage.includes('Terminated')) {
                cloud_runner_logger_1.default.log(`Disk space error detected. Attempting aggressive cleanup...`);
                // Try to clean up old cache files more aggressively
                try {
File diff suppressed because one or more lines are too long
@@ -76,8 +76,9 @@ class KubernetesTaskRunner {

       // Filter out kubectl error messages from the error output
       const errorMessage = error?.message || error?.toString() || '';
-      const isKubectlLogsError = errorMessage.includes('unable to retrieve container logs for containerd://') ||
-        errorMessage.toLowerCase().includes('unable to retrieve container logs');
+      const isKubectlLogsError =
+        errorMessage.includes('unable to retrieve container logs for containerd://') ||
+        errorMessage.toLowerCase().includes('unable to retrieve container logs');

       if (isKubectlLogsError) {
         CloudRunnerLogger.log(`Kubectl unable to retrieve logs, attempt ${kubectlLogsFailedCount}/${maxKubectlLogsFailures}`);
@@ -208,6 +209,64 @@ class KubernetesTaskRunner {
      }
    }

+    // After kubectl logs loop ends, read log file as fallback to capture any messages
+    // written after kubectl stopped reading (e.g., "Collected Logs" from post-build)
+    // This ensures all log messages are included in BuildResults for test assertions
+    try {
+      const isPodStillRunning = await KubernetesPods.IsPodRunning(podName, namespace, kubeClient);
+      if (!isPodStillRunning) {
+        CloudRunnerLogger.log('Pod is terminated, reading log file as fallback to capture post-build messages...');
+        try {
+          // Try to read the log file from the terminated pod
+          // Use kubectl exec with --previous flag or try to access via PVC
+          const logFileContent = await CloudRunnerSystem.Run(
+            `kubectl exec ${podName} -c ${containerName} -n ${namespace} --previous -- cat /home/job-log.txt 2>/dev/null || kubectl exec ${podName} -c ${containerName} -n ${namespace} -- cat /home/job-log.txt 2>/dev/null || echo ""`,
+            true,
+            true,
+          );
+
+          if (logFileContent && logFileContent.trim()) {
+            CloudRunnerLogger.log(
+              `Read log file from pod as fallback (${logFileContent.length} chars) to capture missing messages`,
+            );
+            // Get the lines we already have in output to avoid duplicates
+            const existingLines = new Set(output.split('\n').map((line) => line.trim()));
+            // Process the log file content line by line and add missing lines
+            for (const line of logFileContent.split(`\n`)) {
+              const trimmedLine = line.trim();
+              const lowerLine = trimmedLine.toLowerCase();
+              // Skip empty lines, kubectl errors, and lines we already have
+              if (
+                trimmedLine &&
+                !lowerLine.includes('unable to retrieve container logs') &&
+                !existingLines.has(trimmedLine)
+              ) {
+                // Add missing line to output
+                output += `${line}\n`;
+                // Process through FollowLogStreamService to ensure proper handling
+                ({ shouldReadLogs, shouldCleanup, output } = FollowLogStreamService.handleIteration(
+                  line,
+                  shouldReadLogs,
+                  shouldCleanup,
+                  output,
+                ));
+              }
+            }
+          }
+        } catch (logFileError: any) {
+          CloudRunnerLogger.logWarning(
+            `Could not read log file from pod as fallback: ${logFileError?.message || logFileError}`,
+          );
+          // Continue with existing output - this is a best-effort fallback
+        }
+      }
+    } catch (fallbackError: any) {
+      CloudRunnerLogger.logWarning(
+        `Error checking pod status for log file fallback: ${fallbackError?.message || fallbackError}`,
+      );
+      // Continue with existing output - this is a best-effort fallback
+    }
+
     // Filter out kubectl error messages from the final output
     // These errors can be added via stderr even when kubectl fails
     // We filter them out so they don't pollute the BuildResults
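For reference, the dedup step in the fallback above can be read in isolation as the sketch below; the standalone helper name is illustrative and not part of the project's API.

```ts
// Append only those log-file lines that are not already in the captured output
// and are not kubectl error noise. Illustrative helper, not the project's API.
function mergeLogFileIntoOutput(output: string, logFileContent: string): string {
  const existingLines = new Set(output.split('\n').map((line) => line.trim()));
  for (const line of logFileContent.split('\n')) {
    const trimmedLine = line.trim();
    if (
      trimmedLine &&
      !trimmedLine.toLowerCase().includes('unable to retrieve container logs') &&
      !existingLines.has(trimmedLine)
    ) {
      output += `${line}\n`;
      existingLines.add(trimmedLine); // also guards against duplicates within the file itself
    }
  }
  return output;
}
```

The actual change additionally routes each appended line through FollowLogStreamService.handleIteration so the build-status flags stay in sync with the recovered output.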
@@ -110,8 +110,30 @@ export class Caching {
        CloudRunnerLogger.log(`Cleanup completed. Checking disk space again...`);
        const diskCheckAfter = await CloudRunnerSystem.Run(`df . 2>/dev/null || df /data 2>/dev/null || true`);
        CloudRunnerLogger.log(`Disk space after cleanup: ${diskCheckAfter}`);
+
+        // Check disk usage again after cleanup
+        let diskUsageAfterCleanup = 0;
+        try {
+          const usageMatchAfter = diskCheckAfter.match(/(\d+)%/);
+          if (usageMatchAfter) {
+            diskUsageAfterCleanup = Number.parseInt(usageMatchAfter[1], 10);
+          }
+        } catch {
+          // Ignore parsing errors
+        }
+
+        // If disk is still at 100% after cleanup, skip tar operation to prevent hang
+        if (diskUsageAfterCleanup >= 100) {
+          throw new Error(
+            `Cannot create cache archive: disk is still at ${diskUsageAfterCleanup}% after cleanup. Tar operation would hang. Please free up disk space manually.`,
+          );
+        }
      }
    } catch (cleanupError) {
+      // If cleanupError is our disk space error, rethrow it
+      if (cleanupError instanceof Error && cleanupError.message.includes('Cannot create cache archive')) {
+        throw cleanupError;
+      }
      CloudRunnerLogger.log(`Proactive cleanup failed: ${cleanupError}`);
    }
  }
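The disk-usage gate added above amounts to parsing the Use% column of `df` output and refusing to archive while the filesystem is still full; a minimal sketch with hypothetical helper names:

```ts
// Parse the first "NN%" token from `df` output; default to 0 if parsing fails.
function parseDiskUsagePercent(dfOutput: string): number {
  const match = dfOutput.match(/(\d+)%/);
  return match ? Number.parseInt(match[1], 10) : 0;
}

// Throw before starting tar if cleanup did not actually free any space.
function assertDiskNotFull(dfOutput: string): void {
  const usage = parseDiskUsagePercent(dfOutput);
  if (usage >= 100) {
    throw new Error(`Cannot create cache archive: disk is still at ${usage}% after cleanup.`);
  }
}
```

Throwing here, and rethrowing past the generic cleanup catch as the hunk does, is what keeps a full disk from turning into a silently hanging tar process.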
@@ -124,13 +146,32 @@ export class Caching {
      }

      try {
-        await CloudRunnerSystem.Run(
-          `tar -cf ${cacheArtifactName}.tar${compressionSuffix} "${path.basename(sourceFolder)}"`,
-        );
+        // Add timeout to tar command to prevent hanging when disk is full
+        // Use timeout command with 10 minute limit (600 seconds) if available
+        // Check if timeout command exists, otherwise use regular tar
+        const tarCommand = `tar -cf ${cacheArtifactName}.tar${compressionSuffix} "${path.basename(sourceFolder)}"`;
+        let tarCommandToRun = tarCommand;
+        try {
+          // Check if timeout command is available
+          await CloudRunnerSystem.Run(`which timeout > /dev/null 2>&1`, true, true);
+          // Use timeout if available (600 seconds = 10 minutes)
+          tarCommandToRun = `timeout 600 ${tarCommand}`;
+        } catch {
+          // timeout command not available, use regular tar
+          // Note: This could still hang if disk is full, but the disk space check above should prevent this
+          tarCommandToRun = tarCommand;
+        }
+
+        await CloudRunnerSystem.Run(tarCommandToRun);
      } catch (error: any) {
-        // Check if error is due to disk space
+        // Check if error is due to disk space or timeout
        const errorMessage = error?.message || error?.toString() || '';
-        if (errorMessage.includes('No space left') || errorMessage.includes('Wrote only')) {
+        if (
+          errorMessage.includes('No space left') ||
+          errorMessage.includes('Wrote only') ||
+          errorMessage.includes('timeout') ||
+          errorMessage.includes('Terminated')
+        ) {
          CloudRunnerLogger.log(`Disk space error detected. Attempting aggressive cleanup...`);

          // Try to clean up old cache files more aggressively
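The timeout wrapping above can be sketched as the following helper, assuming a `run` callback that rejects on a non-zero exit code (standing in for CloudRunnerSystem.Run):

```ts
// Prefer `timeout 600 tar ...` when the timeout binary exists; otherwise fall
// back to plain tar and rely on the earlier disk-space check.
async function buildTarCommand(
  run: (cmd: string) => Promise<string>,
  archiveName: string,
  folderName: string,
): Promise<string> {
  const tarCommand = `tar -cf ${archiveName}.tar "${folderName}"`;
  try {
    await run(`which timeout > /dev/null 2>&1`); // rejects when `timeout` is not installed
    return `timeout 600 ${tarCommand}`; // 600 seconds = 10 minutes
  } catch {
    return tarCommand;
  }
}
```

Treating `timeout`/`Terminated` in the error message as disk-space symptoms, as the widened condition does, means a killed tar run triggers the same aggressive cache cleanup path.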