pr feedback
parent d12244db60
commit 7f133d8cc7
@@ -4594,6 +4594,47 @@ class KubernetesTaskRunner {
                 break;
             }
         }
+        // After kubectl logs loop ends, read log file as fallback to capture any messages
+        // written after kubectl stopped reading (e.g., "Collected Logs" from post-build)
+        // This ensures all log messages are included in BuildResults for test assertions
+        try {
+            const isPodStillRunning = await kubernetes_pods_1.default.IsPodRunning(podName, namespace, kubeClient);
+            if (!isPodStillRunning) {
+                cloud_runner_logger_1.default.log('Pod is terminated, reading log file as fallback to capture post-build messages...');
+                try {
+                    // Try to read the log file from the terminated pod
+                    // Use kubectl exec with --previous flag or try to access via PVC
+                    const logFileContent = await cloud_runner_system_1.CloudRunnerSystem.Run(`kubectl exec ${podName} -c ${containerName} -n ${namespace} --previous -- cat /home/job-log.txt 2>/dev/null || kubectl exec ${podName} -c ${containerName} -n ${namespace} -- cat /home/job-log.txt 2>/dev/null || echo ""`, true, true);
+                    if (logFileContent && logFileContent.trim()) {
+                        cloud_runner_logger_1.default.log(`Read log file from pod as fallback (${logFileContent.length} chars) to capture missing messages`);
+                        // Get the lines we already have in output to avoid duplicates
+                        const existingLines = new Set(output.split('\n').map((line) => line.trim()));
+                        // Process the log file content line by line and add missing lines
+                        for (const line of logFileContent.split(`\n`)) {
+                            const trimmedLine = line.trim();
+                            const lowerLine = trimmedLine.toLowerCase();
+                            // Skip empty lines, kubectl errors, and lines we already have
+                            if (trimmedLine &&
+                                !lowerLine.includes('unable to retrieve container logs') &&
+                                !existingLines.has(trimmedLine)) {
+                                // Add missing line to output
+                                output += `${line}\n`;
+                                // Process through FollowLogStreamService to ensure proper handling
+                                ({ shouldReadLogs, shouldCleanup, output } = follow_log_stream_service_1.FollowLogStreamService.handleIteration(line, shouldReadLogs, shouldCleanup, output));
+                            }
+                        }
+                    }
+                }
+                catch (logFileError) {
+                    cloud_runner_logger_1.default.logWarning(`Could not read log file from pod as fallback: ${logFileError?.message || logFileError}`);
+                    // Continue with existing output - this is a best-effort fallback
+                }
+            }
+        }
+        catch (fallbackError) {
+            cloud_runner_logger_1.default.logWarning(`Error checking pod status for log file fallback: ${fallbackError?.message || fallbackError}`);
+            // Continue with existing output - this is a best-effort fallback
+        }
         // Filter out kubectl error messages from the final output
         // These errors can be added via stderr even when kubectl fails
         // We filter them out so they don't pollute the BuildResults
@@ -5452,9 +5493,28 @@ class Caching {
             cloud_runner_logger_1.default.log(`Cleanup completed. Checking disk space again...`);
             const diskCheckAfter = await cloud_runner_system_1.CloudRunnerSystem.Run(`df . 2>/dev/null || df /data 2>/dev/null || true`);
             cloud_runner_logger_1.default.log(`Disk space after cleanup: ${diskCheckAfter}`);
+            // Check disk usage again after cleanup
+            let diskUsageAfterCleanup = 0;
+            try {
+                const usageMatchAfter = diskCheckAfter.match(/(\d+)%/);
+                if (usageMatchAfter) {
+                    diskUsageAfterCleanup = Number.parseInt(usageMatchAfter[1], 10);
+                }
+            }
+            catch {
+                // Ignore parsing errors
+            }
+            // If disk is still at 100% after cleanup, skip tar operation to prevent hang
+            if (diskUsageAfterCleanup >= 100) {
+                throw new Error(`Cannot create cache archive: disk is still at ${diskUsageAfterCleanup}% after cleanup. Tar operation would hang. Please free up disk space manually.`);
+            }
         }
     }
     catch (cleanupError) {
+        // If cleanupError is our disk space error, rethrow it
+        if (cleanupError instanceof Error && cleanupError.message.includes('Cannot create cache archive')) {
+            throw cleanupError;
+        }
         cloud_runner_logger_1.default.log(`Proactive cleanup failed: ${cleanupError}`);
     }
 }
@@ -5466,12 +5526,31 @@ class Caching {
             // Ignore cleanup errors
         }
         try {
-            await cloud_runner_system_1.CloudRunnerSystem.Run(`tar -cf ${cacheArtifactName}.tar${compressionSuffix} "${node_path_1.default.basename(sourceFolder)}"`);
+            // Add timeout to tar command to prevent hanging when disk is full
+            // Use timeout command with 10 minute limit (600 seconds) if available
+            // Check if timeout command exists, otherwise use regular tar
+            const tarCommand = `tar -cf ${cacheArtifactName}.tar${compressionSuffix} "${node_path_1.default.basename(sourceFolder)}"`;
+            let tarCommandToRun = tarCommand;
+            try {
+                // Check if timeout command is available
+                await cloud_runner_system_1.CloudRunnerSystem.Run(`which timeout > /dev/null 2>&1`, true, true);
+                // Use timeout if available (600 seconds = 10 minutes)
+                tarCommandToRun = `timeout 600 ${tarCommand}`;
+            }
+            catch {
+                // timeout command not available, use regular tar
+                // Note: This could still hang if disk is full, but the disk space check above should prevent this
+                tarCommandToRun = tarCommand;
+            }
+            await cloud_runner_system_1.CloudRunnerSystem.Run(tarCommandToRun);
         }
         catch (error) {
-            // Check if error is due to disk space
+            // Check if error is due to disk space or timeout
            const errorMessage = error?.message || error?.toString() || '';
-            if (errorMessage.includes('No space left') || errorMessage.includes('Wrote only')) {
+            if (errorMessage.includes('No space left') ||
+                errorMessage.includes('Wrote only') ||
+                errorMessage.includes('timeout') ||
+                errorMessage.includes('Terminated')) {
                cloud_runner_logger_1.default.log(`Disk space error detected. Attempting aggressive cleanup...`);
                // Try to clean up old cache files more aggressively
                try {
File diff suppressed because one or more lines are too long
@@ -76,8 +76,9 @@ class KubernetesTaskRunner {

       // Filter out kubectl error messages from the error output
       const errorMessage = error?.message || error?.toString() || '';
-      const isKubectlLogsError = errorMessage.includes('unable to retrieve container logs for containerd://') ||
-        errorMessage.toLowerCase().includes('unable to retrieve container logs');
+      const isKubectlLogsError =
+        errorMessage.includes('unable to retrieve container logs for containerd://') ||
+        errorMessage.toLowerCase().includes('unable to retrieve container logs');

       if (isKubectlLogsError) {
         CloudRunnerLogger.log(`Kubectl unable to retrieve logs, attempt ${kubectlLogsFailedCount}/${maxKubectlLogsFailures}`);
@@ -208,6 +209,64 @@ class KubernetesTaskRunner {
      }
    }

+    // After kubectl logs loop ends, read log file as fallback to capture any messages
+    // written after kubectl stopped reading (e.g., "Collected Logs" from post-build)
+    // This ensures all log messages are included in BuildResults for test assertions
+    try {
+      const isPodStillRunning = await KubernetesPods.IsPodRunning(podName, namespace, kubeClient);
+      if (!isPodStillRunning) {
+        CloudRunnerLogger.log('Pod is terminated, reading log file as fallback to capture post-build messages...');
+        try {
+          // Try to read the log file from the terminated pod
+          // Use kubectl exec with --previous flag or try to access via PVC
+          const logFileContent = await CloudRunnerSystem.Run(
+            `kubectl exec ${podName} -c ${containerName} -n ${namespace} --previous -- cat /home/job-log.txt 2>/dev/null || kubectl exec ${podName} -c ${containerName} -n ${namespace} -- cat /home/job-log.txt 2>/dev/null || echo ""`,
+            true,
+            true,
+          );
+
+          if (logFileContent && logFileContent.trim()) {
+            CloudRunnerLogger.log(
+              `Read log file from pod as fallback (${logFileContent.length} chars) to capture missing messages`,
+            );
+            // Get the lines we already have in output to avoid duplicates
+            const existingLines = new Set(output.split('\n').map((line) => line.trim()));
+            // Process the log file content line by line and add missing lines
+            for (const line of logFileContent.split(`\n`)) {
+              const trimmedLine = line.trim();
+              const lowerLine = trimmedLine.toLowerCase();
+              // Skip empty lines, kubectl errors, and lines we already have
+              if (
+                trimmedLine &&
+                !lowerLine.includes('unable to retrieve container logs') &&
+                !existingLines.has(trimmedLine)
+              ) {
+                // Add missing line to output
+                output += `${line}\n`;
+                // Process through FollowLogStreamService to ensure proper handling
+                ({ shouldReadLogs, shouldCleanup, output } = FollowLogStreamService.handleIteration(
+                  line,
+                  shouldReadLogs,
+                  shouldCleanup,
+                  output,
+                ));
+              }
+            }
+          }
+        } catch (logFileError: any) {
+          CloudRunnerLogger.logWarning(
+            `Could not read log file from pod as fallback: ${logFileError?.message || logFileError}`,
+          );
+          // Continue with existing output - this is a best-effort fallback
+        }
+      }
+    } catch (fallbackError: any) {
+      CloudRunnerLogger.logWarning(
+        `Error checking pod status for log file fallback: ${fallbackError?.message || fallbackError}`,
+      );
+      // Continue with existing output - this is a best-effort fallback
+    }
+
     // Filter out kubectl error messages from the final output
     // These errors can be added via stderr even when kubectl fails
     // We filter them out so they don't pollute the BuildResults
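For reference, the dedup step in the fallback above can be read in isolation as the sketch below; the standalone helper name is illustrative and not part of the project's API.

```ts
// Append only those log-file lines that are not already in the captured output
// and are not kubectl error noise. Illustrative helper, not the project's API.
function mergeLogFileIntoOutput(output: string, logFileContent: string): string {
  const existingLines = new Set(output.split('\n').map((line) => line.trim()));
  for (const line of logFileContent.split('\n')) {
    const trimmedLine = line.trim();
    if (
      trimmedLine &&
      !trimmedLine.toLowerCase().includes('unable to retrieve container logs') &&
      !existingLines.has(trimmedLine)
    ) {
      output += `${line}\n`;
      existingLines.add(trimmedLine); // also guards against duplicates within the file itself
    }
  }
  return output;
}
```

The actual change additionally routes each appended line through FollowLogStreamService.handleIteration so the build-status flags stay in sync with the recovered output.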
@@ -110,8 +110,30 @@ export class Caching {
        CloudRunnerLogger.log(`Cleanup completed. Checking disk space again...`);
        const diskCheckAfter = await CloudRunnerSystem.Run(`df . 2>/dev/null || df /data 2>/dev/null || true`);
        CloudRunnerLogger.log(`Disk space after cleanup: ${diskCheckAfter}`);
+
+        // Check disk usage again after cleanup
+        let diskUsageAfterCleanup = 0;
+        try {
+          const usageMatchAfter = diskCheckAfter.match(/(\d+)%/);
+          if (usageMatchAfter) {
+            diskUsageAfterCleanup = Number.parseInt(usageMatchAfter[1], 10);
+          }
+        } catch {
+          // Ignore parsing errors
+        }
+
+        // If disk is still at 100% after cleanup, skip tar operation to prevent hang
+        if (diskUsageAfterCleanup >= 100) {
+          throw new Error(
+            `Cannot create cache archive: disk is still at ${diskUsageAfterCleanup}% after cleanup. Tar operation would hang. Please free up disk space manually.`,
+          );
+        }
      }
    } catch (cleanupError) {
+      // If cleanupError is our disk space error, rethrow it
+      if (cleanupError instanceof Error && cleanupError.message.includes('Cannot create cache archive')) {
+        throw cleanupError;
+      }
      CloudRunnerLogger.log(`Proactive cleanup failed: ${cleanupError}`);
    }
  }
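The disk-usage gate added above amounts to parsing the Use% column of `df` output and refusing to archive while the filesystem is still full; a minimal sketch with hypothetical helper names:

```ts
// Parse the first "NN%" token from `df` output; default to 0 if parsing fails.
function parseDiskUsagePercent(dfOutput: string): number {
  const match = dfOutput.match(/(\d+)%/);
  return match ? Number.parseInt(match[1], 10) : 0;
}

// Throw before starting tar if cleanup did not actually free any space.
function assertDiskNotFull(dfOutput: string): void {
  const usage = parseDiskUsagePercent(dfOutput);
  if (usage >= 100) {
    throw new Error(`Cannot create cache archive: disk is still at ${usage}% after cleanup.`);
  }
}
```

Throwing here, and rethrowing past the generic cleanup catch as the hunk does, is what keeps a full disk from turning into a silently hanging tar process.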
@@ -124,13 +146,32 @@ export class Caching {
      }

      try {
-        await CloudRunnerSystem.Run(
-          `tar -cf ${cacheArtifactName}.tar${compressionSuffix} "${path.basename(sourceFolder)}"`,
-        );
+        // Add timeout to tar command to prevent hanging when disk is full
+        // Use timeout command with 10 minute limit (600 seconds) if available
+        // Check if timeout command exists, otherwise use regular tar
+        const tarCommand = `tar -cf ${cacheArtifactName}.tar${compressionSuffix} "${path.basename(sourceFolder)}"`;
+        let tarCommandToRun = tarCommand;
+        try {
+          // Check if timeout command is available
+          await CloudRunnerSystem.Run(`which timeout > /dev/null 2>&1`, true, true);
+          // Use timeout if available (600 seconds = 10 minutes)
+          tarCommandToRun = `timeout 600 ${tarCommand}`;
+        } catch {
+          // timeout command not available, use regular tar
+          // Note: This could still hang if disk is full, but the disk space check above should prevent this
+          tarCommandToRun = tarCommand;
+        }
+
+        await CloudRunnerSystem.Run(tarCommandToRun);
      } catch (error: any) {
-        // Check if error is due to disk space
+        // Check if error is due to disk space or timeout
        const errorMessage = error?.message || error?.toString() || '';
-        if (errorMessage.includes('No space left') || errorMessage.includes('Wrote only')) {
+        if (
+          errorMessage.includes('No space left') ||
+          errorMessage.includes('Wrote only') ||
+          errorMessage.includes('timeout') ||
+          errorMessage.includes('Terminated')
+        ) {
          CloudRunnerLogger.log(`Disk space error detected. Attempting aggressive cleanup...`);

          // Try to clean up old cache files more aggressively
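The timeout wrapping above can be sketched as the following helper, assuming a `run` callback that rejects on a non-zero exit code (standing in for CloudRunnerSystem.Run):

```ts
// Prefer `timeout 600 tar ...` when the timeout binary exists; otherwise fall
// back to plain tar and rely on the earlier disk-space check.
async function buildTarCommand(
  run: (cmd: string) => Promise<string>,
  archiveName: string,
  folderName: string,
): Promise<string> {
  const tarCommand = `tar -cf ${archiveName}.tar "${folderName}"`;
  try {
    await run(`which timeout > /dev/null 2>&1`); // rejects when `timeout` is not installed
    return `timeout 600 ${tarCommand}`; // 600 seconds = 10 minutes
  } catch {
    return tarCommand;
  }
}
```

Treating `timeout`/`Terminated` in the error message as disk-space symptoms, as the widened condition does, means a killed tar run triggers the same aggressive cache cleanup path.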