import { CoreV1Api, KubeConfig } from '@kubernetes/client-node';
import CloudRunnerLogger from '../../services/core/cloud-runner-logger';
import { waitUntil } from 'async-wait-until';
import { CloudRunnerSystem } from '../../services/core/cloud-runner-system';
import CloudRunner from '../../cloud-runner';
import KubernetesPods from './kubernetes-pods';
import { FollowLogStreamService } from '../../services/core/follow-log-stream-service';

class KubernetesTaskRunner {
  static readonly maxRetry: number = 3;
  static lastReceivedMessage: string = ``;

  static async runTask(
    kubeConfig: KubeConfig,
    kubeClient: CoreV1Api,
    jobName: string,
    podName: string,
    containerName: string,
    namespace: string,
  ) {
    let output = '';
    let shouldReadLogs = true;
    let shouldCleanup = true;
    let retriesAfterFinish = 0;
    let kubectlLogsFailedCount = 0;
    const maxKubectlLogsFailures = 3;

    // eslint-disable-next-line no-constant-condition
    while (true) {
      await new Promise((resolve) => setTimeout(resolve, 3000));
      CloudRunnerLogger.log(
        `Streaming logs from pod: ${podName} container: ${containerName} namespace: ${namespace} ${CloudRunner.buildParameters.kubeVolumeSize}/${CloudRunner.buildParameters.containerCpu}/${CloudRunner.buildParameters.containerMemory}`,
      );
      const isRunning = await KubernetesPods.IsPodRunning(podName, namespace, kubeClient);

      const callback = (outputChunk: string) => {
        // Filter out kubectl error messages about being unable to retrieve container logs
        // These errors pollute the output and don't contain useful information
        const lowerChunk = outputChunk.toLowerCase();
        if (lowerChunk.includes('unable to retrieve container logs')) {
          CloudRunnerLogger.log(`Filtered kubectl error: ${outputChunk.trim()}`);

          return;
        }
        output += outputChunk;

        // split output chunk and handle per line
        for (const chunk of outputChunk.split(`\n`)) {
          // Skip empty lines and kubectl error messages (case-insensitive)
          const lowerLine = chunk.toLowerCase();
          if (chunk.trim() && !lowerLine.includes('unable to retrieve container logs')) {
            ({ shouldReadLogs, shouldCleanup, output } = FollowLogStreamService.handleIteration(
              chunk,
              shouldReadLogs,
              shouldCleanup,
              output,
            ));
          }
        }
      };
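      // Note: `--previous` only matches a container that has restarted; when there is no previous
      // container, kubectl exits with an error mentioning "previous terminated container", which the
      // catch block below handles by retrying without the flag.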
      try {
        // Always specify container name explicitly to avoid containerd:// errors
        // Use -f for running pods, --previous for terminated pods
        await CloudRunnerSystem.Run(
          `kubectl logs ${podName} -c ${containerName} -n ${namespace}${isRunning ? ' -f' : ' --previous'}`,
          false,
          true,
          callback,
        );

        // Reset failure count on success
        kubectlLogsFailedCount = 0;
      } catch (error: any) {
        kubectlLogsFailedCount++;
        await new Promise((resolve) => setTimeout(resolve, 3000));
        const continueStreaming = await KubernetesPods.IsPodRunning(podName, namespace, kubeClient);
        CloudRunnerLogger.log(`K8s logging error ${error} ${continueStreaming}`);

        // Filter out kubectl error messages from the error output
        const errorMessage = error?.message || error?.toString() || '';
        const isKubectlLogsError =
          errorMessage.includes('unable to retrieve container logs for containerd://') ||
          errorMessage.toLowerCase().includes('unable to retrieve container logs');

        if (isKubectlLogsError) {
          CloudRunnerLogger.log(
            `Kubectl unable to retrieve logs, attempt ${kubectlLogsFailedCount}/${maxKubectlLogsFailures}`,
          );

          // If kubectl logs has failed multiple times, try reading the log file directly from the pod
          // This works even if the pod is terminated, as long as it hasn't been deleted
          if (kubectlLogsFailedCount >= maxKubectlLogsFailures && !isRunning && !continueStreaming) {
            CloudRunnerLogger.log(`Attempting to read log file directly from pod as fallback...`);
            try {
              // Try to read the log file from the pod
              // Use kubectl exec for running pods, or try to access via PVC if pod is terminated
              let logFileContent = '';
              if (isRunning) {
                // Pod is still running, try exec
                logFileContent = await CloudRunnerSystem.Run(
                  `kubectl exec ${podName} -c ${containerName} -n ${namespace} -- cat /home/job-log.txt 2>/dev/null || echo ""`,
                  true,
                  true,
                );
              } else {
                // Pod is terminated, try to create a temporary pod to read from the PVC
                // First, check if we can still access the pod's filesystem
                CloudRunnerLogger.log(`Pod is terminated, attempting to read log file via temporary pod...`);

                // For terminated pods, we might not be able to exec, so we'll skip this fallback
                // and rely on the log file being written to the PVC (if mounted)
                CloudRunnerLogger.logWarning(`Cannot read log file from terminated pod via exec`);
              }

              if (logFileContent && logFileContent.trim()) {
                CloudRunnerLogger.log(`Successfully read log file from pod (${logFileContent.length} chars)`);

                // Process the log file content line by line
                for (const line of logFileContent.split(`\n`)) {
                  const lowerLine = line.toLowerCase();
                  if (line.trim() && !lowerLine.includes('unable to retrieve container logs')) {
                    ({ shouldReadLogs, shouldCleanup, output } = FollowLogStreamService.handleIteration(
                      line,
                      shouldReadLogs,
                      shouldCleanup,
                      output,
                    ));
                  }
                }

                // Check if we got the end of transmission marker
                if (FollowLogStreamService.DidReceiveEndOfTransmission) {
                  CloudRunnerLogger.log('end of log stream (from log file)');
                  break;
                }
              } else {
                CloudRunnerLogger.logWarning(`Log file read returned empty content, continuing with available logs`);

                // If we can't read the log file, break out of the loop to return whatever logs we have
                // This prevents infinite retries when kubectl logs consistently fails
                break;
              }
            } catch (execError: any) {
              CloudRunnerLogger.logWarning(`Failed to read log file from pod: ${execError}`);

              // If we've exhausted all options, break to return whatever logs we have
              break;
            }
          }
        }

        // If pod is not running and we tried --previous but it failed, try without --previous
        if (!isRunning && !continueStreaming && error?.message?.includes('previous terminated container')) {
          CloudRunnerLogger.log(`Previous container not found, trying current container logs...`);
          try {
            await CloudRunnerSystem.Run(
              `kubectl logs ${podName} -c ${containerName} -n ${namespace}`,
              false,
              true,
              callback,
            );

            // If we successfully got logs, check for end of transmission
            if (FollowLogStreamService.DidReceiveEndOfTransmission) {
              CloudRunnerLogger.log('end of log stream');
              break;
            }

            // If we got logs but no end marker, continue trying (might be more logs)
            if (retriesAfterFinish < KubernetesTaskRunner.maxRetry) {
              retriesAfterFinish++;
              continue;
            }

            // If we've exhausted retries, break
            break;
          } catch (fallbackError: any) {
            CloudRunnerLogger.log(`Fallback log fetch also failed: ${fallbackError}`);

            // If both fail, continue retrying if we haven't exhausted retries
            if (retriesAfterFinish < KubernetesTaskRunner.maxRetry) {
              retriesAfterFinish++;
              continue;
            }

            // Only break if we've exhausted all retries
            CloudRunnerLogger.logWarning(
              `Could not fetch any container logs after ${KubernetesTaskRunner.maxRetry} retries`,
            );
            break;
          }
        }

        if (continueStreaming) {
          continue;
        }
        if (retriesAfterFinish < KubernetesTaskRunner.maxRetry) {
          retriesAfterFinish++;
          continue;
        }

        // If we've exhausted retries and it's not a previous container issue, throw
        if (!error?.message?.includes('previous terminated container')) {
          throw error;
        }

        // For previous container errors, we've already tried fallback, so just break
        CloudRunnerLogger.logWarning(
          `Could not fetch previous container logs after retries, but continuing with available logs`,
        );
        break;
      }
      if (FollowLogStreamService.DidReceiveEndOfTransmission) {
        CloudRunnerLogger.log('end of log stream');
        break;
      }
    }

    // After kubectl logs loop ends, read log file as fallback to capture any messages
    // written after kubectl stopped reading (e.g., "Collected Logs" from post-build)
    // This ensures all log messages are included in BuildResults for test assertions
    // If output is empty, we need to be more aggressive about getting logs
    const needsFallback = output.trim().length === 0;
    const missingCollectedLogs = !output.includes('Collected Logs');
    if (needsFallback) {
      CloudRunnerLogger.log('Output is empty, attempting aggressive log collection fallback...');

      // Give the pod a moment to finish writing logs before we try to read them
      await new Promise((resolve) => setTimeout(resolve, 5000));
    }

    // Always try fallback if output is empty, if pod is terminated, or if "Collected Logs" is missing
    // The "Collected Logs" check ensures we try to get post-build messages even if we have some output
    try {
      const isPodStillRunning = await KubernetesPods.IsPodRunning(podName, namespace, kubeClient);
      const shouldTryFallback = !isPodStillRunning || needsFallback || missingCollectedLogs;
      if (shouldTryFallback) {
        const reason = needsFallback
          ? 'output is empty'
          : missingCollectedLogs
          ? 'Collected Logs missing from output'
          : 'pod is terminated';
        CloudRunnerLogger.log(
          `Pod is ${isPodStillRunning ? 'running' : 'terminated'} and ${reason}, reading log file as fallback...`,
        );
        try {
          // Try to read the log file from the pod
          // For killed pods (OOM), kubectl exec might not work, so we try multiple approaches
          // First try --previous flag for terminated containers, then try without it
          let logFileContent = '';

          // Try multiple approaches to get the log file
          // Order matters: try terminated container first, then current, then PVC, then kubectl logs as last resort
          // For K8s, the PVC is mounted at /data, so try reading from there too
          const attempts = [
            // For terminated pods, try --previous first
            // (note: kubectl exec has no --previous flag, so these attempts fail fast and fall through)
            `kubectl exec ${podName} -c ${containerName} -n ${namespace} --previous -- cat /home/job-log.txt 2>/dev/null || echo ""`,
            // Try current container
            `kubectl exec ${podName} -c ${containerName} -n ${namespace} -- cat /home/job-log.txt 2>/dev/null || echo ""`,
            // Try reading from PVC (/data) in case log was copied there
            `kubectl exec ${podName} -c ${containerName} -n ${namespace} --previous -- cat /data/job-log.txt 2>/dev/null || echo ""`,
            `kubectl exec ${podName} -c ${containerName} -n ${namespace} -- cat /data/job-log.txt 2>/dev/null || echo ""`,
            // Try kubectl logs as fallback (might capture stdout even if exec fails)
            `kubectl logs ${podName} -c ${containerName} -n ${namespace} --previous 2>/dev/null || echo ""`,
            `kubectl logs ${podName} -c ${containerName} -n ${namespace} 2>/dev/null || echo ""`,
          ];

          for (const attempt of attempts) {
            // If we already have content with "Collected Logs", no need to try more
            if (logFileContent && logFileContent.trim() && logFileContent.includes('Collected Logs')) {
              CloudRunnerLogger.log('Found "Collected Logs" in fallback content, stopping attempts.');
              break;
            }
            try {
              CloudRunnerLogger.log(`Trying fallback method: ${attempt.substring(0, 80)}...`);
              const result = await CloudRunnerSystem.Run(attempt, true, true);
              if (result && result.trim()) {
                // Prefer content that has "Collected Logs" over content that doesn't
                if (!logFileContent || !logFileContent.includes('Collected Logs')) {
                  logFileContent = result;
                  CloudRunnerLogger.log(
                    `Successfully read logs using fallback method (${logFileContent.length} chars): ${attempt.substring(
                      0,
                      50,
                    )}...`,
                  );

                  // If this content has "Collected Logs", we're done
                  if (logFileContent.includes('Collected Logs')) {
                    CloudRunnerLogger.log('Fallback method successfully captured "Collected Logs".');
                    break;
                  }
                } else {
                  CloudRunnerLogger.log(`Skipping this result - already have content with "Collected Logs".`);
                }
              } else {
                CloudRunnerLogger.log(`Fallback method returned empty result: ${attempt.substring(0, 50)}...`);
              }
            } catch (attemptError: any) {
              CloudRunnerLogger.log(
                `Fallback method failed: ${attempt.substring(0, 50)}... Error: ${
                  attemptError?.message || attemptError
                }`,
              );

              // Continue to next attempt
            }
          }

          if (!logFileContent || !logFileContent.trim()) {
            CloudRunnerLogger.logWarning(
              'Could not read log file from pod after all fallback attempts (may be OOM-killed or pod not accessible).',
            );
          }

          if (logFileContent && logFileContent.trim()) {
            CloudRunnerLogger.log(
              `Read log file from pod as fallback (${logFileContent.length} chars) to capture missing messages`,
            );

            // Get the lines we already have in output to avoid duplicates
            const existingLines = new Set(output.split('\n').map((line) => line.trim()));

            // Process the log file content line by line and add missing lines
            for (const line of logFileContent.split(`\n`)) {
              const trimmedLine = line.trim();
              const lowerLine = trimmedLine.toLowerCase();

              // Skip empty lines, kubectl errors, and lines we already have
              if (
                trimmedLine &&
                !lowerLine.includes('unable to retrieve container logs') &&
                !existingLines.has(trimmedLine)
              ) {
                // Process through FollowLogStreamService - it will append to output
                // Don't add to output manually since handleIteration does it
                ({ shouldReadLogs, shouldCleanup, output } = FollowLogStreamService.handleIteration(
                  trimmedLine,
                  shouldReadLogs,
                  shouldCleanup,
                  output,
                ));
              }
            }
          }
        } catch (logFileError: any) {
          CloudRunnerLogger.logWarning(
            `Could not read log file from pod as fallback: ${logFileError?.message || logFileError}`,
          );

          // Continue with existing output - this is a best-effort fallback
        }
      }

      // If output is still empty or missing "Collected Logs" after fallback attempts, add a warning message
      // This ensures BuildResults is not completely empty, which would cause test failures
      if ((needsFallback && output.trim().length === 0) || (!output.includes('Collected Logs') && shouldTryFallback)) {
        CloudRunnerLogger.logWarning(
          'Could not retrieve "Collected Logs" from pod after all attempts. Pod may have been killed before logs were written.',
        );

        // Add a minimal message so BuildResults is not completely empty
        // This helps with debugging and prevents test failures due to empty results
        if (output.trim().length === 0) {
          output = 'Pod logs unavailable - pod may have been terminated before logs could be collected.\n';
        } else if (!output.includes('Collected Logs')) {
          // We have some output but missing "Collected Logs" - append the fallback message
          output +=
            '\nPod logs incomplete - "Collected Logs" marker not found. Pod may have been terminated before post-build completed.\n';
        }
      }
    } catch (fallbackError: any) {
      CloudRunnerLogger.logWarning(
        `Error checking pod status for log file fallback: ${fallbackError?.message || fallbackError}`,
      );

      // If output is empty and we hit an error, still add a message so BuildResults isn't empty
      if (needsFallback && output.trim().length === 0) {
        output = `Error retrieving logs: ${fallbackError?.message || fallbackError}\n`;
      }

      // Continue with existing output - this is a best-effort fallback
    }

    // Filter out kubectl error messages from the final output
    // These errors can arrive via stderr even when kubectl fails
    // We filter them out so they don't pollute the BuildResults
    const lines = output.split('\n');
    const filteredLines = lines.filter((line) => !line.toLowerCase().includes('unable to retrieve container logs'));
    const filteredOutput = filteredLines.join('\n');

    // Log if we filtered out significant content
    const originalLineCount = lines.length;
    const filteredLineCount = filteredLines.length;
    if (originalLineCount > filteredLineCount) {
      CloudRunnerLogger.log(
        `Filtered out ${originalLineCount - filteredLineCount} kubectl error message(s) from output`,
      );
    }

    return filteredOutput;
  }

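  // Polls the pod status every 15 seconds (via waitUntil) until it leaves the Pending/Unknown phases.
  // Resolves true once the pod is Running, Succeeded, or Failed; log retrieval is still attempted for
  // completed pods. Throws when a permanent failure is detected (for example Unschedulable,
  // ImagePullBackOff, ErrImagePull, CreateContainerError) or when the pod stays Pending past the
  // configured number of checks.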
  static async watchUntilPodRunning(kubeClient: CoreV1Api, podName: string, namespace: string) {
    let waitComplete: boolean = false;
    let message = ``;
    let lastPhase = '';
    let consecutivePendingCount = 0;
    CloudRunnerLogger.log(`Watching ${podName} ${namespace}`);
    try {
      await waitUntil(
        async () => {
          const status = await kubeClient.readNamespacedPodStatus(podName, namespace);
          const phase = status?.body.status?.phase || 'Unknown';
          const conditions = status?.body.status?.conditions || [];
          const containerStatuses = status?.body.status?.containerStatuses || [];

          // Log phase changes
          if (phase !== lastPhase) {
            CloudRunnerLogger.log(`Pod ${podName} phase changed: ${lastPhase} -> ${phase}`);
            lastPhase = phase;
            consecutivePendingCount = 0;
          }

          // Check for failure conditions that mean the pod will never start (permanent failures)
          // Note: We don't treat "Failed" phase as a permanent failure because the pod might have
          // completed its work before being killed (OOM), and we should still try to get logs
          const permanentFailureReasons = [
            'Unschedulable',
            'ImagePullBackOff',
            'ErrImagePull',
            'CreateContainerError',
            'CreateContainerConfigError',
          ];
          const hasPermanentFailureCondition = conditions.some((condition: any) =>
            permanentFailureReasons.some((reason) => condition.reason?.includes(reason)),
          );
          const hasPermanentFailureContainerStatus = containerStatuses.some((containerStatus: any) =>
            permanentFailureReasons.some((reason) => containerStatus.state?.waiting?.reason?.includes(reason)),
          );

          // Only treat permanent failures as errors - pods that completed (Failed/Succeeded) should continue
          if (hasPermanentFailureCondition || hasPermanentFailureContainerStatus) {
            // Get detailed failure information
            const failureCondition = conditions.find((condition: any) =>
              permanentFailureReasons.some((reason) => condition.reason?.includes(reason)),
            );
            const failureContainer = containerStatuses.find((containerStatus: any) =>
              permanentFailureReasons.some((reason) => containerStatus.state?.waiting?.reason?.includes(reason)),
            );

            message = `Pod ${podName} failed to start (permanent failure):\nPhase: ${phase}\n`;
            if (failureCondition) {
              message += `Condition Reason: ${failureCondition.reason}\nCondition Message: ${failureCondition.message}\n`;
            }
            if (failureContainer) {
              message += `Container Reason: ${failureContainer.state?.waiting?.reason}\nContainer Message: ${failureContainer.state?.waiting?.message}\n`;
            }

            // Log pod events for additional context
            try {
              const events = await kubeClient.listNamespacedEvent(namespace);
              const podEvents = events.body.items
                .filter((x) => x.involvedObject?.name === podName)
                .map((x) => ({
                  message: x.message || ``,
                  reason: x.reason || ``,
                  type: x.type || ``,
                }));
              if (podEvents.length > 0) {
                message += `\nRecent Events:\n${JSON.stringify(podEvents.slice(-5), undefined, 2)}`;
              }
            } catch (eventError) {
              // Ignore event fetch errors
            }

            CloudRunnerLogger.logWarning(message);

            // For permanent failures, mark as incomplete and store the error message
            // We'll throw an error after the wait loop exits
            waitComplete = false;

            return true; // Return true to exit wait loop
          }

          // Pod is complete if it's not Pending or Unknown - it might be Running, Succeeded, or Failed
          // For Failed/Succeeded pods, we still want to try to get logs, so we mark as complete
          waitComplete = phase !== 'Pending' && phase !== 'Unknown';

          // If pod completed (Succeeded/Failed), log it but don't throw - we'll try to get logs
          if (waitComplete && phase !== 'Running') {
            CloudRunnerLogger.log(`Pod ${podName} completed with phase: ${phase}. Will attempt to retrieve logs.`);
          }

          if (phase === 'Pending') {
            consecutivePendingCount++;

            // Check for scheduling failures in events (faster than waiting for conditions)
            try {
              const events = await kubeClient.listNamespacedEvent(namespace);
              const podEvents = events.body.items.filter((x) => x.involvedObject?.name === podName);
              const failedSchedulingEvents = podEvents.filter(
                (x) => x.reason === 'FailedScheduling' || x.reason === 'SchedulingGated',
              );
              if (failedSchedulingEvents.length > 0) {
                const schedulingMessage = failedSchedulingEvents
                  .map((x) => `${x.reason}: ${x.message || ''}`)
                  .join('; ');
                message = `Pod ${podName} cannot be scheduled:\n${schedulingMessage}`;
                CloudRunnerLogger.logWarning(message);
                waitComplete = false;

                return true; // Exit wait loop to throw error
              }

              // Check if pod is actively pulling an image - if so, allow more time
              const isPullingImage = podEvents.some(
                (x) => x.reason === 'Pulling' || x.reason === 'Pulled' || x.message?.includes('Pulling image'),
              );
              const hasImagePullError = podEvents.some(
                (x) => x.reason === 'Failed' && (x.message?.includes('pull') || x.message?.includes('image')),
              );
              if (hasImagePullError) {
                message = `Pod ${podName} failed to pull image. Check image availability and credentials.`;
                CloudRunnerLogger.logWarning(message);
                waitComplete = false;

                return true; // Exit wait loop to throw error
              }

              // If actively pulling image, reset pending count to allow more time
              // Large images (like Unity 3.9GB) can take 3-5 minutes to pull
              if (isPullingImage && consecutivePendingCount > 4) {
                CloudRunnerLogger.log(
                  `Pod ${podName} is pulling image (check ${consecutivePendingCount}). This may take several minutes for large images.`,
                );

                // Don't increment consecutivePendingCount if we're actively pulling
                consecutivePendingCount = Math.max(4, consecutivePendingCount - 1);
              }
            } catch {
              // Ignore event fetch errors
            }

            // For tests, allow more time if the image is still being pulled (large images need 5+ minutes)
            // Otherwise fail faster in tests when stuck in Pending (2 minutes = 8 checks at 15s interval)
            const isTest = process.env['cloudRunnerTests'] === 'true';
            const isPullingImage =
              containerStatuses.some(
                (cs: any) => cs.state?.waiting?.reason === 'ImagePull' || cs.state?.waiting?.reason === 'ErrImagePull',
              ) || conditions.some((c: any) => c.reason?.includes('Pulling'));

            // 80 checks (~20 minutes) when pulling an image in tests or when not running tests; 8 checks (~2 minutes) for other test runs
            const maxPendingChecks = isTest && isPullingImage ? 80 : isTest ? 8 : 80;
            if (consecutivePendingCount >= maxPendingChecks) {
              message = `Pod ${podName} stuck in Pending state for too long (${consecutivePendingCount} checks). This indicates a scheduling problem.`;

              // Get events for context
              try {
                const events = await kubeClient.listNamespacedEvent(namespace);
                const podEvents = events.body.items
                  .filter((x) => x.involvedObject?.name === podName)
                  .slice(-10)
                  .map((x) => `${x.type}: ${x.reason} - ${x.message}`);
                if (podEvents.length > 0) {
                  message += `\n\nRecent Events:\n${podEvents.join('\n')}`;
                }

                // Get pod details to check for scheduling issues
                try {
                  const podStatus = await kubeClient.readNamespacedPodStatus(podName, namespace);
                  const podSpec = podStatus.body.spec;
                  const podStatusDetails = podStatus.body.status;

                  // Check container resource requests
                  if (podSpec?.containers?.[0]?.resources?.requests) {
                    const requests = podSpec.containers[0].resources.requests;
                    message += `\n\nContainer Resource Requests:\n CPU: ${requests.cpu || 'not set'}\n Memory: ${
                      requests.memory || 'not set'
                    }\n Ephemeral Storage: ${requests['ephemeral-storage'] || 'not set'}`;
                  }

                  // Check node selector and tolerations
                  if (podSpec?.nodeSelector && Object.keys(podSpec.nodeSelector).length > 0) {
                    message += `\n\nNode Selector: ${JSON.stringify(podSpec.nodeSelector)}`;
                  }
                  if (podSpec?.tolerations && podSpec.tolerations.length > 0) {
                    message += `\n\nTolerations: ${JSON.stringify(podSpec.tolerations)}`;
                  }

                  // Check pod conditions for scheduling issues
                  if (podStatusDetails?.conditions) {
                    const unschedulable = podStatusDetails.conditions.find(
                      (c: any) => c.type === 'PodScheduled' && c.status === 'False',
                    );
                    if (unschedulable) {
                      message += `\n\nScheduling Issue: ${unschedulable.reason || 'Unknown'} - ${
                        unschedulable.message || 'No message'
                      }`;
                    }
                  }
                } catch (podStatusError) {
                  // Ignore pod status fetch errors
                }
              } catch {
                // Ignore event fetch errors
              }

              CloudRunnerLogger.logWarning(message);
              waitComplete = false;

              return true; // Exit wait loop to throw error
            }

            // Log diagnostic info every 4 checks (1 minute) if still pending
            if (consecutivePendingCount % 4 === 0) {
              const pendingMessage = `Pod ${podName} still Pending (check ${consecutivePendingCount}/${maxPendingChecks}). Phase: ${phase}`;
              const conditionMessages = conditions
                .map((c: any) => `${c.type}: ${c.reason || 'N/A'} - ${c.message || 'N/A'}`)
                .join('; ');
              CloudRunnerLogger.log(`${pendingMessage}. Conditions: ${conditionMessages || 'None'}`);

              // Log events periodically to help diagnose
              if (consecutivePendingCount % 8 === 0) {
                try {
                  const events = await kubeClient.listNamespacedEvent(namespace);
                  const podEvents = events.body.items
                    .filter((x) => x.involvedObject?.name === podName)
                    .slice(-3)
                    .map((x) => `${x.type}: ${x.reason} - ${x.message}`)
                    .join('; ');
                  if (podEvents) {
                    CloudRunnerLogger.log(`Recent pod events: ${podEvents}`);
                  }
                } catch {
                  // Ignore event fetch errors
                }
              }
            }
          }

          message = `Phase:${phase} \n Reason:${conditions[0]?.reason || ''} \n Message:${
            conditions[0]?.message || ''
          }`;
          if (waitComplete || phase !== 'Pending') return true;

          return false;
        },
        {
          timeout: process.env['cloudRunnerTests'] === 'true' ? 300000 : 2000000, // 5 minutes for tests, ~33 minutes for production
          intervalBetweenAttempts: 15000, // 15 seconds
        },
      );
    } catch (waitError: any) {
      // If waitUntil times out or throws, get final pod status
      try {
        const finalStatus = await kubeClient.readNamespacedPodStatus(podName, namespace);
        const phase = finalStatus?.body.status?.phase || 'Unknown';
        const conditions = finalStatus?.body.status?.conditions || [];
        message = `Pod ${podName} timed out waiting to start.\nFinal Phase: ${phase}\n`;
        message += conditions.map((c: any) => `${c.type}: ${c.reason} - ${c.message}`).join('\n');

        // Get events for context
        try {
          const events = await kubeClient.listNamespacedEvent(namespace);
          const podEvents = events.body.items
            .filter((x) => x.involvedObject?.name === podName)
            .slice(-5)
            .map((x) => `${x.type}: ${x.reason} - ${x.message}`);
          if (podEvents.length > 0) {
            message += `\n\nRecent Events:\n${podEvents.join('\n')}`;
          }
        } catch {
          // Ignore event fetch errors
        }
        CloudRunnerLogger.logWarning(message);
      } catch (statusError) {
        message = `Pod ${podName} timed out and could not retrieve final status: ${waitError?.message || waitError}`;
        CloudRunnerLogger.logWarning(message);
      }
      throw new Error(`Pod ${podName} failed to start within timeout. ${message}`);
    }

    // Only throw if we detected a permanent failure condition
    // If the pod completed (Failed/Succeeded), we should still try to get logs
    if (!waitComplete) {
      // Check the final phase to see if it's a permanent failure or just completed
      try {
        const finalStatus = await kubeClient.readNamespacedPodStatus(podName, namespace);
        const finalPhase = finalStatus?.body.status?.phase || 'Unknown';
        if (finalPhase === 'Failed' || finalPhase === 'Succeeded') {
          CloudRunnerLogger.logWarning(
            `Pod ${podName} completed with phase ${finalPhase} before reaching Running state. Will attempt to retrieve logs.`,
          );

          return true; // Allow workflow to continue and try to get logs
        }
      } catch {
        // If we can't check status, fall through to throw error
      }
      CloudRunnerLogger.logWarning(`Pod ${podName} did not reach running state: ${message}`);
      throw new Error(`Pod ${podName} did not start successfully: ${message}`);
    }

    return waitComplete;
  }
}

export default KubernetesTaskRunner;
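
// Illustrative usage sketch (not invoked here): the Kubernetes provider is expected to wire these
// statics together roughly like the following. The job, pod, container, and namespace names below
// are placeholders, not values used by this module.
//
//   const kubeConfig = new KubeConfig();
//   kubeConfig.loadFromDefault();
//   const kubeClient = kubeConfig.makeApiClient(CoreV1Api);
//   await KubernetesTaskRunner.watchUntilPodRunning(kubeClient, 'example-pod', 'default');
//   const buildResults = await KubernetesTaskRunner.runTask(
//     kubeConfig,
//     kubeClient,
//     'example-job',
//     'example-pod',
//     'main',
//     'default',
//   );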