pr feedback

cloud-runner-develop
Frostebite 2025-12-06 02:15:50 +00:00
parent a9c76d0324
commit f61478ba77
4 changed files with 42 additions and 5 deletions

23
dist/index.js vendored
View File

@ -3940,7 +3940,9 @@ class KubernetesJobSpecFactory {
}, },
}; };
} }
- job.spec.template.spec.containers[0].resources.requests[`ephemeral-storage`] = '10Gi';
+ // Set ephemeral-storage request to a reasonable value (2Gi) to prevent evictions
+ // The node needs some free space, so requesting 10Gi when node only has ~2.6GB available causes evictions
+ job.spec.template.spec.containers[0].resources.requests[`ephemeral-storage`] = '2Gi';
return job; return job;
} }
} }
@ -4410,8 +4412,9 @@ class KubernetesTaskRunner {
while (true) { while (true) {
await new Promise((resolve) => setTimeout(resolve, 3000)); await new Promise((resolve) => setTimeout(resolve, 3000));
cloud_runner_logger_1.default.log(`Streaming logs from pod: ${podName} container: ${containerName} namespace: ${namespace} ${cloud_runner_1.default.buildParameters.kubeVolumeSize}/${cloud_runner_1.default.buildParameters.containerCpu}/${cloud_runner_1.default.buildParameters.containerMemory}`); cloud_runner_logger_1.default.log(`Streaming logs from pod: ${podName} container: ${containerName} namespace: ${namespace} ${cloud_runner_1.default.buildParameters.kubeVolumeSize}/${cloud_runner_1.default.buildParameters.containerCpu}/${cloud_runner_1.default.buildParameters.containerMemory}`);
const isRunning = await kubernetes_pods_1.default.IsPodRunning(podName, namespace, kubeClient);
let extraFlags = ``; let extraFlags = ``;
- extraFlags += (await kubernetes_pods_1.default.IsPodRunning(podName, namespace, kubeClient))
+ extraFlags += isRunning
? ` -f -c ${containerName} -n ${namespace}` ? ` -f -c ${containerName} -n ${namespace}`
: ` --previous -n ${namespace}`; : ` --previous -n ${namespace}`;
const callback = (outputChunk) => { const callback = (outputChunk) => {
@ -4428,6 +4431,17 @@ class KubernetesTaskRunner {
await new Promise((resolve) => setTimeout(resolve, 3000)); await new Promise((resolve) => setTimeout(resolve, 3000));
const continueStreaming = await kubernetes_pods_1.default.IsPodRunning(podName, namespace, kubeClient); const continueStreaming = await kubernetes_pods_1.default.IsPodRunning(podName, namespace, kubeClient);
cloud_runner_logger_1.default.log(`K8s logging error ${error} ${continueStreaming}`); cloud_runner_logger_1.default.log(`K8s logging error ${error} ${continueStreaming}`);
// If pod is not running and we tried --previous but it failed, try without --previous
if (!isRunning && !continueStreaming && error?.message?.includes('previous terminated container')) {
cloud_runner_logger_1.default.log(`Previous container not found, trying current container logs...`);
try {
await cloud_runner_system_1.CloudRunnerSystem.Run(`kubectl logs ${podName} -c ${containerName} -n ${namespace}`, false, true, callback);
}
catch (fallbackError) {
cloud_runner_logger_1.default.log(`Fallback log fetch also failed: ${fallbackError}`);
// If both fail, continue - we'll get what we can from pod status
}
}
if (continueStreaming) { if (continueStreaming) {
continue; continue;
} }
@ -4435,6 +4449,11 @@ class KubernetesTaskRunner {
retriesAfterFinish++; retriesAfterFinish++;
continue; continue;
} }
// Don't throw if we're just missing previous container logs - this is non-fatal
if (error?.message?.includes('previous terminated container')) {
cloud_runner_logger_1.default.logWarning(`Could not fetch previous container logs, but continuing...`);
break;
}
throw error; throw error;
} }
if (follow_log_stream_service_1.FollowLogStreamService.DidReceiveEndOfTransmission) { if (follow_log_stream_service_1.FollowLogStreamService.DidReceiveEndOfTransmission) {

2
dist/index.js.map vendored

File diff suppressed because one or more lines are too long

View File

@ -145,7 +145,9 @@ class KubernetesJobSpecFactory {
}; };
} }
- job.spec.template.spec.containers[0].resources.requests[`ephemeral-storage`] = '10Gi';
+ // Set ephemeral-storage request to a reasonable value (2Gi) to prevent evictions
+ // The node needs some free space, so requesting 10Gi when node only has ~2.6GB available causes evictions
+ job.spec.template.spec.containers[0].resources.requests[`ephemeral-storage`] = '2Gi';
return job; return job;
} }

View File

@ -28,8 +28,9 @@ class KubernetesTaskRunner {
CloudRunnerLogger.log( CloudRunnerLogger.log(
`Streaming logs from pod: ${podName} container: ${containerName} namespace: ${namespace} ${CloudRunner.buildParameters.kubeVolumeSize}/${CloudRunner.buildParameters.containerCpu}/${CloudRunner.buildParameters.containerMemory}`, `Streaming logs from pod: ${podName} container: ${containerName} namespace: ${namespace} ${CloudRunner.buildParameters.kubeVolumeSize}/${CloudRunner.buildParameters.containerCpu}/${CloudRunner.buildParameters.containerMemory}`,
); );
const isRunning = await KubernetesPods.IsPodRunning(podName, namespace, kubeClient);
let extraFlags = ``; let extraFlags = ``;
- extraFlags += (await KubernetesPods.IsPodRunning(podName, namespace, kubeClient))
+ extraFlags += isRunning
? ` -f -c ${containerName} -n ${namespace}` ? ` -f -c ${containerName} -n ${namespace}`
: ` --previous -n ${namespace}`; : ` --previous -n ${namespace}`;
@ -52,6 +53,16 @@ class KubernetesTaskRunner {
await new Promise((resolve) => setTimeout(resolve, 3000)); await new Promise((resolve) => setTimeout(resolve, 3000));
const continueStreaming = await KubernetesPods.IsPodRunning(podName, namespace, kubeClient); const continueStreaming = await KubernetesPods.IsPodRunning(podName, namespace, kubeClient);
CloudRunnerLogger.log(`K8s logging error ${error} ${continueStreaming}`); CloudRunnerLogger.log(`K8s logging error ${error} ${continueStreaming}`);
// If pod is not running and we tried --previous but it failed, try without --previous
if (!isRunning && !continueStreaming && error?.message?.includes('previous terminated container')) {
CloudRunnerLogger.log(`Previous container not found, trying current container logs...`);
try {
await CloudRunnerSystem.Run(`kubectl logs ${podName} -c ${containerName} -n ${namespace}`, false, true, callback);
} catch (fallbackError: any) {
CloudRunnerLogger.log(`Fallback log fetch also failed: ${fallbackError}`);
// If both fail, continue - we'll get what we can from pod status
}
}
if (continueStreaming) { if (continueStreaming) {
continue; continue;
} }
@ -60,6 +71,11 @@ class KubernetesTaskRunner {
continue; continue;
} }
// Don't throw if we're just missing previous container logs - this is non-fatal
if (error?.message?.includes('previous terminated container')) {
CloudRunnerLogger.logWarning(`Could not fetch previous container logs, but continuing...`);
break;
}
throw error; throw error;
} }
if (FollowLogStreamService.DidReceiveEndOfTransmission) { if (FollowLogStreamService.DidReceiveEndOfTransmission) {