pr feedback

cloud-runner-develop
Frostebite 2025-12-06 02:15:50 +00:00
parent a9c76d0324
commit f61478ba77
4 changed files with 42 additions and 5 deletions

23
dist/index.js vendored
View File

@@ -3940,7 +3940,9 @@ class KubernetesJobSpecFactory {
},
};
}
job.spec.template.spec.containers[0].resources.requests[`ephemeral-storage`] = '10Gi';
// Set ephemeral-storage request to a reasonable value (2Gi) to prevent evictions
// The node needs some free space, so requesting 10Gi when node only has ~2.6GB available causes evictions
job.spec.template.spec.containers[0].resources.requests[`ephemeral-storage`] = '2Gi';
return job;
}
}
@@ -4410,8 +4412,9 @@ class KubernetesTaskRunner {
while (true) {
await new Promise((resolve) => setTimeout(resolve, 3000));
cloud_runner_logger_1.default.log(`Streaming logs from pod: ${podName} container: ${containerName} namespace: ${namespace} ${cloud_runner_1.default.buildParameters.kubeVolumeSize}/${cloud_runner_1.default.buildParameters.containerCpu}/${cloud_runner_1.default.buildParameters.containerMemory}`);
const isRunning = await kubernetes_pods_1.default.IsPodRunning(podName, namespace, kubeClient);
let extraFlags = ``;
extraFlags += (await kubernetes_pods_1.default.IsPodRunning(podName, namespace, kubeClient))
extraFlags += isRunning
? ` -f -c ${containerName} -n ${namespace}`
: ` --previous -n ${namespace}`;
const callback = (outputChunk) => {
@@ -4428,6 +4431,17 @@ class KubernetesTaskRunner {
await new Promise((resolve) => setTimeout(resolve, 3000));
const continueStreaming = await kubernetes_pods_1.default.IsPodRunning(podName, namespace, kubeClient);
cloud_runner_logger_1.default.log(`K8s logging error ${error} ${continueStreaming}`);
// If pod is not running and we tried --previous but it failed, try without --previous
if (!isRunning && !continueStreaming && error?.message?.includes('previous terminated container')) {
cloud_runner_logger_1.default.log(`Previous container not found, trying current container logs...`);
try {
await cloud_runner_system_1.CloudRunnerSystem.Run(`kubectl logs ${podName} -c ${containerName} -n ${namespace}`, false, true, callback);
}
catch (fallbackError) {
cloud_runner_logger_1.default.log(`Fallback log fetch also failed: ${fallbackError}`);
// If both fail, continue - we'll get what we can from pod status
}
}
if (continueStreaming) {
continue;
}
@@ -4435,6 +4449,11 @@ class KubernetesTaskRunner {
retriesAfterFinish++;
continue;
}
// Don't throw if we're just missing previous container logs - this is non-fatal
if (error?.message?.includes('previous terminated container')) {
cloud_runner_logger_1.default.logWarning(`Could not fetch previous container logs, but continuing...`);
break;
}
throw error;
}
if (follow_log_stream_service_1.FollowLogStreamService.DidReceiveEndOfTransmission) {

2
dist/index.js.map vendored

File diff suppressed because one or more lines are too long

View File

@@ -145,7 +145,9 @@ class KubernetesJobSpecFactory {
};
}
job.spec.template.spec.containers[0].resources.requests[`ephemeral-storage`] = '10Gi';
// Set ephemeral-storage request to a reasonable value (2Gi) to prevent evictions
// The node needs some free space, so requesting 10Gi when node only has ~2.6GB available causes evictions
job.spec.template.spec.containers[0].resources.requests[`ephemeral-storage`] = '2Gi';
return job;
}

View File

@@ -28,8 +28,9 @@ class KubernetesTaskRunner {
CloudRunnerLogger.log(
`Streaming logs from pod: ${podName} container: ${containerName} namespace: ${namespace} ${CloudRunner.buildParameters.kubeVolumeSize}/${CloudRunner.buildParameters.containerCpu}/${CloudRunner.buildParameters.containerMemory}`,
);
const isRunning = await KubernetesPods.IsPodRunning(podName, namespace, kubeClient);
let extraFlags = ``;
extraFlags += (await KubernetesPods.IsPodRunning(podName, namespace, kubeClient))
extraFlags += isRunning
? ` -f -c ${containerName} -n ${namespace}`
: ` --previous -n ${namespace}`;
@@ -52,6 +53,16 @@ class KubernetesTaskRunner {
await new Promise((resolve) => setTimeout(resolve, 3000));
const continueStreaming = await KubernetesPods.IsPodRunning(podName, namespace, kubeClient);
CloudRunnerLogger.log(`K8s logging error ${error} ${continueStreaming}`);
// If pod is not running and we tried --previous but it failed, try without --previous
if (!isRunning && !continueStreaming && error?.message?.includes('previous terminated container')) {
CloudRunnerLogger.log(`Previous container not found, trying current container logs...`);
try {
await CloudRunnerSystem.Run(`kubectl logs ${podName} -c ${containerName} -n ${namespace}`, false, true, callback);
} catch (fallbackError: any) {
CloudRunnerLogger.log(`Fallback log fetch also failed: ${fallbackError}`);
// If both fail, continue - we'll get what we can from pod status
}
}
if (continueStreaming) {
continue;
}
@@ -60,6 +71,11 @@ class KubernetesTaskRunner {
continue;
}
// Don't throw if we're just missing previous container logs - this is non-fatal
if (error?.message?.includes('previous terminated container')) {
CloudRunnerLogger.logWarning(`Could not fetch previous container logs, but continuing...`);
break;
}
throw error;
}
if (FollowLogStreamService.DidReceiveEndOfTransmission) {