pr feedback
parent
a9c76d0324
commit
f61478ba77
|
|
@ -3940,7 +3940,9 @@ class KubernetesJobSpecFactory {
|
|||
},
|
||||
};
|
||||
}
|
||||
job.spec.template.spec.containers[0].resources.requests[`ephemeral-storage`] = '10Gi';
|
||||
// Set ephemeral-storage request to a reasonable value (2Gi) to prevent evictions
|
||||
// The node needs some free space, so requesting 10Gi when node only has ~2.6GB available causes evictions
|
||||
job.spec.template.spec.containers[0].resources.requests[`ephemeral-storage`] = '2Gi';
|
||||
return job;
|
||||
}
|
||||
}
|
||||
|
|
@ -4410,8 +4412,9 @@ class KubernetesTaskRunner {
|
|||
while (true) {
|
||||
await new Promise((resolve) => setTimeout(resolve, 3000));
|
||||
cloud_runner_logger_1.default.log(`Streaming logs from pod: ${podName} container: ${containerName} namespace: ${namespace} ${cloud_runner_1.default.buildParameters.kubeVolumeSize}/${cloud_runner_1.default.buildParameters.containerCpu}/${cloud_runner_1.default.buildParameters.containerMemory}`);
|
||||
const isRunning = await kubernetes_pods_1.default.IsPodRunning(podName, namespace, kubeClient);
|
||||
let extraFlags = ``;
|
||||
extraFlags += (await kubernetes_pods_1.default.IsPodRunning(podName, namespace, kubeClient))
|
||||
extraFlags += isRunning
|
||||
? ` -f -c ${containerName} -n ${namespace}`
|
||||
: ` --previous -n ${namespace}`;
|
||||
const callback = (outputChunk) => {
|
||||
|
|
@ -4428,6 +4431,17 @@ class KubernetesTaskRunner {
|
|||
await new Promise((resolve) => setTimeout(resolve, 3000));
|
||||
const continueStreaming = await kubernetes_pods_1.default.IsPodRunning(podName, namespace, kubeClient);
|
||||
cloud_runner_logger_1.default.log(`K8s logging error ${error} ${continueStreaming}`);
|
||||
// If pod is not running and we tried --previous but it failed, try without --previous
|
||||
if (!isRunning && !continueStreaming && error?.message?.includes('previous terminated container')) {
|
||||
cloud_runner_logger_1.default.log(`Previous container not found, trying current container logs...`);
|
||||
try {
|
||||
await cloud_runner_system_1.CloudRunnerSystem.Run(`kubectl logs ${podName} -c ${containerName} -n ${namespace}`, false, true, callback);
|
||||
}
|
||||
catch (fallbackError) {
|
||||
cloud_runner_logger_1.default.log(`Fallback log fetch also failed: ${fallbackError}`);
|
||||
// If both fail, continue - we'll get what we can from pod status
|
||||
}
|
||||
}
|
||||
if (continueStreaming) {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -4435,6 +4449,11 @@ class KubernetesTaskRunner {
|
|||
retriesAfterFinish++;
|
||||
continue;
|
||||
}
|
||||
// Don't throw if we're just missing previous container logs - this is non-fatal
|
||||
if (error?.message?.includes('previous terminated container')) {
|
||||
cloud_runner_logger_1.default.logWarning(`Could not fetch previous container logs, but continuing...`);
|
||||
break;
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
if (follow_log_stream_service_1.FollowLogStreamService.DidReceiveEndOfTransmission) {
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -145,7 +145,9 @@ class KubernetesJobSpecFactory {
|
|||
};
|
||||
}
|
||||
|
||||
job.spec.template.spec.containers[0].resources.requests[`ephemeral-storage`] = '10Gi';
|
||||
// Set ephemeral-storage request to a reasonable value (2Gi) to prevent evictions
|
||||
// The node needs some free space, so requesting 10Gi when node only has ~2.6GB available causes evictions
|
||||
job.spec.template.spec.containers[0].resources.requests[`ephemeral-storage`] = '2Gi';
|
||||
|
||||
return job;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,8 +28,9 @@ class KubernetesTaskRunner {
|
|||
CloudRunnerLogger.log(
|
||||
`Streaming logs from pod: ${podName} container: ${containerName} namespace: ${namespace} ${CloudRunner.buildParameters.kubeVolumeSize}/${CloudRunner.buildParameters.containerCpu}/${CloudRunner.buildParameters.containerMemory}`,
|
||||
);
|
||||
const isRunning = await KubernetesPods.IsPodRunning(podName, namespace, kubeClient);
|
||||
let extraFlags = ``;
|
||||
extraFlags += (await KubernetesPods.IsPodRunning(podName, namespace, kubeClient))
|
||||
extraFlags += isRunning
|
||||
? ` -f -c ${containerName} -n ${namespace}`
|
||||
: ` --previous -n ${namespace}`;
|
||||
|
||||
|
|
@ -52,6 +53,16 @@ class KubernetesTaskRunner {
|
|||
await new Promise((resolve) => setTimeout(resolve, 3000));
|
||||
const continueStreaming = await KubernetesPods.IsPodRunning(podName, namespace, kubeClient);
|
||||
CloudRunnerLogger.log(`K8s logging error ${error} ${continueStreaming}`);
|
||||
// If pod is not running and we tried --previous but it failed, try without --previous
|
||||
if (!isRunning && !continueStreaming && error?.message?.includes('previous terminated container')) {
|
||||
CloudRunnerLogger.log(`Previous container not found, trying current container logs...`);
|
||||
try {
|
||||
await CloudRunnerSystem.Run(`kubectl logs ${podName} -c ${containerName} -n ${namespace}`, false, true, callback);
|
||||
} catch (fallbackError: any) {
|
||||
CloudRunnerLogger.log(`Fallback log fetch also failed: ${fallbackError}`);
|
||||
// If both fail, continue - we'll get what we can from pod status
|
||||
}
|
||||
}
|
||||
if (continueStreaming) {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -60,6 +71,11 @@ class KubernetesTaskRunner {
|
|||
|
||||
continue;
|
||||
}
|
||||
// Don't throw if we're just missing previous container logs - this is non-fatal
|
||||
if (error?.message?.includes('previous terminated container')) {
|
||||
CloudRunnerLogger.logWarning(`Could not fetch previous container logs, but continuing...`);
|
||||
break;
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
if (FollowLogStreamService.DidReceiveEndOfTransmission) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue