pr feedback
parent
a9c76d0324
commit
f61478ba77
|
|
@ -3940,7 +3940,9 @@ class KubernetesJobSpecFactory {
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
job.spec.template.spec.containers[0].resources.requests[`ephemeral-storage`] = '10Gi';
|
// Set ephemeral-storage request to a reasonable value (2Gi) to prevent evictions
|
||||||
|
// The node needs some free space, so requesting 10Gi when the node only has ~2.6GB available causes evictions
|
||||||
|
job.spec.template.spec.containers[0].resources.requests[`ephemeral-storage`] = '2Gi';
|
||||||
return job;
|
return job;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -4410,8 +4412,9 @@ class KubernetesTaskRunner {
|
||||||
while (true) {
|
while (true) {
|
||||||
await new Promise((resolve) => setTimeout(resolve, 3000));
|
await new Promise((resolve) => setTimeout(resolve, 3000));
|
||||||
cloud_runner_logger_1.default.log(`Streaming logs from pod: ${podName} container: ${containerName} namespace: ${namespace} ${cloud_runner_1.default.buildParameters.kubeVolumeSize}/${cloud_runner_1.default.buildParameters.containerCpu}/${cloud_runner_1.default.buildParameters.containerMemory}`);
|
cloud_runner_logger_1.default.log(`Streaming logs from pod: ${podName} container: ${containerName} namespace: ${namespace} ${cloud_runner_1.default.buildParameters.kubeVolumeSize}/${cloud_runner_1.default.buildParameters.containerCpu}/${cloud_runner_1.default.buildParameters.containerMemory}`);
|
||||||
|
const isRunning = await kubernetes_pods_1.default.IsPodRunning(podName, namespace, kubeClient);
|
||||||
let extraFlags = ``;
|
let extraFlags = ``;
|
||||||
extraFlags += (await kubernetes_pods_1.default.IsPodRunning(podName, namespace, kubeClient))
|
extraFlags += isRunning
|
||||||
? ` -f -c ${containerName} -n ${namespace}`
|
? ` -f -c ${containerName} -n ${namespace}`
|
||||||
: ` --previous -n ${namespace}`;
|
: ` --previous -n ${namespace}`;
|
||||||
const callback = (outputChunk) => {
|
const callback = (outputChunk) => {
|
||||||
|
|
@ -4428,6 +4431,17 @@ class KubernetesTaskRunner {
|
||||||
await new Promise((resolve) => setTimeout(resolve, 3000));
|
await new Promise((resolve) => setTimeout(resolve, 3000));
|
||||||
const continueStreaming = await kubernetes_pods_1.default.IsPodRunning(podName, namespace, kubeClient);
|
const continueStreaming = await kubernetes_pods_1.default.IsPodRunning(podName, namespace, kubeClient);
|
||||||
cloud_runner_logger_1.default.log(`K8s logging error ${error} ${continueStreaming}`);
|
cloud_runner_logger_1.default.log(`K8s logging error ${error} ${continueStreaming}`);
|
||||||
|
// If pod is not running and we tried --previous but it failed, try without --previous
|
||||||
|
if (!isRunning && !continueStreaming && error?.message?.includes('previous terminated container')) {
|
||||||
|
cloud_runner_logger_1.default.log(`Previous container not found, trying current container logs...`);
|
||||||
|
try {
|
||||||
|
await cloud_runner_system_1.CloudRunnerSystem.Run(`kubectl logs ${podName} -c ${containerName} -n ${namespace}`, false, true, callback);
|
||||||
|
}
|
||||||
|
catch (fallbackError) {
|
||||||
|
cloud_runner_logger_1.default.log(`Fallback log fetch also failed: ${fallbackError}`);
|
||||||
|
// If both fail, continue - we'll get what we can from pod status
|
||||||
|
}
|
||||||
|
}
|
||||||
if (continueStreaming) {
|
if (continueStreaming) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
@ -4435,6 +4449,11 @@ class KubernetesTaskRunner {
|
||||||
retriesAfterFinish++;
|
retriesAfterFinish++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
// Don't throw if we're just missing previous container logs - this is non-fatal
|
||||||
|
if (error?.message?.includes('previous terminated container')) {
|
||||||
|
cloud_runner_logger_1.default.logWarning(`Could not fetch previous container logs, but continuing...`);
|
||||||
|
break;
|
||||||
|
}
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
if (follow_log_stream_service_1.FollowLogStreamService.DidReceiveEndOfTransmission) {
|
if (follow_log_stream_service_1.FollowLogStreamService.DidReceiveEndOfTransmission) {
|
||||||
|
|
|
||||||
File diff suppressed because one or more lines are too long
|
|
@ -145,7 +145,9 @@ class KubernetesJobSpecFactory {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
job.spec.template.spec.containers[0].resources.requests[`ephemeral-storage`] = '10Gi';
|
// Set ephemeral-storage request to a reasonable value (2Gi) to prevent evictions
|
||||||
|
// The node needs some free space, so requesting 10Gi when the node only has ~2.6GB available causes evictions
|
||||||
|
job.spec.template.spec.containers[0].resources.requests[`ephemeral-storage`] = '2Gi';
|
||||||
|
|
||||||
return job;
|
return job;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -28,8 +28,9 @@ class KubernetesTaskRunner {
|
||||||
CloudRunnerLogger.log(
|
CloudRunnerLogger.log(
|
||||||
`Streaming logs from pod: ${podName} container: ${containerName} namespace: ${namespace} ${CloudRunner.buildParameters.kubeVolumeSize}/${CloudRunner.buildParameters.containerCpu}/${CloudRunner.buildParameters.containerMemory}`,
|
`Streaming logs from pod: ${podName} container: ${containerName} namespace: ${namespace} ${CloudRunner.buildParameters.kubeVolumeSize}/${CloudRunner.buildParameters.containerCpu}/${CloudRunner.buildParameters.containerMemory}`,
|
||||||
);
|
);
|
||||||
|
const isRunning = await KubernetesPods.IsPodRunning(podName, namespace, kubeClient);
|
||||||
let extraFlags = ``;
|
let extraFlags = ``;
|
||||||
extraFlags += (await KubernetesPods.IsPodRunning(podName, namespace, kubeClient))
|
extraFlags += isRunning
|
||||||
? ` -f -c ${containerName} -n ${namespace}`
|
? ` -f -c ${containerName} -n ${namespace}`
|
||||||
: ` --previous -n ${namespace}`;
|
: ` --previous -n ${namespace}`;
|
||||||
|
|
||||||
|
|
@ -52,6 +53,16 @@ class KubernetesTaskRunner {
|
||||||
await new Promise((resolve) => setTimeout(resolve, 3000));
|
await new Promise((resolve) => setTimeout(resolve, 3000));
|
||||||
const continueStreaming = await KubernetesPods.IsPodRunning(podName, namespace, kubeClient);
|
const continueStreaming = await KubernetesPods.IsPodRunning(podName, namespace, kubeClient);
|
||||||
CloudRunnerLogger.log(`K8s logging error ${error} ${continueStreaming}`);
|
CloudRunnerLogger.log(`K8s logging error ${error} ${continueStreaming}`);
|
||||||
|
// If pod is not running and we tried --previous but it failed, try without --previous
|
||||||
|
if (!isRunning && !continueStreaming && error?.message?.includes('previous terminated container')) {
|
||||||
|
CloudRunnerLogger.log(`Previous container not found, trying current container logs...`);
|
||||||
|
try {
|
||||||
|
await CloudRunnerSystem.Run(`kubectl logs ${podName} -c ${containerName} -n ${namespace}`, false, true, callback);
|
||||||
|
} catch (fallbackError: any) {
|
||||||
|
CloudRunnerLogger.log(`Fallback log fetch also failed: ${fallbackError}`);
|
||||||
|
// If both fail, continue - we'll get what we can from pod status
|
||||||
|
}
|
||||||
|
}
|
||||||
if (continueStreaming) {
|
if (continueStreaming) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
@ -60,6 +71,11 @@ class KubernetesTaskRunner {
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
// Don't throw if we're just missing previous container logs - this is non-fatal
|
||||||
|
if (error?.message?.includes('previous terminated container')) {
|
||||||
|
CloudRunnerLogger.logWarning(`Could not fetch previous container logs, but continuing...`);
|
||||||
|
break;
|
||||||
|
}
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
if (FollowLogStreamService.DidReceiveEndOfTransmission) {
|
if (FollowLogStreamService.DidReceiveEndOfTransmission) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue