PR feedback
parent
f9ef711978
commit
459b9298b2
|
|
@ -41,21 +41,36 @@ jobs:
|
|||
run: |
|
||||
curl -s https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | bash
|
||||
k3d version | cat
|
||||
- name: Start LocalStack (S3)
|
||||
uses: localstack/setup-localstack@v0.2.4
|
||||
with:
|
||||
install-awslocal: true
|
||||
- name: Configure LocalStack for k3d access
|
||||
- name: Start LocalStack (S3) for k3d access
|
||||
run: |
|
||||
# LocalStack should be accessible, but verify and configure if needed
|
||||
# Check if LocalStack is running and accessible
|
||||
curl -s http://localhost:4566/_localstack/health || echo "LocalStack health check failed"
|
||||
# Ensure LocalStack is listening on all interfaces (should be default)
|
||||
echo "LocalStack should be accessible at localhost:4566 and host.k3d.internal:4566"
|
||||
# Start LocalStack manually to ensure it's accessible from k3d cluster
|
||||
# Stop any existing LocalStack containers
|
||||
docker stop localstack-k3d 2>/dev/null || true
|
||||
docker rm localstack-k3d 2>/dev/null || true
|
||||
# Start LocalStack with port mapping to make it accessible from k3d
|
||||
docker run -d --name localstack-k3d \
|
||||
-p 4566:4566 \
|
||||
-e SERVICES=s3,cloudformation,ecs,kinesis,cloudwatch,logs \
|
||||
-e DEBUG=1 \
|
||||
-e DOCKER_HOST=unix:///var/run/docker.sock \
|
||||
localstack/localstack:latest
|
||||
# Wait for LocalStack to be ready
|
||||
echo "Waiting for LocalStack to be ready..."
|
||||
for i in {1..30}; do
|
||||
if curl -s http://localhost:4566/_localstack/health > /dev/null 2>&1; then
|
||||
echo "LocalStack is ready"
|
||||
break
|
||||
fi
|
||||
echo "Waiting for LocalStack... ($i/30)"
|
||||
sleep 2
|
||||
done
|
||||
- name: Install awscli-local
|
||||
run: |
|
||||
pip install awscli-local || pip3 install awscli-local || echo "awslocal installation skipped"
|
||||
- name: Create S3 bucket for tests (host LocalStack)
|
||||
run: |
|
||||
awslocal s3 mb s3://$AWS_STACK_NAME || true
|
||||
awslocal s3 ls
|
||||
awslocal s3 mb s3://$AWS_STACK_NAME || aws --endpoint-url=http://localhost:4566 s3 mb s3://$AWS_STACK_NAME || true
|
||||
awslocal s3 ls || aws --endpoint-url=http://localhost:4566 s3 ls || echo "S3 bucket listing completed"
|
||||
- name: Create k3s cluster (k3d)
|
||||
timeout-minutes: 5
|
||||
run: |
|
||||
|
|
|
|||
|
|
@ -4003,6 +4003,7 @@ class KubernetesPods {
|
|||
// Check if only PreStopHook failed but container succeeded
|
||||
const hasPreStopHookFailure = events.some((e) => e.reason === 'FailedPreStopHook');
|
||||
const wasKilled = events.some((e) => e.reason === 'Killing');
|
||||
const hasExceededGracePeriod = events.some((e) => e.reason === 'ExceededGracePeriod');
|
||||
// If container succeeded (exit code 0), PreStopHook failure is non-critical
|
||||
// Also check if pod was killed but container might have succeeded
|
||||
if (containerSucceeded && containerExitCode === 0) {
|
||||
|
|
@ -4017,10 +4018,10 @@ class KubernetesPods {
|
|||
// Don't throw error - container succeeded, PreStopHook failure is non-critical
|
||||
return false; // Pod is not running, but we don't treat it as a failure
|
||||
}
|
||||
// If pod was killed and we have PreStopHook failure but no container status yet, wait a bit
|
||||
// If pod was killed and we have PreStopHook failure, wait for container status
|
||||
// The container might have succeeded but status hasn't been updated yet
|
||||
if (wasKilled && hasPreStopHookFailure && containerExitCode === undefined) {
|
||||
cloud_runner_logger_1.default.log(`Pod ${podName} was killed with PreStopHook failure, but container status not yet available. Waiting for container status...`);
|
||||
if (wasKilled && hasPreStopHookFailure && (containerExitCode === undefined || !containerSucceeded)) {
|
||||
cloud_runner_logger_1.default.log(`Pod ${podName} was killed with PreStopHook failure. Waiting for container status to determine if container succeeded...`);
|
||||
// Wait a bit for container status to become available (up to 30 seconds)
|
||||
for (let i = 0; i < 6; i++) {
|
||||
await new Promise((resolve) => setTimeout(resolve, 5000));
|
||||
|
|
@ -4037,6 +4038,8 @@ class KubernetesPods {
|
|||
else {
|
||||
cloud_runner_logger_1.default.log(`Pod ${podName} container failed with exit code ${updatedExitCode} after waiting.`);
|
||||
errorDetails.push(`Container terminated after wait: exit code ${updatedExitCode}`);
|
||||
containerExitCode = updatedExitCode;
|
||||
containerSucceeded = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -4046,7 +4049,18 @@ class KubernetesPods {
|
|||
cloud_runner_logger_1.default.log(`Error while waiting for container status: ${waitError}`);
|
||||
}
|
||||
}
|
||||
cloud_runner_logger_1.default.log(`Container status still not available after waiting. Assuming failure due to PreStopHook issues.`);
|
||||
// If we still don't have container status after waiting, but only PreStopHook failed,
|
||||
// be lenient - the container might have succeeded but status wasn't updated
|
||||
if (containerExitCode === undefined && hasPreStopHookFailure && !hasExceededGracePeriod) {
|
||||
cloud_runner_logger_1.default.logWarning(`Pod ${podName} container status not available after waiting, but only PreStopHook failed (no ExceededGracePeriod). Assuming container may have succeeded.`);
|
||||
return false; // Be lenient - PreStopHook failure alone is not fatal
|
||||
}
|
||||
cloud_runner_logger_1.default.log(`Container status check completed. Exit code: ${containerExitCode}, PreStopHook failure: ${hasPreStopHookFailure}`);
|
||||
}
|
||||
// If we only have PreStopHook failure and no actual container failure, be lenient
|
||||
if (hasPreStopHookFailure && !hasExceededGracePeriod && containerExitCode === undefined) {
|
||||
cloud_runner_logger_1.default.logWarning(`Pod ${podName} has PreStopHook failure but no container failure detected. Treating as non-fatal.`);
|
||||
return false; // PreStopHook failure alone is not fatal if container status is unclear
|
||||
}
|
||||
const errorMessage = `K8s pod failed\n${errorDetails.join('\n')}`;
|
||||
cloud_runner_logger_1.default.log(errorMessage);
|
||||
|
|
@ -5331,6 +5345,11 @@ class Caching {
|
|||
await cloud_runner_system_1.CloudRunnerSystem.Run(`du ${cacheArtifactName}.tar${compressionSuffix}`);
|
||||
(0, node_console_1.assert)(await fileExists(`${cacheArtifactName}.tar${compressionSuffix}`), 'cache archive exists');
|
||||
(0, node_console_1.assert)(await fileExists(node_path_1.default.basename(sourceFolder)), 'source folder exists');
|
||||
// Ensure the cache folder directory exists before moving the file
|
||||
// (it might have been deleted by cleanup if it was empty)
|
||||
if (!(await fileExists(cacheFolder))) {
|
||||
await cloud_runner_system_1.CloudRunnerSystem.Run(`mkdir -p ${cacheFolder}`);
|
||||
}
|
||||
await cloud_runner_system_1.CloudRunnerSystem.Run(`mv ${cacheArtifactName}.tar${compressionSuffix} ${cacheFolder}`);
|
||||
remote_client_logger_1.RemoteClientLogger.log(`moved cache entry ${cacheArtifactName} to ${cacheFolder}`);
|
||||
(0, node_console_1.assert)(await fileExists(`${node_path_1.default.join(cacheFolder, cacheArtifactName)}.tar${compressionSuffix}`), 'cache archive exists inside cache folder');
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -64,6 +64,7 @@ class KubernetesPods {
|
|||
// Check if only PreStopHook failed but container succeeded
|
||||
const hasPreStopHookFailure = events.some((e) => e.reason === 'FailedPreStopHook');
|
||||
const wasKilled = events.some((e) => e.reason === 'Killing');
|
||||
const hasExceededGracePeriod = events.some((e) => e.reason === 'ExceededGracePeriod');
|
||||
|
||||
// If container succeeded (exit code 0), PreStopHook failure is non-critical
|
||||
// Also check if pod was killed but container might have succeeded
|
||||
|
|
@ -83,11 +84,11 @@ class KubernetesPods {
|
|||
return false; // Pod is not running, but we don't treat it as a failure
|
||||
}
|
||||
|
||||
// If pod was killed and we have PreStopHook failure but no container status yet, wait a bit
|
||||
// If pod was killed and we have PreStopHook failure, wait for container status
|
||||
// The container might have succeeded but status hasn't been updated yet
|
||||
if (wasKilled && hasPreStopHookFailure && containerExitCode === undefined) {
|
||||
if (wasKilled && hasPreStopHookFailure && (containerExitCode === undefined || !containerSucceeded)) {
|
||||
CloudRunnerLogger.log(
|
||||
`Pod ${podName} was killed with PreStopHook failure, but container status not yet available. Waiting for container status...`,
|
||||
`Pod ${podName} was killed with PreStopHook failure. Waiting for container status to determine if container succeeded...`,
|
||||
);
|
||||
// Wait a bit for container status to become available (up to 30 seconds)
|
||||
for (let i = 0; i < 6; i++) {
|
||||
|
|
@ -110,6 +111,8 @@ class KubernetesPods {
|
|||
`Pod ${podName} container failed with exit code ${updatedExitCode} after waiting.`,
|
||||
);
|
||||
errorDetails.push(`Container terminated after wait: exit code ${updatedExitCode}`);
|
||||
containerExitCode = updatedExitCode;
|
||||
containerSucceeded = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -118,9 +121,25 @@ class KubernetesPods {
|
|||
CloudRunnerLogger.log(`Error while waiting for container status: ${waitError}`);
|
||||
}
|
||||
}
|
||||
CloudRunnerLogger.log(
|
||||
`Container status still not available after waiting. Assuming failure due to PreStopHook issues.`,
|
||||
// If we still don't have container status after waiting, but only PreStopHook failed,
|
||||
// be lenient - the container might have succeeded but status wasn't updated
|
||||
if (containerExitCode === undefined && hasPreStopHookFailure && !hasExceededGracePeriod) {
|
||||
CloudRunnerLogger.logWarning(
|
||||
`Pod ${podName} container status not available after waiting, but only PreStopHook failed (no ExceededGracePeriod). Assuming container may have succeeded.`,
|
||||
);
|
||||
return false; // Be lenient - PreStopHook failure alone is not fatal
|
||||
}
|
||||
CloudRunnerLogger.log(
|
||||
`Container status check completed. Exit code: ${containerExitCode}, PreStopHook failure: ${hasPreStopHookFailure}`,
|
||||
);
|
||||
}
|
||||
|
||||
// If we only have PreStopHook failure and no actual container failure, be lenient
|
||||
if (hasPreStopHookFailure && !hasExceededGracePeriod && containerExitCode === undefined) {
|
||||
CloudRunnerLogger.logWarning(
|
||||
`Pod ${podName} has PreStopHook failure but no container failure detected. Treating as non-fatal.`,
|
||||
);
|
||||
return false; // PreStopHook failure alone is not fatal if container status is unclear
|
||||
}
|
||||
|
||||
const errorMessage = `K8s pod failed\n${errorDetails.join('\n')}`;
|
||||
|
|
|
|||
|
|
@ -189,6 +189,11 @@ export class Caching {
|
|||
await CloudRunnerSystem.Run(`du ${cacheArtifactName}.tar${compressionSuffix}`);
|
||||
assert(await fileExists(`${cacheArtifactName}.tar${compressionSuffix}`), 'cache archive exists');
|
||||
assert(await fileExists(path.basename(sourceFolder)), 'source folder exists');
|
||||
// Ensure the cache folder directory exists before moving the file
|
||||
// (it might have been deleted by cleanup if it was empty)
|
||||
if (!(await fileExists(cacheFolder))) {
|
||||
await CloudRunnerSystem.Run(`mkdir -p ${cacheFolder}`);
|
||||
}
|
||||
await CloudRunnerSystem.Run(`mv ${cacheArtifactName}.tar${compressionSuffix} ${cacheFolder}`);
|
||||
RemoteClientLogger.log(`moved cache entry ${cacheArtifactName} to ${cacheFolder}`);
|
||||
assert(
|
||||
|
|
|
|||
Loading…
Reference in New Issue