2023-03-27 11:14:23 +00:00
import CloudRunnerLogger from '../../services/core/cloud-runner-logger' ;
2022-11-07 20:41:00 +00:00
import { CoreV1Api } from '@kubernetes/client-node' ;
/**
 * Helpers for reading the state of a single Kubernetes pod through the core V1 API.
 *
 * Pods are located by listing every pod of a namespace and matching on `metadata.name`.
 */
class KubernetesPods {
  /** How many times the pod is re-read while waiting for a terminated container status. */
  private static readonly containerStatusPollAttempts = 6;

  /** Pause between two re-reads, in milliseconds (6 x 5000 ms gives a 30 second ceiling). */
  private static readonly containerStatusPollIntervalMs = 5000;

  /**
   * Reports whether the pod is still active, and raises when it failed for real.
   *
   * A pod in phase `Running` or `Pending` counts as running. A pod in phase `Failed` is
   * inspected further (conditions, container statuses, namespace events) and is tolerated,
   * returning `false` instead of throwing, when:
   * - every terminated container exited with code 0,
   * - only a `FailedPreStopHook` event was seen and no container exit code is available, or
   * - the exit code is 137 together with PreStopHook / grace period events.
   *
   * @param podName - Name of the pod to look up.
   * @param namespace - Namespace that is listed to find the pod.
   * @param kubeClient - Core V1 client used for listing pods and events.
   * @returns `true` when the pod is `Running` or `Pending`, `false` otherwise (including a
   *   missing pod and the tolerated `Failed` cases).
   * @throws Error with a `K8s pod failed` message and the collected details when the pod
   *   failed and none of the tolerated cases applies.
   */
  public static async IsPodRunning(podName: string, namespace: string, kubeClient: CoreV1Api): Promise<boolean> {
    const pod = await KubernetesPods.findPod(podName, namespace, kubeClient);
    const phase = pod?.status?.phase || 'undefined status';
    const running = phase === 'Running' || phase === 'Pending';
    CloudRunnerLogger.log(`Getting pod status: ${phase}`);

    if (pod === undefined || phase !== 'Failed') {
      return running;
    }

    const containerStatuses = pod.status?.containerStatuses || [];
    const conditions = pod.status?.conditions || [];
    const events = (await kubeClient.listNamespacedEvent(namespace)).body.items
      .filter((x) => x.involvedObject?.name === podName)
      .map((x) => ({
        message: x.message || '',
        reason: x.reason || '',
        type: x.type || '',
      }));

    // Human readable diagnostics, emitted either as a log entry or as the error message.
    const errorDetails: string[] = [];
    errorDetails.push(`Pod: ${podName}`);
    errorDetails.push(`Phase: ${phase}`);

    if (conditions.length > 0) {
      errorDetails.push(
        `Conditions: ${JSON.stringify(
          conditions.map((c) => ({ type: c.type, status: c.status, reason: c.reason, message: c.message })),
          undefined,
          2,
        )}`,
      );
    }

    containerStatuses.forEach((cs, idx) => {
      if (cs.state?.waiting) {
        errorDetails.push(
          `Container ${idx} (${cs.name}) waiting: ${cs.state.waiting.reason} - ${cs.state.waiting.message || ''}`,
        );
      }
      if (cs.state?.terminated) {
        errorDetails.push(
          `Container ${idx} (${cs.name}) terminated: ${cs.state.terminated.reason} - ${
            cs.state.terminated.message || ''
          } (exit code: ${cs.state.terminated.exitCode})`,
        );
      }
    });

    if (events.length > 0) {
      errorDetails.push(`Recent events: ${JSON.stringify(events.slice(-5), undefined, 2)}`);
    }

    // Aggregate over all containers so that a zero exit code of one container can never
    // hide a non-zero exit code of another one, whatever the order of the statuses.
    const initialSummary = KubernetesPods.summarizeTerminations(containerStatuses);
    let containerExitCode = initialSummary.exitCode;
    let containerSucceeded = initialSummary.succeeded;

    const hasPreStopHookFailure = events.some((e) => e.reason === 'FailedPreStopHook');
    const wasKilled = events.some((e) => e.reason === 'Killing');
    const hasExceededGracePeriod = events.some((e) => e.reason === 'ExceededGracePeriod');

    // Containers succeeded (exit code 0): the Failed phase is not treated as a failure.
    if (containerSucceeded && containerExitCode === 0) {
      if (hasPreStopHookFailure) {
        CloudRunnerLogger.logWarning(
          `Pod ${podName} marked as Failed due to PreStopHook failure, but container exited successfully (exit code 0). This is non-fatal.`,
        );
      } else {
        CloudRunnerLogger.log(
          `Pod ${podName} container succeeded (exit code 0), but pod phase is Failed. Checking details...`,
        );
      }
      CloudRunnerLogger.log(`Pod details: ${errorDetails.join('\n')}`);

      return false; // Pod is not running, but we don't treat it as a failure
    }

    // Pod was killed and the PreStopHook failed: the container status may not have been
    // updated yet, so re-read the pod for a while before deciding.
    if (wasKilled && hasPreStopHookFailure && (containerExitCode === undefined || !containerSucceeded)) {
      CloudRunnerLogger.log(
        `Pod ${podName} was killed with PreStopHook failure. Waiting for container status to determine if container succeeded...`,
      );

      const updatedExitCode = await KubernetesPods.waitForContainerExitCode(podName, namespace, kubeClient);
      if (updatedExitCode === 0) {
        CloudRunnerLogger.logWarning(
          `Pod ${podName} container succeeded (exit code 0) after waiting. PreStopHook failure is non-fatal.`,
        );

        return false; // Pod is not running, but container succeeded
      }
      if (updatedExitCode !== undefined) {
        CloudRunnerLogger.log(`Pod ${podName} container failed with exit code ${updatedExitCode} after waiting.`);
        errorDetails.push(`Container terminated after wait: exit code ${updatedExitCode}`);
        containerExitCode = updatedExitCode;
        containerSucceeded = false;
      }

      // Still no container status after waiting and only the PreStopHook failed: be lenient.
      if (containerExitCode === undefined && hasPreStopHookFailure && !hasExceededGracePeriod) {
        CloudRunnerLogger.logWarning(
          `Pod ${podName} container status not available after waiting, but only PreStopHook failed (no ExceededGracePeriod). Assuming container may have succeeded.`,
        );

        return false; // Be lenient - PreStopHook failure alone is not fatal
      }
      CloudRunnerLogger.log(
        `Container status check completed. Exit code: ${containerExitCode}, PreStopHook failure: ${hasPreStopHookFailure}`,
      );
    }

    // Only a PreStopHook failure and no container exit code at all: be lenient.
    if (hasPreStopHookFailure && !hasExceededGracePeriod && containerExitCode === undefined) {
      CloudRunnerLogger.logWarning(
        `Pod ${podName} has PreStopHook failure but no container failure detected. Treating as non-fatal.`,
      );

      return false; // PreStopHook failure alone is not fatal if container status is unclear
    }

    // Exit code 137 (128 + 9) means SIGKILL - the container was killed by the system (often OOM).
    // Combined with PreStopHook / grace period events this is treated as a resource issue
    // rather than a build failure.
    if (containerExitCode === 137 && (hasPreStopHookFailure || hasExceededGracePeriod)) {
      CloudRunnerLogger.logWarning(
        `Pod ${podName} was killed (exit code 137 - likely OOM or resource limit) with PreStopHook/grace period issues. This may be a resource constraint issue rather than a build failure.`,
      );
      CloudRunnerLogger.log(`Pod details: ${errorDetails.join('\n')}`);

      return false; // Don't treat system kills as test failures if only PreStopHook issues
    }

    const errorMessage = `K8s pod failed\n${errorDetails.join('\n')}`;
    CloudRunnerLogger.log(errorMessage);
    throw new Error(errorMessage);
  }

  /**
   * Reads the current phase of a pod.
   *
   * @param podName - Name of the pod to look up.
   * @param namespace - Namespace that is listed to find the pod.
   * @param kubeClient - Core V1 client used for listing pods.
   * @returns The pod phase, or the literal `'undefined status'` when the pod or its phase
   *   is not available.
   */
  public static async GetPodStatus(podName: string, namespace: string, kubeClient: CoreV1Api): Promise<string> {
    const pod = await KubernetesPods.findPod(podName, namespace, kubeClient);

    return pod?.status?.phase || 'undefined status';
  }

  /** Lists the pods of the namespace and returns the first one named `podName`, if any. */
  private static async findPod(podName: string, namespace: string, kubeClient: CoreV1Api) {
    const response = await kubeClient.listNamespacedPod(namespace);

    return response.body.items.find((x) => podName === x.metadata?.name);
  }

  /**
   * Folds the terminated container states into a single verdict.
   *
   * `exitCode` is `undefined` when no container has terminated, the last non-zero exit code
   * when at least one container failed, and `0` otherwise. `succeeded` is `true` only when
   * at least one container terminated and all terminated containers exited with code 0.
   */
  private static summarizeTerminations(
    containerStatuses: ReadonlyArray<{ state?: { terminated?: { exitCode: number } } }>,
  ): { exitCode: number | undefined; succeeded: boolean } {
    let terminatedCount = 0;
    let failingExitCode: number | undefined;
    for (const containerStatus of containerStatuses) {
      const terminated = containerStatus.state?.terminated;
      if (!terminated) {
        continue;
      }
      terminatedCount++;
      if (terminated.exitCode !== 0) {
        failingExitCode = terminated.exitCode;
      }
    }
    if (terminatedCount === 0) {
      return { exitCode: undefined, succeeded: false };
    }
    if (failingExitCode !== undefined) {
      return { exitCode: failingExitCode, succeeded: false };
    }

    return { exitCode: 0, succeeded: true };
  }

  /**
   * Re-reads the pod at a fixed interval until a terminated container reports an exit code.
   * Lookup errors are logged and the polling continues (best effort).
   *
   * @returns The aggregated exit code, or `undefined` when none showed up in time.
   */
  private static async waitForContainerExitCode(
    podName: string,
    namespace: string,
    kubeClient: CoreV1Api,
  ): Promise<number | undefined> {
    for (let attempt = 0; attempt < KubernetesPods.containerStatusPollAttempts; attempt++) {
      await new Promise<void>((resolve) => setTimeout(resolve, KubernetesPods.containerStatusPollIntervalMs));
      try {
        const refreshedPod = await KubernetesPods.findPod(podName, namespace, kubeClient);
        const { exitCode } = KubernetesPods.summarizeTerminations(refreshedPod?.status?.containerStatuses || []);
        if (exitCode !== undefined) {
          return exitCode;
        }
      } catch (waitError) {
        CloudRunnerLogger.log(`Error while waiting for container status: ${waitError}`);
      }
    }

    return undefined;
  }
}
export default KubernetesPods ;