2022-02-01 02:31:20 +00:00
import * as k8s from '@kubernetes/client-node' ;
2022-11-07 20:41:00 +00:00
import { BuildParameters } from '../../..' ;
2022-02-01 02:31:20 +00:00
import * as core from '@actions/core' ;
Cloud Runner v0 - Reliable and trimmed down cloud runner (#353)
* Update cloud-runner-aws-pipeline.yml
* Update cloud-runner-k8s-pipeline.yml
* yarn build
* yarn build
* correct branch ref
* correct branch ref passed to target repo
* Create k8s-tests.yml
* Delete k8s-tests.yml
* correct branch ref passed to target repo
* correct branch ref passed to target repo
* Always describe AWS tasks for now, because unstable error handling
* Remove unused tree commands
* Use lfs guid sum
* Simple override cache push
* Simple override cache push and pull override to allow pure cloud storage driven caching
* Removal of early branch (breaks lfs caching)
* Remove unused tree commands
* Update action.yml
* Update action.yml
* Support cache and input override commands as input + full support custom hooks
* Increase k8s timeout
* replace filename being appended for unclear reason
* cache key should not contain whitespaces
* Always try and deploy rook for k8s
* Apply k8s files for rook
* Update action.yml
* Apply k8s files for rook
* Apply k8s files for rook
* cache test and action description for kuber storage class
* Correct test and implement dependency health check and start
* GCP-secret run, cache key
* lfs smudge set explicit and undo explicit
* Run using external secret provider to speed up input
* Update cloud-runner-aws-pipeline.yml
* Add nodejs as build step dependency
* Add nodejs as build step dependency
* Cloud Runner Tests must be specified to capture logs from cloud runner for tests
* Cloud Runner Tests must be specified to capture logs from cloud runner for tests
* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs
* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs
* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs
* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs
* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs
* better defaults for new inputs
* better defaults
* merge latest
* force build update
* use npm n to update node in unity builder
* use npm n to update node in unity builder
* use npm n to update node in unity builder
* correct new line
* quiet zipping
* quiet zipping
* default secrets for unity username and password
* default secrets for unity username and password
* ls active directory before lfs install
* Get cloud runner secrets from
* Get cloud runner secrets from
* Cleanup setup of default secrets
* Various fixes
* Cleanup setup of default secrets
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* AWS secrets manager support
* less caching logs
* default k8s storage class to pd-standard
* more readable build commands
* Capture aws exit code 1 reliably
* Always replace /head from branch
* k8s default storage class to standard-rwo
* cleanup
* further cleanup input
* further cleanup input
* further cleanup input
* further cleanup input
* further cleanup input
* folder sizes to inspect caching
* dir command for local cloud runner test
* k8s wait for pending because pvc will not create earlier
* prefer k8s standard storage
* handle empty string as cloud runner cluster input
* local-system is now used for cloud runner test implementation AND correctly unset test CLI input
* local-system is now used for cloud runner test implementation AND correctly unset test CLI input
* fix unterminated quote
* fix unterminated quote
* do not share build parameters in tests - in cloud runner this will cause conflicts with resources of the same name
* remove head and heads from branch prefix
* fix reversed caching direction of cache-push
* fixes
* fixes
* fixes
* cachePull cli
* fixes
* fixes
* fixes
* fixes
* fixes
* order cache test to be first
* order cache test to be first
* fixes
* populate cache key instead of using branch
* cleanup cli
* garbage-collect-aws cli can iterate over aws resources and cli scans all ts files
* import cli methods
* import cli files explicitly
* import cli files explicitly
* import cli files explicitly
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* log parameters in cloud runner parameter test
* log parameters in cloud runner parameter test
* log parameters in cloud runner parameter test
* Cloud runner param test before caching because we have a fast local cache test now
* Using custom build path relative to repo root rather than project root
* aws-garbage-collect at end of pipeline
* aws-garbage-collect do not actually delete anything for now - just list
* remove some legacy du commands
* Update cloud-runner-aws-pipeline.yml
* log contents after cache pull and fix some scenarios with duplicate secrets
* log contents after cache pull and fix some scenarios with duplicate secrets
* log contents after cache pull and fix some scenarios with duplicate secrets
* PR comments
* Replace guid with uuid package
* use fileExists lambda instead of stat to check file exists in caching
* build failed results in core error message
* Delete sample.txt
2022-04-10 23:00:37 +00:00
import { ProviderInterface } from '../provider-interface' ;
2023-03-27 11:14:23 +00:00
import CloudRunnerSecret from '../../options/cloud-runner-secret' ;
2022-02-01 02:31:20 +00:00
import KubernetesStorage from './kubernetes-storage' ;
2023-03-27 11:14:23 +00:00
import CloudRunnerEnvironmentVariable from '../../options/cloud-runner-environment-variable' ;
2022-02-01 02:31:20 +00:00
import KubernetesTaskRunner from './kubernetes-task-runner' ;
import KubernetesSecret from './kubernetes-secret' ;
import KubernetesJobSpecFactory from './kubernetes-job-spec-factory' ;
import KubernetesServiceAccount from './kubernetes-service-account' ;
2023-03-27 11:14:23 +00:00
import CloudRunnerLogger from '../../services/core/cloud-runner-logger' ;
2022-02-01 02:31:20 +00:00
import { CoreV1Api } from '@kubernetes/client-node' ;
2022-11-07 20:41:00 +00:00
import CloudRunner from '../../cloud-runner' ;
import { ProviderResource } from '../provider-resource' ;
import { ProviderWorkflow } from '../provider-workflow' ;
2024-02-06 23:46:31 +00:00
import { RemoteClientLogger } from '../../remote-client/remote-client-logger' ;
import { KubernetesRole } from './kubernetes-role' ;
import { CloudRunnerSystem } from '../../services/core/cloud-runner-system' ;
2022-02-01 02:31:20 +00:00
Cloud Runner v0 - Reliable and trimmed down cloud runner (#353)
* Update cloud-runner-aws-pipeline.yml
* Update cloud-runner-k8s-pipeline.yml
* yarn build
* yarn build
* correct branch ref
* correct branch ref passed to target repo
* Create k8s-tests.yml
* Delete k8s-tests.yml
* correct branch ref passed to target repo
* correct branch ref passed to target repo
* Always describe AWS tasks for now, because unstable error handling
* Remove unused tree commands
* Use lfs guid sum
* Simple override cache push
* Simple override cache push and pull override to allow pure cloud storage driven caching
* Removal of early branch (breaks lfs caching)
* Remove unused tree commands
* Update action.yml
* Update action.yml
* Support cache and input override commands as input + full support custom hooks
* Increase k8s timeout
* replace filename being appended for unclear reason
* cache key should not contain whitespaces
* Always try and deploy rook for k8s
* Apply k8s files for rook
* Update action.yml
* Apply k8s files for rook
* Apply k8s files for rook
* cache test and action description for kuber storage class
* Correct test and implement dependency health check and start
* GCP-secret run, cache key
* lfs smudge set explicit and undo explicit
* Run using external secret provider to speed up input
* Update cloud-runner-aws-pipeline.yml
* Add nodejs as build step dependency
* Add nodejs as build step dependency
* Cloud Runner Tests must be specified to capture logs from cloud runner for tests
* Cloud Runner Tests must be specified to capture logs from cloud runner for tests
* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs
* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs
* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs
* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs
* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs
* better defaults for new inputs
* better defaults
* merge latest
* force build update
* use npm n to update node in unity builder
* use npm n to update node in unity builder
* use npm n to update node in unity builder
* correct new line
* quiet zipping
* quiet zipping
* default secrets for unity username and password
* default secrets for unity username and password
* ls active directory before lfs install
* Get cloud runner secrets from
* Get cloud runner secrets from
* Cleanup setup of default secrets
* Various fixes
* Cleanup setup of default secrets
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* AWS secrets manager support
* less caching logs
* default k8s storage class to pd-standard
* more readable build commands
* Capture aws exit code 1 reliably
* Always replace /head from branch
* k8s default storage class to standard-rwo
* cleanup
* further cleanup input
* further cleanup input
* further cleanup input
* further cleanup input
* further cleanup input
* folder sizes to inspect caching
* dir command for local cloud runner test
* k8s wait for pending because pvc will not create earlier
* prefer k8s standard storage
* handle empty string as cloud runner cluster input
* local-system is now used for cloud runner test implementation AND correctly unset test CLI input
* local-system is now used for cloud runner test implementation AND correctly unset test CLI input
* fix unterminated quote
* fix unterminated quote
* do not share build parameters in tests - in cloud runner this will cause conflicts with resources of the same name
* remove head and heads from branch prefix
* fix reversed caching direction of cache-push
* fixes
* fixes
* fixes
* cachePull cli
* fixes
* fixes
* fixes
* fixes
* fixes
* order cache test to be first
* order cache test to be first
* fixes
* populate cache key instead of using branch
* cleanup cli
* garbage-collect-aws cli can iterate over aws resources and cli scans all ts files
* import cli methods
* import cli files explicitly
* import cli files explicitly
* import cli files explicitly
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* log parameters in cloud runner parameter test
* log parameters in cloud runner parameter test
* log parameters in cloud runner parameter test
* Cloud runner param test before caching because we have a fast local cache test now
* Using custom build path relative to repo root rather than project root
* aws-garbage-collect at end of pipeline
* aws-garbage-collect do not actually delete anything for now - just list
* remove some legacy du commands
* Update cloud-runner-aws-pipeline.yml
* log contents after cache pull and fix some scenarios with duplicate secrets
* log contents after cache pull and fix some scenarios with duplicate secrets
* log contents after cache pull and fix some scenarios with duplicate secrets
* PR comments
* Replace guid with uuid package
* use fileExists lambda instead of stat to check file exists in caching
* build failed results in core error message
* Delete sample.txt
2022-04-10 23:00:37 +00:00
class Kubernetes implements ProviderInterface {
2022-11-07 20:41:00 +00:00
// Most recently constructed provider; the constructor assigns `this` here.
public static Instance : Kubernetes ;
// Kubeconfig and the API clients derived from it; all are created in the constructor.
public kubeConfig ! : k8s . KubeConfig ;
public kubeClient ! : k8s . CoreV1Api ;
2024-02-06 23:46:31 +00:00
public kubeClientApps ! : k8s . AppsV1Api ;
2022-11-07 20:41:00 +00:00
public kubeClientBatch ! : k8s . BatchV1Api ;
2024-02-06 23:46:31 +00:00
public rbacAuthorizationV1Api ! : k8s . RbacAuthorizationV1Api ;
2022-11-07 20:41:00 +00:00
// Build guid of the task being run; assigned in runTaskInWorkflow.
public buildGuid : string = '' ;
// Parameters of the current build; assigned in setupWorkflow.
public buildParameters ! : BuildParameters ;
// Names of the per-task resources; assigned in runTaskInWorkflow.
public pvcName : string = '' ;
public secretName : string = '' ;
public jobName : string = '' ;
// Namespace used for every API call; assigned in the constructor.
public namespace ! : string ;
// Pod and container of the running job; assigned by setPodNameAndContainerName
// (containerName is also preset in runTaskInWorkflow).
public podName : string = '' ;
public containerName : string = '' ;
// Names derived from the build guid in setupWorkflow.
public cleanupCronJobName : string = '' ;
public serviceAccountName : string = '' ;
2024-02-06 23:46:31 +00:00
// Passed to the job spec factory; never assigned in this class.
// NOTE(review): presumably the log service address — confirm against KubernetesJobSpecFactory.
public ip : string = '' ;
2022-02-01 02:31:20 +00:00
/**
 * Loads the default kubeconfig, creates the API clients used by this provider and
 * registers the new object as `Kubernetes.Instance`.
 *
 * @param buildParameters Only `containerNamespace` is read here; a falsy value selects
 *   the `default` namespace.
 */
constructor(buildParameters: BuildParameters) {
  Kubernetes.Instance = this;

  this.kubeConfig = new k8s.KubeConfig();
  const config = this.kubeConfig;
  config.loadFromDefault();

  this.kubeClient = config.makeApiClient(k8s.CoreV1Api);
  this.kubeClientApps = config.makeApiClient(k8s.AppsV1Api);
  this.kubeClientBatch = config.makeApiClient(k8s.BatchV1Api);
  this.rbacAuthorizationV1Api = config.makeApiClient(k8s.RbacAuthorizationV1Api);

  this.namespace = buildParameters.containerNamespace || 'default';
  CloudRunnerLogger.log('Loaded default Kubernetes configuration for this environment');
}
2022-02-01 02:31:20 +00:00
2024-02-06 23:46:31 +00:00
/**
 * Base64-encodes `logs` and POSTs them with `curl` to `http://<LOG_SERVICE_IP>/api/log`.
 *
 * When the `LOG_SERVICE_IP` environment variable is unset or empty, a warning is logged
 * and nothing is sent.
 *
 * @param logs Raw log text to push.
 */
async PushLogUpdate(logs: string): Promise<void> {
  // Push logs to the nginx file server addressed by the 'LOG_SERVICE_IP' env var.
  const ip = process.env['LOG_SERVICE_IP'];
  if (!ip) {
    RemoteClientLogger.logWarning(`LOG_SERVICE_IP not set, skipping log push`);

    return;
  }
  const url = `http://${ip}/api/log`;
  RemoteClientLogger.log(`Pushing logs to ${url}`);

  // Base64 output only contains [A-Za-z0-9+/=], so it cannot break out of the double quotes below.
  const encodedLogs = Buffer.from(logs).toString('base64');
  const response = await CloudRunnerSystem.Run(`curl -X POST -d "${encodedLogs}" ${url}`, false, true);
  RemoteClientLogger.log(`Pushed logs to ${url} ${response}`);
}
2022-11-07 20:41:00 +00:00
/**
 * Lists the pods, service accounts, secrets and jobs that exist in this provider's namespace.
 *
 * @returns One `ProviderResource` per object, grouped in the order pods, service accounts,
 *   secrets, jobs. An object without a name is reported with an empty `Name`.
 */
async listResources(): Promise<ProviderResource[]> {
  // The four list calls do not depend on each other, so they are issued together.
  const [pods, serviceAccounts, secrets, jobs] = await Promise.all([
    this.kubeClient.listNamespacedPod(this.namespace),
    this.kubeClient.listNamespacedServiceAccount(this.namespace),
    this.kubeClient.listNamespacedSecret(this.namespace),
    this.kubeClientBatch.listNamespacedJob(this.namespace),
  ]);

  const toResource = (item: { metadata?: { name?: string } }): ProviderResource => {
    return { Name: item.metadata?.name || '' };
  };

  return [
    ...pods.body.items.map(toResource),
    ...serviceAccounts.body.items.map(toResource),
    ...secrets.body.items.map(toResource),
    ...jobs.body.items.map(toResource),
  ];
}
/** Not supported by the Kubernetes provider: always throws synchronously. */
listWorkflow(): Promise<ProviderWorkflow[]> {
  const notImplemented = new Error('Method not implemented.');
  throw notImplemented;
}
/** Not supported by the Kubernetes provider: always throws synchronously. */
watchWorkflow(): Promise<string> {
  const notImplemented = new Error('Method not implemented.');
  throw notImplemented;
}
/**
 * Placeholder garbage collection: no Kubernetes resource is inspected or removed and
 * every argument is ignored.
 *
 * @returns A promise that resolves to an empty string.
 */
garbageCollect(
  // eslint-disable-next-line no-unused-vars
  filter: string,
  // eslint-disable-next-line no-unused-vars
  previewOnly: boolean,
  // eslint-disable-next-line no-unused-vars
  olderThan: Number,
  // eslint-disable-next-line no-unused-vars
  fullCache: boolean,
  // eslint-disable-next-line no-unused-vars
  baseDependencies: boolean,
): Promise<string> {
  return Promise.resolve('');
}
2022-11-07 20:41:00 +00:00
/**
 * Stores the build parameters, derives the cron-job and service-account names from
 * `buildParameters.buildGuid` and creates the service account in the provider namespace.
 *
 * Errors from `KubernetesServiceAccount.createServiceAccount` propagate unchanged.
 *
 * @param buildGuid Not read here; names are derived from `buildParameters.buildGuid`.
 * @param buildParameters Parameters of the build being set up.
 * @param branchName Unused.
 * @param defaultSecretsArray Unused.
 */
public async setupWorkflow(
  buildGuid: string,
  buildParameters: BuildParameters,
  // eslint-disable-next-line no-unused-vars
  branchName: string,
  // eslint-disable-next-line no-unused-vars
  defaultSecretsArray: { ParameterKey: string; EnvironmentVariable: string; ParameterValue: string }[],
) {
  this.buildParameters = buildParameters;
  this.cleanupCronJobName = `unity-builder-cronjob-${buildParameters.buildGuid}`;
  this.serviceAccountName = `service-account-${buildParameters.buildGuid}`;
  await KubernetesServiceAccount.createServiceAccount(this.serviceAccountName, this.namespace, this.kubeClient);
}
2022-11-07 20:41:00 +00:00
/**
 * Runs one task as a Kubernetes job and returns the output collected from its pod.
 *
 * Steps: create the persistent volume claim and the secret, create the job, wait for
 * its pod to run, stream the task through `KubernetesTaskRunner.runTask`, then delete
 * the per-task resources. When `process.env.cloudRunnerTests === 'true'`, stopped
 * containers are removed from the k3d nodes first and a missing Unity image is reported.
 *
 * @param buildGuid Guid used to name the secret and the job.
 * @param image Container image for the job.
 * @param commands Commands handed to the job spec.
 * @param mountdir Mount directory handed to the job spec.
 * @param workingdir Working directory handed to the job spec.
 * @param environment Environment variables handed to the job spec.
 * @param secrets Secrets stored in the Kubernetes secret and handed to the job spec.
 * @returns Output of the task.
 * @throws Rethrows any failure after logging it; a failure while the job is being created
 *   or run also logs the namespace events matching the pod and cleans up task resources.
 */
async runTaskInWorkflow(
  buildGuid: string,
  image: string,
  commands: string,
  mountdir: string,
  workingdir: string,
  environment: CloudRunnerEnvironmentVariable[],
  secrets: CloudRunnerSecret[],
): Promise<string> {
  try {
    CloudRunnerLogger.log('Cloud Runner K8s workflow!');

    // Setup
    const id =
      BuildParameters && BuildParameters.shouldUseRetainedWorkspaceMode(this.buildParameters)
        ? CloudRunner.lockedWorkspace
        : this.buildParameters.buildGuid;
    this.pvcName = `unity-builder-pvc-${id}`;
    await KubernetesStorage.createPersistentVolumeClaim(
      this.buildParameters,
      this.pvcName,
      this.kubeClient,
      this.namespace,
    );
    this.buildGuid = buildGuid;
    this.secretName = `build-credentials-${this.buildGuid}`;
    this.jobName = `unity-builder-job-${this.buildGuid}`;
    this.containerName = `main`;
    await KubernetesSecret.createSecret(secrets, this.secretName, this.namespace, this.kubeClient);

    const isTestRun = process.env['cloudRunnerTests'] === 'true';
    if (isTestRun) {
      // For tests, free runtime space on the k3d nodes before the job pulls its image.
      await this.removeStoppedTestContainers();
    }

    let output = '';
    try {
      if (isTestRun && image.includes('unityci/editor')) {
        // Purely diagnostic: report a missing image or low disk space, never block the job.
        await this.warnIfUnityImageNotCached();
      }
      CloudRunnerLogger.log('Job does not exist');
      await this.createJob(commands, image, mountdir, workingdir, environment, secrets);
      CloudRunnerLogger.log('Watching pod until running');
      await KubernetesTaskRunner.watchUntilPodRunning(this.kubeClient, this.podName, this.namespace);

      CloudRunnerLogger.log('Pod is running');
      output += await KubernetesTaskRunner.runTask(
        this.kubeConfig,
        this.kubeClient,
        this.jobName,
        this.podName,
        this.containerName,
        this.namespace,
      );
    } catch (error: any) {
      CloudRunnerLogger.log(`error running k8s workflow ${error}`);
      await new Promise((resolve) => setTimeout(resolve, 3000));

      // Dump the namespace events that mention the pod to help diagnose the failure.
      CloudRunnerLogger.log(
        JSON.stringify(
          (await this.kubeClient.listNamespacedEvent(this.namespace)).body.items
            .map((x) => {
              return {
                message: x.message || ``,
                name: x.metadata.name || ``,
                reason: x.reason || ``,
              };
            })
            .filter((x) => x.name.includes(this.podName)),
          undefined,
          4,
        ),
      );
      await this.cleanupTaskResources();
      throw error;
    }

    await this.cleanupTaskResources();

    return output;
  } catch (error) {
    CloudRunnerLogger.log('Running job failed');
    core.error(JSON.stringify(error, undefined, 4));

    // await this.cleanupTaskResources();
    throw error;
  }
}

/**
 * Test-only helper: removes all stopped containers from the k3d agent and server nodes.
 * Images are deliberately left alone so the Unity image does not have to be pulled again.
 * A failing command is logged and otherwise ignored.
 */
private async removeStoppedTestContainers(): Promise<void> {
  CloudRunnerLogger.log('Cleaning up old images in k3d node before pulling new image...');
  const cleanupCommands = [
    'docker exec k3d-unity-builder-agent-0 sh -c "crictl rm --all 2>/dev/null || true" || true',
    'docker exec k3d-unity-builder-server-0 sh -c "crictl rm --all 2>/dev/null || true" || true',
  ];
  for (const cmd of cleanupCommands) {
    try {
      await CloudRunnerSystem.Run(cmd, true, true);
    } catch (cmdError) {
      // Ignore individual command failures
      CloudRunnerLogger.log(`Cleanup command failed (non-fatal): ${cmdError}`);
    }
  }
}

/**
 * Test-only helper: checks whether the Unity editor image is cached on the k3d agent node
 * and logs a warning when it is not, including whether the server node has it and, when
 * neither node does, whether the agent has less than 4.5GB available for the pull.
 * No pull is attempted here; every failure is logged and swallowed.
 */
private async warnIfUnityImageNotCached(): Promise<void> {
  try {
    const probeImage = (node: string) =>
      CloudRunnerSystem.Run(
        `docker exec ${node} sh -c "crictl images | grep -q unityci/editor && echo 'cached' || echo 'not_cached'" || echo 'not_cached'`,
        true,
        true,
      );

    const agentImageCheck = await probeImage('k3d-unity-builder-agent-0');
    if (!agentImageCheck.includes('not_cached')) {
      CloudRunnerLogger.log('Unity image is cached on agent node - pod should start without pulling');

      return;
    }

    const serverImageCheck = await probeImage('k3d-unity-builder-server-0');

    // 'not_cached' contains 'cached', so the negative marker has to be ruled out explicitly.
    const serverHasImage = serverImageCheck.includes('cached') && !serverImageCheck.includes('not_cached');
    const diskInfo = await CloudRunnerSystem.Run(
      'docker exec k3d-unity-builder-agent-0 sh -c "df -h /var/lib/rancher/k3s 2>/dev/null | tail -1 || df -h / 2>/dev/null | tail -1 || echo unknown" || echo unknown',
      true,
      true,
    );

    CloudRunnerLogger.logWarning(
      `Unity image not cached on agent node (where pods run). Server node: ${
        serverHasImage ? 'has image' : 'no image'
      }. Disk info: ${diskInfo.trim()}. Pod will attempt to pull image (3.9GB) which may fail due to disk pressure.`,
    );

    if (serverHasImage) {
      CloudRunnerLogger.logWarning(
        'Unity image exists on server node but not agent node. Pod will attempt to pull during scheduling. If pull fails due to disk pressure, ensure cleanup runs before this test.',
      );

      return;
    }

    // Image is on neither node: the Unity image is ~3.9GB, so require at least 4.5GB to be safe.
    const availableGB = Kubernetes.parseAvailableGigabytes(diskInfo);
    if (availableGB !== undefined && availableGB < 4.5) {
      CloudRunnerLogger.logWarning(
        `CRITICAL: Unity image not cached and only ${availableGB.toFixed(
          2,
        )}GB available. Image pull (3.9GB) will likely fail. Consider running cleanup or ensuring pre-pull step succeeds.`,
      );
    }
  } catch (checkError) {
    // Ignore check errors - continue with job creation
    CloudRunnerLogger.logWarning(`Failed to verify Unity image cache: ${checkError}`);
  }
}

/**
 * Extracts the "Available" column of one `df -h` data line and converts it to gigabytes.
 * The column is located as the field directly before the `Use%` field, which also works
 * when the filesystem name has been wrapped onto a previous line.
 *
 * @returns The available space in GB, or `undefined` when the line cannot be parsed.
 */
private static parseAvailableGigabytes(dfLine: string): number | undefined {
  const columns = dfLine.trim().split(/\s+/);
  const usePercentIndex = columns.findIndex((column) => /^\d+%$/.test(column));
  if (usePercentIndex < 1) {
    return undefined;
  }
  const match = /^(\d+(?:\.\d+)?)([KMGTP]?)i?B?$/i.exec(columns[usePercentIndex - 1]);
  if (!match) {
    return undefined;
  }
  const value = Number.parseFloat(match[1]);
  switch (match[2].toUpperCase()) {
    case 'K':
      return value / (1024 * 1024);
    case 'M':
      return value / 1024;
    case 'G':
      return value;
    case 'T':
      return value * 1024;
    case 'P':
      return value * 1024 * 1024;
    default:
      // No unit suffix: treated as a plain byte count (e.g. "0").
      return value / (1024 * 1024 * 1024);
  }
}
2023-03-27 11:14:23 +00:00
/**
 * Submits the job for the current task, then finds the pod carrying the job's
 * `job-name` label and records its pod and container names on this provider.
 */
private async createJob(
  commands: string,
  image: string,
  mountdir: string,
  workingdir: string,
  environment: CloudRunnerEnvironmentVariable[],
  secrets: CloudRunnerSecret[],
) {
  await this.createNamespacedJob(commands, image, mountdir, workingdir, environment, secrets);

  const jobPod = await Kubernetes.findPodFromJob(this.kubeClient, this.jobName, this.namespace);
  this.setPodNameAndContainerName(jobPod);
}
/**
 * Reports whether a job called `name` exists in the provider namespace.
 *
 * @param name Job name to look for.
 */
private async doesJobExist(name: string) {
  const { body } = await this.kubeClientBatch.listNamespacedJob(this.namespace);
  for (const job of body.items) {
    if (job.metadata?.name === name) {
      return true;
    }
  }

  return false;
}
/**
 * Reports whether the pod recorded in `podName` is in the `Failed` phase.
 */
private async doesFailedJobExist() {
  const { body } = await this.kubeClient.readNamespacedPodStatus(this.podName, this.namespace);

  return body.status?.phase === `Failed`;
}
2022-11-07 20:41:00 +00:00
/**
 * Builds the job spec for the current task and submits it to the batch API.
 *
 * A single attempt is made. The previous three-iteration loop returned or threw on its
 * first pass, so it never retried and has been removed without changing behaviour.
 *
 * @returns The name of the created job as reported by the API, if any.
 * @throws Rethrows any error from building or creating the job after logging it.
 */
private async createNamespacedJob(
  commands: string,
  image: string,
  mountdir: string,
  workingdir: string,
  environment: CloudRunnerEnvironmentVariable[],
  secrets: CloudRunnerSecret[],
): Promise<string | undefined> {
  try {
    const jobSpec = KubernetesJobSpecFactory.getJobSpec(
      commands,
      image,
      mountdir,
      workingdir,
      environment,
      secrets,
      this.buildGuid,
      this.buildParameters,
      this.secretName,
      this.pvcName,
      this.jobName,
      k8s,
      this.containerName,
      this.ip,
    );

    // NOTE(review): fixed 15s pause before submitting; the reason is not visible here — confirm.
    await new Promise((resolve) => setTimeout(resolve, 15000));

    // await KubernetesRole.createRole(this.serviceAccountName, this.namespace, this.rbacAuthorizationV1Api);
    const result = await this.kubeClientBatch.createNamespacedJob(this.namespace, jobSpec);
    CloudRunnerLogger.log(`Build job created`);
    await new Promise((resolve) => setTimeout(resolve, 5000));
    CloudRunnerLogger.log('Job created');

    return result.body.metadata?.name;
  } catch (error) {
    CloudRunnerLogger.log(`Error occured creating job: ${error}`);
    throw error;
  }
}
2022-02-01 02:31:20 +00:00
/**
 * Records the pod's name and the name of its first container.
 *
 * The pod name falls back to an empty string. The container name keeps its current
 * value when the pod reports no container statuses, including an empty status list.
 *
 * @param pod Pod whose names should be stored.
 */
setPodNameAndContainerName(pod: k8s.V1Pod) {
  this.podName = pod.metadata?.name || '';

  // `?.` after `[0]` guards an empty `containerStatuses` array, not just a missing one.
  this.containerName = pod.status?.containerStatuses?.[0]?.name || this.containerName;
}
/**
 * Deletes the job, pod and role of the current task, then its secret.
 *
 * A failure in the first group is rethrown unless the API reports `NotFound`; errors
 * without an HTTP response body (e.g. network failures) are rethrown as they are.
 * A failure to delete the secret is only logged.
 */
async cleanupTaskResources() {
  CloudRunnerLogger.log('cleaning up');
  try {
    await this.kubeClientBatch.deleteNamespacedJob(this.jobName, this.namespace);
    await this.kubeClient.deleteNamespacedPod(this.podName, this.namespace);
    await KubernetesRole.deleteRole(this.serviceAccountName, this.namespace, this.rbacAuthorizationV1Api);
  } catch (error: any) {
    CloudRunnerLogger.log(`Failed to cleanup`);

    // Not every error carries `response.body`; read it defensively so the original error survives.
    const reason = error?.response?.body?.reason;
    if (reason !== `NotFound`) {
      CloudRunnerLogger.log(`Wasn't a not found error: ${reason}`);
      throw error;
    }
  }
  try {
    await this.kubeClient.deleteNamespacedSecret(this.secretName, this.namespace);
  } catch (error: any) {
    CloudRunnerLogger.log(`Failed to cleanup secret`);
    CloudRunnerLogger.log(error?.response?.body?.reason ?? `${error}`);
  }
  CloudRunnerLogger.log('cleaned up Secret, Job and Pod');
  CloudRunnerLogger.log('cleaning up finished');
}
2022-11-07 20:41:00 +00:00
/**
 * Deletes the persistent volume claim and the service account of the workflow.
 *
 * Nothing is deleted when retained-workspace mode applies to `buildParameters`.
 * Both deletions are always attempted, so a failing PVC deletion no longer leaves the
 * service account behind; each failure is logged and the first one is rethrown.
 *
 * @param buildParameters Parameters of the build being cleaned up.
 * @param branchName Unused.
 * @param defaultSecretsArray Unused.
 */
async cleanupWorkflow(
  buildParameters: BuildParameters,
  // eslint-disable-next-line no-unused-vars
  branchName: string,
  // eslint-disable-next-line no-unused-vars
  defaultSecretsArray: { ParameterKey: string; EnvironmentVariable: string; ParameterValue: string }[],
) {
  if (BuildParameters && BuildParameters.shouldUseRetainedWorkspaceMode(buildParameters)) {
    return;
  }
  CloudRunnerLogger.log(`deleting PVC`);

  const deletions = [
    () => this.kubeClient.deleteNamespacedPersistentVolumeClaim(this.pvcName, this.namespace),
    () => this.kubeClient.deleteNamespacedServiceAccount(this.serviceAccountName, this.namespace),
  ];
  let failed = false;
  let firstError: unknown;
  for (const deletion of deletions) {
    try {
      await deletion();
    } catch (error: any) {
      CloudRunnerLogger.log(`Cleanup failed ${JSON.stringify(error, undefined, 4)}`);
      if (!failed) {
        failed = true;
        firstError = error;
      }
    }
  }
  if (failed) {
    throw firstError;
  }
  CloudRunnerLogger.log('cleaned up PVC and Service Account');
}
/**
 * Returns the first pod in `namespace` whose `job-name` label equals `jobName`.
 *
 * @param kubeClient Core API client used to list the pods.
 * @param jobName Job whose pod is wanted.
 * @param namespace Namespace to search.
 * @throws Error when no pod carries the label.
 */
static async findPodFromJob(kubeClient: CoreV1Api, jobName: string, namespace: string) {
  const { body } = await kubeClient.listNamespacedPod(namespace);
  for (const candidate of body.items) {
    if (candidate.metadata?.labels?.['job-name'] === jobName) {
      return candidate;
    }
  }

  throw new Error("pod with job-name label doesn't exist");
}
}
export default Kubernetes ;