2022-02-01 02:31:20 +00:00
|
|
|
import { CoreV1Api, KubeConfig, Log } from '@kubernetes/client-node';
|
|
|
|
import { Writable } from 'stream';
|
Cloud Runner v0 - Reliable and trimmed down cloud runner (#353)
* Update cloud-runner-aws-pipeline.yml
* Update cloud-runner-k8s-pipeline.yml
* yarn build
* yarn build
* correct branch ref
* correct branch ref passed to target repo
* Create k8s-tests.yml
* Delete k8s-tests.yml
* correct branch ref passed to target repo
* correct branch ref passed to target repo
* Always describe AWS tasks for now, because unstable error handling
* Remove unused tree commands
* Use lfs guid sum
* Simple override cache push
* Simple override cache push and pull override to allow pure cloud storage driven caching
* Removal of early branch (breaks lfs caching)
* Remove unused tree commands
* Update action.yml
* Update action.yml
* Support cache and input override commands as input + full support custom hooks
* Increase k8s timeout
* replace filename being appended for unclear reason
* cache key should not contain whitespaces
* Always try and deploy rook for k8s
* Apply k8s files for rook
* Update action.yml
* Apply k8s files for rook
* Apply k8s files for rook
* cache test and action description for kuber storage class
* Correct test and implement dependency health check and start
* GCP-secret run, cache key
* lfs smudge set explicit and undo explicit
* Run using external secret provider to speed up input
* Update cloud-runner-aws-pipeline.yml
* Add nodejs as build step dependency
* Add nodejs as build step dependency
* Cloud Runner Tests must be specified to capture logs from cloud runner for tests
* Cloud Runner Tests must be specified to capture logs from cloud runner for tests
* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs
* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs
* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs
* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs
* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs
* better defaults for new inputs
* better defaults
* merge latest
* force build update
* use npm n to update node in unity builder
* use npm n to update node in unity builder
* use npm n to update node in unity builder
* correct new line
* quiet zipping
* quiet zipping
* default secrets for unity username and password
* default secrets for unity username and password
* ls active directory before lfs install
* Get cloud runner secrets from
* Get cloud runner secrets from
* Cleanup setup of default secrets
* Various fixes
* Cleanup setup of default secrets
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* AWS secrets manager support
* less caching logs
* default k8s storage class to pd-standard
* more readable build commands
* Capture aws exit code 1 reliably
* Always replace /head from branch
* k8s default storage class to standard-rwo
* cleanup
* further cleanup input
* further cleanup input
* further cleanup input
* further cleanup input
* further cleanup input
* folder sizes to inspect caching
* dir command for local cloud runner test
* k8s wait for pending because pvc will not create earlier
* prefer k8s standard storage
* handle empty string as cloud runner cluster input
* local-system is now used for cloud runner test implementation AND correctly unset test CLI input
* local-system is now used for cloud runner test implementation AND correctly unset test CLI input
* fix unterminated quote
* fix unterminated quote
* do not share build parameters in tests - in cloud runner this will cause conflicts with resources of the same name
* remove head and heads from branch prefix
* fix reversed caching direction of cache-push
* fixes
* fixes
* fixes
* cachePull cli
* fixes
* fixes
* fixes
* fixes
* fixes
* order cache test to be first
* order cache test to be first
* fixes
* populate cache key instead of using branch
* cleanup cli
* garbage-collect-aws cli can iterate over aws resources and cli scans all ts files
* import cli methods
* import cli files explicitly
* import cli files explicitly
* import cli files explicitly
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* log parameters in cloud runner parameter test
* log parameters in cloud runner parameter test
* log parameters in cloud runner parameter test
* Cloud runner param test before caching because we have a fast local cache test now
* Using custom build path relative to repo root rather than project root
* aws-garbage-collect at end of pipeline
* aws-garbage-collect do not actually delete anything for now - just list
* remove some legacy du commands
* Update cloud-runner-aws-pipeline.yml
* log contents after cache pull and fix some scenarios with duplicate secrets
* log contents after cache pull and fix some scenarios with duplicate secrets
* log contents after cache pull and fix some scenarios with duplicate secrets
* PR comments
* Replace guid with uuid package
* use fileExists lambda instead of stat to check file exists in caching
* build failed results in core error message
* Delete sample.txt
2022-04-10 23:00:37 +00:00
|
|
|
import CloudRunnerLogger from '../../services/cloud-runner-logger';
|
2022-02-01 02:31:20 +00:00
|
|
|
import * as core from '@actions/core';
|
Cloud Runner v0 - Reliable and trimmed down cloud runner (#353)
* Update cloud-runner-aws-pipeline.yml
* Update cloud-runner-k8s-pipeline.yml
* yarn build
* yarn build
* correct branch ref
* correct branch ref passed to target repo
* Create k8s-tests.yml
* Delete k8s-tests.yml
* correct branch ref passed to target repo
* correct branch ref passed to target repo
* Always describe AWS tasks for now, because unstable error handling
* Remove unused tree commands
* Use lfs guid sum
* Simple override cache push
* Simple override cache push and pull override to allow pure cloud storage driven caching
* Removal of early branch (breaks lfs caching)
* Remove unused tree commands
* Update action.yml
* Update action.yml
* Support cache and input override commands as input + full support custom hooks
* Increase k8s timeout
* replace filename being appended for unclear reason
* cache key should not contain whitespaces
* Always try and deploy rook for k8s
* Apply k8s files for rook
* Update action.yml
* Apply k8s files for rook
* Apply k8s files for rook
* cache test and action description for kuber storage class
* Correct test and implement dependency health check and start
* GCP-secret run, cache key
* lfs smudge set explicit and undo explicit
* Run using external secret provider to speed up input
* Update cloud-runner-aws-pipeline.yml
* Add nodejs as build step dependency
* Add nodejs as build step dependency
* Cloud Runner Tests must be specified to capture logs from cloud runner for tests
* Cloud Runner Tests must be specified to capture logs from cloud runner for tests
* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs
* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs
* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs
* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs
* Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs
* better defaults for new inputs
* better defaults
* merge latest
* force build update
* use npm n to update node in unity builder
* use npm n to update node in unity builder
* use npm n to update node in unity builder
* correct new line
* quiet zipping
* quiet zipping
* default secrets for unity username and password
* default secrets for unity username and password
* ls active directory before lfs install
* Get cloud runner secrets from
* Get cloud runner secrets from
* Cleanup setup of default secrets
* Various fixes
* Cleanup setup of default secrets
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* Various fixes
* AWS secrets manager support
* less caching logs
* default k8s storage class to pd-standard
* more readable build commands
* Capture aws exit code 1 reliably
* Always replace /head from branch
* k8s default storage class to standard-rwo
* cleanup
* further cleanup input
* further cleanup input
* further cleanup input
* further cleanup input
* further cleanup input
* folder sizes to inspect caching
* dir command for local cloud runner test
* k8s wait for pending because pvc will not create earlier
* prefer k8s standard storage
* handle empty string as cloud runner cluster input
* local-system is now used for cloud runner test implementation AND correctly unset test CLI input
* local-system is now used for cloud runner test implementation AND correctly unset test CLI input
* fix unterminated quote
* fix unterminated quote
* do not share build parameters in tests - in cloud runner this will cause conflicts with resources of the same name
* remove head and heads from branch prefix
* fix reversed caching direction of cache-push
* fixes
* fixes
* fixes
* cachePull cli
* fixes
* fixes
* fixes
* fixes
* fixes
* order cache test to be first
* order cache test to be first
* fixes
* populate cache key instead of using branch
* cleanup cli
* garbage-collect-aws cli can iterate over aws resources and cli scans all ts files
* import cli methods
* import cli files explicitly
* import cli files explicitly
* import cli files explicitly
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* import cli methods
* log parameters in cloud runner parameter test
* log parameters in cloud runner parameter test
* log parameters in cloud runner parameter test
* Cloud runner param test before caching because we have a fast local cache test now
* Using custom build path relative to repo root rather than project root
* aws-garbage-collect at end of pipeline
* aws-garbage-collect do not actually delete anything for now - just list
* remove some legacy du commands
* Update cloud-runner-aws-pipeline.yml
* log contents after cache pull and fix some scenarios with duplicate secrets
* log contents after cache pull and fix some scenarios with duplicate secrets
* log contents after cache pull and fix some scenarios with duplicate secrets
* PR comments
* Replace guid with uuid package
* use fileExists lambda instead of stat to check file exists in caching
* build failed results in core error message
* Delete sample.txt
2022-04-10 23:00:37 +00:00
|
|
|
import { CloudRunnerStatics } from '../../cloud-runner-statics';
|
2022-02-01 02:31:20 +00:00
|
|
|
import waitUntil from 'async-wait-until';
|
2022-05-04 23:25:17 +00:00
|
|
|
import { FollowLogStreamService } from '../../services/follow-log-stream-service';
|
2022-02-01 02:31:20 +00:00
|
|
|
|
|
|
|
/**
 * Helpers for running a task in a Kubernetes pod: streaming a container's logs
 * through `FollowLogStreamService` and polling a pod until it leaves the
 * `Pending` phase.
 */
class KubernetesTaskRunner {
  /**
   * Passed to the log request as `sinceSeconds`. Nothing in this class assigns it.
   * NOTE(review): the name suggests a timestamp, whereas `sinceSeconds` is a
   * relative duration in seconds — confirm the intended meaning with the caller.
   */
  static lastReceivedTimestamp: number;

  /**
   * Streams the logs of `containerName` in `podName` and returns the output
   * accumulated by `FollowLogStreamService.handleIteration`.
   *
   * @param kubeConfig Configuration used to construct the `Log` client.
   * @param kubeClient Client used to list namespace events when no logs arrive.
   * @param jobName Job name, used only to filter the namespace events.
   * @param podName Pod whose logs are streamed.
   * @param containerName Container whose logs are streamed.
   * @param namespace Namespace containing the pod.
   * @returns The `output` value produced by the last `handleIteration` call
   *   (an empty string if it never ran).
   * @throws The truthy value resolved by `Log.log`, any error it rejects with,
   *   or `Error('No logs streamed from k8s')` when no chunk was written.
   */
  static async runTask(
    kubeConfig: KubeConfig,
    kubeClient: CoreV1Api,
    jobName: string,
    podName: string,
    containerName: string,
    namespace: string,
  ) {
    CloudRunnerLogger.log(`Streaming logs from pod: ${podName} container: ${containerName} namespace: ${namespace}`);
    const stream = new Writable();
    let output = '';
    let didStreamAnyLogs = false;
    let shouldReadLogs = true;
    let shouldCleanup = true;
    stream._write = (chunk, encoding, next) => {
      didStreamAnyLogs = true;

      // trimEnd() takes no argument; it strips all trailing whitespace, including the final newline.
      let message = chunk.toString().trimEnd();
      message = `[${CloudRunnerStatics.logPrefix}] ${message}`;
      ({ shouldReadLogs, shouldCleanup, output } = FollowLogStreamService.handleIteration(
        message,
        shouldReadLogs,
        shouldCleanup,
        output,
      ));
      next();
    };

    // Reference copy of the client's `LogOptions` fields:
    //   follow?: boolean       Follow the log stream of the pod. Defaults to false.
    //   limitBytes?: number    If set, the number of bytes to read from the server before terminating the log
    //                          output. This may not display a complete final line of logging, and may return
    //                          slightly more or slightly less than the specified limit.
    //   pretty?: boolean       If true, then the output is pretty printed.
    //   previous?: boolean     Return previous terminated container logs. Defaults to false.
    //   sinceSeconds?: number  A relative time in seconds before the current time from which to show logs. If
    //                          this value precedes the time a pod was started, only logs since the pod start
    //                          will be returned. If this value is in the future, no logs will be returned.
    //                          Only one of sinceSeconds or sinceTime may be specified.
    //   tailLines?: number     If set, the number of lines from the end of the logs to show. If not specified,
    //                          logs are shown from the creation of the container or sinceSeconds or sinceTime.
    //   timestamps?: boolean   If true, add an RFC3339 or RFC3339Nano timestamp at the beginning of every line
    //                          of log output. Defaults to false.
    const logOptions = {
      follow: true,
      pretty: false,
      previous: true,
      timestamps: true,
      sinceSeconds: KubernetesTaskRunner.lastReceivedTimestamp,
    };
    try {
      const resultError = await new Log(kubeConfig).log(namespace, podName, containerName, stream, logOptions);
      stream.destroy();
      if (resultError) {
        throw resultError;
      }
      if (!didStreamAnyLogs) {
        core.error('Failed to stream any logs, listing namespace events, check for an error with the container');

        // Only events that involve this pod or its job are relevant to the failure.
        const events = (await kubeClient.listNamespacedEvent(namespace)).body.items
          .filter((x) => {
            return x.involvedObject.name === podName || x.involvedObject.name === jobName;
          })
          .map((x) => {
            return {
              type: x.involvedObject.kind,
              name: x.involvedObject.name,
              message: x.message,
            };
          });
        core.error(JSON.stringify({ events }, undefined, 4));
        throw new Error(`No logs streamed from k8s`);
      }
    } catch (error) {
      // Destroying an already destroyed stream is harmless, so no guard is needed.
      stream.destroy();
      CloudRunnerLogger.log(KubernetesTaskRunner.describeError(error));
      CloudRunnerLogger.log('k8s task runner failed');
      throw error;
    }
    CloudRunnerLogger.log('end of log stream');

    return output;
  }

  /**
   * Polls the pod status until the pod is no longer `Pending`.
   *
   * @param kubeClient Client used to read the pod status.
   * @param podName Pod to watch.
   * @param namespace Namespace containing the pod.
   * @returns `true` if the last observed phase was `Running`, `false` for any
   *   other non-`Pending` phase.
   */
  static async watchUntilPodRunning(kubeClient: CoreV1Api, podName: string, namespace: string): Promise<boolean> {
    let success = false;
    CloudRunnerLogger.log(`Watching ${podName} ${namespace}`);
    await waitUntil(
      async () => {
        const status = await kubeClient.readNamespacedPodStatus(podName, namespace);
        const podStatus = status?.body.status;
        const phase = podStatus?.phase;

        // `conditions` may be missing or empty, so the first entry is optional too.
        const firstCondition = podStatus?.conditions?.[0];
        success = phase === 'Running';
        CloudRunnerLogger.log(`${phase} ${firstCondition?.reason || ''} ${firstCondition?.message || ''}`);

        // Stop polling once the pod is Running or has reached any other non-Pending phase.
        return success || phase !== 'Pending';
      },
      {
        // NOTE(review): presumably milliseconds, per async-wait-until — confirm.
        timeout: 2000000,
        intervalBetweenAttempts: 15000,
      },
    );

    return success;
  }

  /**
   * Builds a loggable description of a caught value without ever throwing.
   *
   * `JSON.stringify` on an `Error` yields `{}` because `message` and `stack`
   * are non-enumerable, and it throws on circular structures; both cases are
   * handled here so logging cannot hide the original failure.
   */
  private static describeError(error: unknown): string {
    let serialized: string | undefined;
    try {
      serialized = JSON.stringify(error);
    } catch {
      serialized = undefined;
    }
    if (error instanceof Error) {
      const details = error.stack || error.message;

      // Keep extra enumerable fields (e.g. an HTTP status or body) when there are any.
      return serialized && serialized !== '{}' ? `${details}\n${serialized}` : details;
    }

    return serialized ?? String(error);
  }
}
|
|
|
|
|
|
|
|
export default KubernetesTaskRunner;
|