unity-builder/src/model/cloud-runner/providers/k8s/kubernetes-task-runner.ts

164 lines
5.4 KiB
TypeScript
Raw Normal View History

2022-02-01 02:31:20 +00:00
import { CoreV1Api, KubeConfig, Log } from '@kubernetes/client-node';
import { Writable } from 'stream';
Cloud Runner v0 - Reliable and trimmed down cloud runner (#353) * Update cloud-runner-aws-pipeline.yml * Update cloud-runner-k8s-pipeline.yml * yarn build * yarn build * correct branch ref * correct branch ref passed to target repo * Create k8s-tests.yml * Delete k8s-tests.yml * correct branch ref passed to target repo * correct branch ref passed to target repo * Always describe AWS tasks for now, because unstable error handling * Remove unused tree commands * Use lfs guid sum * Simple override cache push * Simple override cache push and pull override to allow pure cloud storage driven caching * Removal of early branch (breaks lfs caching) * Remove unused tree commands * Update action.yml * Update action.yml * Support cache and input override commands as input + full support custom hooks * Increase k8s timeout * replace filename being appended for unknclear reason * cache key should not contain whitespaces * Always try and deploy rook for k8s * Apply k8s files for rook * Update action.yml * Apply k8s files for rook * Apply k8s files for rook * cache test and action description for kuber storage class * Correct test and implement dependency health check and start * GCP-secret run, cache key * lfs smudge set explicit and undo explicit * Run using external secret provider to speed up input * Update cloud-runner-aws-pipeline.yml * Add nodejs as build step dependency * Add nodejs as build step dependency * Cloud Runner Tests must be specified to capture logs from cloud runner for tests * Cloud Runner Tests must be specified to capture logs from cloud runner for tests * Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs * Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs * Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs * Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs * Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs * better defaults for new inputs * better defaults * merge latest * force build update * use npm n to update node in unity builder * use npm n to update node in unity builder * use npm n to update node in unity builder * correct new line * quiet zipping * quiet zipping * default secrets for unity username and password * default secrets for unity username and password * ls active directory before lfs install * Get cloud runner secrets from * Get cloud runner secrets from * Cleanup setup of default secrets * Various fixes * Cleanup setup of default secrets * Various fixes * Various fixes * Various fixes * Various fixes * Various fixes * Various fixes * Various fixes * Various fixes * Various fixes * Various fixes * Various fixes * Various fixes * Various fixes * Various fixes * AWS secrets manager support * less caching logs * default k8s storage class to pd-standard * more readable build commands * Capture aws exit code 1 reliably * Always replace /head from branch * k8s default storage class to standard-rwo * cleanup * further cleanup input * further cleanup input * further cleanup input * further cleanup input * further cleanup input * folder sizes to inspect caching * dir command for local cloud runner test * k8s wait for pending because pvc will not create earlier * prefer k8s standard storage * handle empty string as cloud runner cluster input * local-system is now used for cloud runner test implementation AND correctly unset test CLI input * local-system is now used for cloud runner test implementation AND correctly unset test CLI input * fix unterminated quote * fix unterminated quote * do not share build parameters in tests - in cloud runner this will cause conflicts with resouces of the same name * remove head and heads from branch prefix * fix reversed caching direction of cache-push * fixes * fixes * fixes * cachePull cli * fixes * fixes * fixes * fixes * fixes * order cache test to be first * order cache test to be first * fixes * populate cache key instead of using branch * cleanup cli * garbage-collect-aws cli can iterate over aws resources and cli scans all ts files * import cli methods * import cli files explicitly * import cli files explicitly * import cli files explicitly * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * log parameters in cloud runner parameter test * log parameters in cloud runner parameter test * log parameters in cloud runner parameter test * Cloud runner param test before caching because we have a fast local cache test now * Using custom build path relative to repo root rather than project root * aws-garbage-collect at end of pipeline * aws-garbage-collect do not actually delete anything for now - just list * remove some legacy du commands * Update cloud-runner-aws-pipeline.yml * log contents after cache pull and fix some scenarios with duplicate secrets * log contents after cache pull and fix some scenarios with duplicate secrets * log contents after cache pull and fix some scenarios with duplicate secrets * PR comments * Replace guid with uuid package * use fileExists lambda instead of stat to check file exists in caching * build failed results in core error message * Delete sample.txt
2022-04-10 23:00:37 +00:00
import CloudRunnerLogger from '../../services/cloud-runner-logger';
2022-02-01 02:31:20 +00:00
import * as core from '@actions/core';
Cloud Runner v0 - Reliable and trimmed down cloud runner (#353) * Update cloud-runner-aws-pipeline.yml * Update cloud-runner-k8s-pipeline.yml * yarn build * yarn build * correct branch ref * correct branch ref passed to target repo * Create k8s-tests.yml * Delete k8s-tests.yml * correct branch ref passed to target repo * correct branch ref passed to target repo * Always describe AWS tasks for now, because unstable error handling * Remove unused tree commands * Use lfs guid sum * Simple override cache push * Simple override cache push and pull override to allow pure cloud storage driven caching * Removal of early branch (breaks lfs caching) * Remove unused tree commands * Update action.yml * Update action.yml * Support cache and input override commands as input + full support custom hooks * Increase k8s timeout * replace filename being appended for unknclear reason * cache key should not contain whitespaces * Always try and deploy rook for k8s * Apply k8s files for rook * Update action.yml * Apply k8s files for rook * Apply k8s files for rook * cache test and action description for kuber storage class * Correct test and implement dependency health check and start * GCP-secret run, cache key * lfs smudge set explicit and undo explicit * Run using external secret provider to speed up input * Update cloud-runner-aws-pipeline.yml * Add nodejs as build step dependency * Add nodejs as build step dependency * Cloud Runner Tests must be specified to capture logs from cloud runner for tests * Cloud Runner Tests must be specified to capture logs from cloud runner for tests * Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs * Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs * Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs * Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs * Refactor and cleanup - no async input, combined setup/build, removed github logs for cli runs * better defaults for new inputs * better defaults * merge latest * force build update * use npm n to update node in unity builder * use npm n to update node in unity builder * use npm n to update node in unity builder * correct new line * quiet zipping * quiet zipping * default secrets for unity username and password * default secrets for unity username and password * ls active directory before lfs install * Get cloud runner secrets from * Get cloud runner secrets from * Cleanup setup of default secrets * Various fixes * Cleanup setup of default secrets * Various fixes * Various fixes * Various fixes * Various fixes * Various fixes * Various fixes * Various fixes * Various fixes * Various fixes * Various fixes * Various fixes * Various fixes * Various fixes * Various fixes * AWS secrets manager support * less caching logs * default k8s storage class to pd-standard * more readable build commands * Capture aws exit code 1 reliably * Always replace /head from branch * k8s default storage class to standard-rwo * cleanup * further cleanup input * further cleanup input * further cleanup input * further cleanup input * further cleanup input * folder sizes to inspect caching * dir command for local cloud runner test * k8s wait for pending because pvc will not create earlier * prefer k8s standard storage * handle empty string as cloud runner cluster input * local-system is now used for cloud runner test implementation AND correctly unset test CLI input * local-system is now used for cloud runner test implementation AND correctly unset test CLI input * fix unterminated quote * fix unterminated quote * do not share build parameters in tests - in cloud runner this will cause conflicts with resouces of the same name * remove head and heads from branch prefix * fix reversed caching direction of cache-push * fixes * fixes * fixes * cachePull cli * fixes * fixes * fixes * fixes * fixes * order cache test to be first * order cache test to be first * fixes * populate cache key instead of using branch * cleanup cli * garbage-collect-aws cli can iterate over aws resources and cli scans all ts files * import cli methods * import cli files explicitly * import cli files explicitly * import cli files explicitly * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * import cli methods * log parameters in cloud runner parameter test * log parameters in cloud runner parameter test * log parameters in cloud runner parameter test * Cloud runner param test before caching because we have a fast local cache test now * Using custom build path relative to repo root rather than project root * aws-garbage-collect at end of pipeline * aws-garbage-collect do not actually delete anything for now - just list * remove some legacy du commands * Update cloud-runner-aws-pipeline.yml * log contents after cache pull and fix some scenarios with duplicate secrets * log contents after cache pull and fix some scenarios with duplicate secrets * log contents after cache pull and fix some scenarios with duplicate secrets * PR comments * Replace guid with uuid package * use fileExists lambda instead of stat to check file exists in caching * build failed results in core error message * Delete sample.txt
2022-04-10 23:00:37 +00:00
import { CloudRunnerStatics } from '../../cloud-runner-statics';
2022-02-01 02:31:20 +00:00
import waitUntil from 'async-wait-until';
import { FollowLogStreamService } from '../../services/follow-log-stream-service';
2022-02-01 02:31:20 +00:00
class KubernetesTaskRunner {
2023-02-01 23:00:41 +00:00
static lastReceivedTimestamp: number;
2022-02-01 02:31:20 +00:00
static async runTask(
kubeConfig: KubeConfig,
kubeClient: CoreV1Api,
jobName: string,
podName: string,
containerName: string,
namespace: string,
) {
CloudRunnerLogger.log(`Streaming logs from pod: ${podName} container: ${containerName} namespace: ${namespace}`);
const stream = new Writable();
let output = '';
let didStreamAnyLogs: boolean = false;
let shouldReadLogs = true;
let shouldCleanup = true;
2022-02-01 02:31:20 +00:00
stream._write = (chunk, encoding, next) => {
didStreamAnyLogs = true;
let message = chunk.toString().trimRight(`\n`);
message = `[${CloudRunnerStatics.logPrefix}] ${message}`;
({ shouldReadLogs, shouldCleanup, output } = FollowLogStreamService.handleIteration(
message,
shouldReadLogs,
shouldCleanup,
output,
));
2022-02-01 02:31:20 +00:00
next();
};
2023-02-01 23:00:41 +00:00
// export interface LogOptions {
/**
* Follow the log stream of the pod. Defaults to false.
*/
// follow?: boolean;
/**
* If set, the number of bytes to read from the server before terminating the log output. This may not display a
* complete final line of logging, and may return slightly more or slightly less than the specified limit.
*/
// limitBytes?: number;
/**
* If true, then the output is pretty printed.
*/
// pretty?: boolean;
/**
* Return previous terminated container logs. Defaults to false.
*/
// previous?: boolean;
/**
* A relative time in seconds before the current time from which to show logs. If this value precedes the time a
* pod was started, only logs since the pod start will be returned. If this value is in the future, no logs will
* be returned. Only one of sinceSeconds or sinceTime may be specified.
*/
// sinceSeconds?: number;
/**
* If set, the number of lines from the end of the logs to show. If not specified, logs are shown from the creation
* of the container or sinceSeconds or sinceTime
*/
// tailLines?: number;
/**
* If true, add an RFC3339 or RFC3339Nano timestamp at the beginning of every line of log output. Defaults to false.
*/
// timestamps?: boolean;
// }
2022-02-01 02:31:20 +00:00
const logOptions = {
follow: true,
pretty: false,
2023-02-09 23:51:58 +00:00
previous: false,
2023-02-01 23:00:41 +00:00
timestamps: true,
sinceSeconds: KubernetesTaskRunner.lastReceivedTimestamp,
2022-02-01 02:31:20 +00:00
};
try {
2023-02-01 23:00:41 +00:00
const resultError = await new Log(kubeConfig).log(namespace, podName, containerName, stream, logOptions);
2022-02-01 02:31:20 +00:00
stream.destroy();
if (resultError) {
throw resultError;
}
if (!didStreamAnyLogs) {
core.error('Failed to stream any logs, listing namespace events, check for an error with the container');
core.error(
JSON.stringify(
{
events: (await kubeClient.listNamespacedEvent(namespace)).body.items
.filter((x) => {
return x.involvedObject.name === podName || x.involvedObject.name === jobName;
})
.map((x) => {
return {
type: x.involvedObject.kind,
name: x.involvedObject.name,
message: x.message,
};
}),
},
undefined,
4,
),
);
throw new Error(`No logs streamed from k8s`);
}
2023-02-15 19:50:51 +00:00
} catch (error) {
2022-02-01 02:31:20 +00:00
if (stream) {
stream.destroy();
}
2023-02-02 21:29:18 +00:00
CloudRunnerLogger.log('k8s task runner failed');
2023-02-15 19:57:07 +00:00
CloudRunnerLogger.log(JSON.stringify(error, undefined, 4));
2022-02-01 02:31:20 +00:00
}
CloudRunnerLogger.log('end of log stream');
2022-02-01 02:31:20 +00:00
return output;
}
static async watchUntilPodRunning(kubeClient: CoreV1Api, podName: string, namespace: string) {
let success: boolean = false;
CloudRunnerLogger.log(`Watching ${podName} ${namespace}`);
await waitUntil(
async () => {
const status = await kubeClient.readNamespacedPodStatus(podName, namespace);
const phase = status?.body.status?.phase;
success = phase === 'Running';
CloudRunnerLogger.log(
2023-02-15 01:34:36 +00:00
`Phase:${status.body.status?.phase} Reason:${status.body.status?.conditions?.[0].reason || ''} Message:${
2022-02-01 02:31:20 +00:00
status.body.status?.conditions?.[0].message || ''
2023-02-14 12:35:50 +00:00
}`,
2022-02-01 02:31:20 +00:00
);
2023-02-15 01:34:36 +00:00
CloudRunnerLogger.log(
JSON.stringify(
(await kubeClient.listNamespacedEvent(namespace)).body.items
.map((x) => {
return {
message: x.message || ``,
name: x.metadata.name || ``,
reason: x.reason || ``,
};
})
.filter((x) => x.name.includes(podName)),
undefined,
4,
),
);
2022-02-01 02:31:20 +00:00
if (success || phase !== 'Pending') return true;
2022-02-01 02:31:20 +00:00
return false;
},
{
timeout: 2000000,
intervalBetweenAttempts: 15000,
},
);
2022-02-01 02:31:20 +00:00
return success;
}
}
export default KubernetesTaskRunner;