From 237e6290344b5b378a3eb31c82408470dda59d77 Mon Sep 17 00:00:00 2001
From: Kartik Gupta <88345179+kartikgupta-db@users.noreply.github.com>
Date: Sat, 11 Nov 2023 13:21:28 +0000
Subject: [PATCH] All `AuthProviders` must implement a non-silent `check`. (#936)

## Changes
* Moving forward, we want to avoid silent checks to make reasoning about auth easier. This PR makes it so that all AuthProviders must implement the `check` method.
* Each `check` method must handle its own retry loop, error handling, and display. Since checks are now always interactive, we do not want calling code to manage these interactions.

## Tests
* manual

---
 .github/workflows/nightly-release.yml | 6 +-
 .github/workflows/push.yml | 4 +-
 packages/databricks-vscode/.eslintrc.json | 6 +
 .../eslint-local-rules/index.js | 12 +
 .../rules/mutexSynchronisedDecorator.ts | 76 +
 .../eslint-local-rules/tsconfig.json | 6 +
 packages/databricks-vscode/package.json | 10 +-
 .../scripts/writeBundleSchema.ts | 25 +
 .../src/bundle/BundleFileSet.test.ts | 195 +
 .../src/bundle/BundleFileSet.ts | 132 +
 .../src/bundle/BundleSchema.d.ts | 5541 +++++++++++++++++
 .../src/bundle/GenerateBundle.ts | 46 -
 .../src/bundle/bundleAutocompleteProvider.ts | 65 +
 .../databricks-vscode/src/bundle/types.ts | 3 +
 .../src/cli/DatabricksCliSyncParser.ts | 1 +
 .../configuration/BundleConfigReaderWriter.ts | 210 +
 .../src/configuration/ConfigModel.ts | 167 +
 .../ConfigOverrideReaderWriter.ts | 55 +
 .../src/configuration/ConnectionManager.ts | 4 +-
 .../src/configuration/auth/AuthProvider.ts | 40 +-
 .../src/configuration/auth/AzureCliCheck.ts | 5 +-
 .../configuration/auth/DatabricksCliCheck.ts | 11 +-
 .../src/configuration/auth/orchestrate.ts | 2 +-
 .../src/configuration/types.ts | 47 +
 packages/databricks-vscode/src/extension.ts | 15 +-
 .../src/file-managers/BundleWatcher.ts | 72 +
 .../file-managers/ProjectConfigFile.test.ts | 1 -
 .../src/locking/CachedValue.test.ts | 42 +
 .../src/locking/CachedValue.ts | 32 +
 .../databricks-vscode/src/locking/Mutex.ts | 28 +
 .../databricks-vscode/src/locking/index.ts | 7 +
 .../src/vscode-objs/StateStorage.ts | 10 +-
 yarn.lock | 440 +-
 33 files changed, 7232 insertions(+), 84 deletions(-)
 create mode 100644 packages/databricks-vscode/.eslintrc.json
 create mode 100644 packages/databricks-vscode/eslint-local-rules/index.js
 create mode 100644 packages/databricks-vscode/eslint-local-rules/rules/mutexSynchronisedDecorator.ts
 create mode 100644 packages/databricks-vscode/eslint-local-rules/tsconfig.json
 create mode 100644 packages/databricks-vscode/scripts/writeBundleSchema.ts
 create mode 100644 packages/databricks-vscode/src/bundle/BundleFileSet.test.ts
 create mode 100644 packages/databricks-vscode/src/bundle/BundleFileSet.ts
 create mode 100644 packages/databricks-vscode/src/bundle/BundleSchema.d.ts
 delete mode 100644 packages/databricks-vscode/src/bundle/GenerateBundle.ts
 create mode 100644 packages/databricks-vscode/src/bundle/bundleAutocompleteProvider.ts
 create mode 100644 packages/databricks-vscode/src/bundle/types.ts
 create mode 100644 packages/databricks-vscode/src/configuration/BundleConfigReaderWriter.ts
 create mode 100644 packages/databricks-vscode/src/configuration/ConfigModel.ts
 create mode 100644 packages/databricks-vscode/src/configuration/ConfigOverrideReaderWriter.ts
 create mode 100644 packages/databricks-vscode/src/configuration/types.ts
 create mode 100644 packages/databricks-vscode/src/file-managers/BundleWatcher.ts
 create mode 100644 packages/databricks-vscode/src/locking/CachedValue.test.ts
 create mode 100644 
packages/databricks-vscode/src/locking/CachedValue.ts diff --git a/.github/workflows/nightly-release.yml b/.github/workflows/nightly-release.yml index 9d2e214eb..144f1d180 100644 --- a/.github/workflows/nightly-release.yml +++ b/.github/workflows/nightly-release.yml @@ -2,7 +2,7 @@ name: Publish nightly release on: push: - branches: [main] + branches: [main, bundle-integ] workflow_dispatch: jobs: @@ -24,8 +24,8 @@ jobs: - name: Update nightly release uses: softprops/action-gh-release@v1 with: - name: Nightly + name: Nightly - ${{ github.ref_name }} prerelease: true - tag_name: nightly + tag_name: nightly-${{ github.ref_name }} token: ${{ secrets.GITHUB_TOKEN }} files: "packages/databricks-vscode/databricks*/*.vsix" diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index cf292fb67..11b6d0671 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -2,9 +2,9 @@ name: VSCode Extensions CI on: push: - branches: [main] + branches: [main, bundle-integ] pull_request: - branches: [main] + branches: [main, bundle-integ] jobs: run-tests: diff --git a/packages/databricks-vscode/.eslintrc.json b/packages/databricks-vscode/.eslintrc.json new file mode 100644 index 000000000..4f1ba9b7a --- /dev/null +++ b/packages/databricks-vscode/.eslintrc.json @@ -0,0 +1,6 @@ +{ + "plugins": ["eslint-plugin-local-rules"], + "rules": { + "local-rules/mutex-synchronised-decorator": "error" + } +} diff --git a/packages/databricks-vscode/eslint-local-rules/index.js b/packages/databricks-vscode/eslint-local-rules/index.js new file mode 100644 index 000000000..e82cd1def --- /dev/null +++ b/packages/databricks-vscode/eslint-local-rules/index.js @@ -0,0 +1,12 @@ +/* eslint-disable */ +require("ts-node").register({ + transpileOnly: true, + compilerOptions: { + module: "commonjs", + }, +}); + +module.exports = { + "mutex-synchronised-decorator": + require("./rules/mutexSynchronisedDecorator").default, +}; diff --git a/packages/databricks-vscode/eslint-local-rules/rules/mutexSynchronisedDecorator.ts b/packages/databricks-vscode/eslint-local-rules/rules/mutexSynchronisedDecorator.ts new file mode 100644 index 000000000..5984af291 --- /dev/null +++ b/packages/databricks-vscode/eslint-local-rules/rules/mutexSynchronisedDecorator.ts @@ -0,0 +1,76 @@ +/* eslint-disable no-console */ +import {ESLintUtils, AST_NODE_TYPES} from "@typescript-eslint/utils"; + +const rule = ESLintUtils.RuleCreator.withoutDocs({ + create(context) { + return { + // eslint-disable-next-line @typescript-eslint/naming-convention + Decorator(node) { + if ( + node.expression.type === "CallExpression" && + node.expression.callee.type === + AST_NODE_TYPES.MemberExpression && + node.expression.callee.object.type === + AST_NODE_TYPES.Identifier && + node.expression.callee.object.name === "Mutex" && + node.expression.callee.property.type === + AST_NODE_TYPES.Identifier && + node.expression.callee.property.name === "synchronise" && + node.expression.arguments.length > 0 + ) { + const mutexName = + node.expression.arguments[0].type === + AST_NODE_TYPES.Literal + ? 
(node.expression.arguments[0].value as string) + : ""; + + if (node.parent.parent?.type !== AST_NODE_TYPES.ClassBody) { + return context.report({ + node, + messageId: "decoratorNotInClass", + }); + } + + const exists = node.parent.parent.body.some((element) => { + return ( + element.type === + AST_NODE_TYPES.PropertyDefinition && + element.key.type === AST_NODE_TYPES.Identifier && + element.key.name === mutexName + ); + }); + + const className = + node.parent.parent.parent?.type === + AST_NODE_TYPES.ClassDeclaration + ? node.parent.parent.parent.id?.name + : undefined; + + if (!exists) { + return context.report({ + node, + messageId: "synchronisedMutexNotInClass", + data: { + mutexName, + className, + }, + }); + } + } + }, + }; + }, + meta: { + type: "problem", + schema: [], + messages: { + decoratorNotInClass: + "Mutex.synchronized() should be used inside a class", + synchronisedMutexNotInClass: + 'Mutex "{{mutexName}}" is not defined in class "{{className}}".', + }, + }, + defaultOptions: [], +}); + +export default rule; diff --git a/packages/databricks-vscode/eslint-local-rules/tsconfig.json b/packages/databricks-vscode/eslint-local-rules/tsconfig.json new file mode 100644 index 000000000..02566e202 --- /dev/null +++ b/packages/databricks-vscode/eslint-local-rules/tsconfig.json @@ -0,0 +1,6 @@ +{ + "transpileOnly": true, + "compilerOptions": { + "module": "Node16" + } +} diff --git a/packages/databricks-vscode/package.json b/packages/databricks-vscode/package.json index c8a59e16f..e3b831cdc 100644 --- a/packages/databricks-vscode/package.json +++ b/packages/databricks-vscode/package.json @@ -666,6 +666,7 @@ "package:cli:link": "rm -f ./bin/databricks && mkdir -p bin && ln -s ../../../../cli/cli bin/databricks", "package:wrappers:write": "ts-node ./scripts/writeIpynbWrapper.ts -s ./resources/python/notebook.workflow-wrapper.py -o ./resources/python/generated/notebook.workflow-wrapper.json", "package:jupyter-init-script:write": "ts-node ./scripts/writeJupyterInitFileWithVersion.ts", + "package:bundle-schema:write": "yarn package:cli:fetch && ts-node ./scripts/writeBundleSchema.ts ./bin/databricks ./src/bundle/BundleSchema.d.ts", "package:compile": "yarn run esbuild:base", "package:copy-webview-toolkit": "cp ./node_modules/@vscode/webview-ui-toolkit/dist/toolkit.js ./out/toolkit.js", "esbuild:base": "esbuild ./src/extension.ts --bundle --outfile=out/extension.js --external:vscode --format=cjs --platform=node --sourcemap --target=es2019", @@ -687,6 +688,7 @@ "dependencies": { "@databricks/databricks-sdk": "file:../../vendor/databricks-sdk.tgz", "@databricks/databricks-vscode-types": "workspace:^", + "@types/lodash": "^4.14.199", "@vscode/debugadapter": "^1.61.0", "@vscode/extension-telemetry": "^0.9.0", "@vscode/webview-ui-toolkit": "^1.2.2", @@ -702,8 +704,9 @@ "@sinonjs/fake-timers": "^11.2.2", "@types/bcryptjs": "^2.4.2", "@types/chai": "^4.3.5", + "@types/eslint": "^8.44.6", "@types/fs-extra": "^11.0.1", - "@types/mocha": "^10.0.1", + "@types/mocha": "^10.0.2", "@types/mock-require": "^2.0.1", "@types/node": "^20.4.2", "@types/sinonjs__fake-timers": "^8.1.2", @@ -713,6 +716,7 @@ "@types/yargs": "^17.0.24", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", + "@typescript-eslint/utils": "^6.9.0", "@vscode/test-electron": "^2.3.3", "@wdio/cli": "^8.12.2", "@wdio/local-runner": "^8.12.1", @@ -722,8 +726,10 @@ "chai": "^4.3.7", "esbuild": "^0.19.4", "eslint": "^8.51.0", + "eslint-plugin-local-rules": "^2.0.0", "fs-extra": "^11.1.1", - "glob": "^10.3.3", + 
"glob": "^10.3.10", + "json-schema-to-typescript": "^13.1.1", "mocha": "^10.2.0", "mock-require": "^3.0.3", "nyc": "^15.1.0", diff --git a/packages/databricks-vscode/scripts/writeBundleSchema.ts b/packages/databricks-vscode/scripts/writeBundleSchema.ts new file mode 100644 index 000000000..0c8d9ad04 --- /dev/null +++ b/packages/databricks-vscode/scripts/writeBundleSchema.ts @@ -0,0 +1,25 @@ +/** + * This script generates the BundleSchema.d.ts file from the bundle schema. + * It MUST be run after a yarn package:cli:fetch + */ + +import * as cp from "child_process"; +import * as fs from "fs"; +import {compileFromFile} from "json-schema-to-typescript"; +import {tmpdir} from "os"; +import path from "path"; +import {argv} from "process"; + +const output = cp.execFileSync(argv[2], ["bundle", "schema"]); + +const tmpFile = path.join(tmpdir(), "BundleSchema.json"); +fs.writeFileSync(tmpFile, output); + +// eslint-disable-next-line no-console +console.log("Bundle schema written to", tmpFile); + +// compile from file +compileFromFile(tmpFile).then((ts) => fs.writeFileSync(argv[3], ts)); + +// eslint-disable-next-line no-console +console.log("BundleSchema.d.ts written to", argv[3]); diff --git a/packages/databricks-vscode/src/bundle/BundleFileSet.test.ts b/packages/databricks-vscode/src/bundle/BundleFileSet.test.ts new file mode 100644 index 000000000..1ff8efc6f --- /dev/null +++ b/packages/databricks-vscode/src/bundle/BundleFileSet.test.ts @@ -0,0 +1,195 @@ +import {Uri} from "vscode"; +import {BundleFileSet} from "./BundleFileSet"; +import {expect} from "chai"; +import path from "path"; +import * as tmp from "tmp-promise"; +import * as fs from "fs/promises"; +import {BundleSchema} from "./BundleSchema"; +import * as yaml from "yaml"; + +describe(__filename, async function () { + let tmpdir: tmp.DirectoryResult; + + beforeEach(async () => { + tmpdir = await tmp.dir({unsafeCleanup: true}); + }); + + afterEach(async () => { + await tmpdir.cleanup(); + }); + + it("should return the correct absolute path", () => { + const tmpdirUri = Uri.file(tmpdir.path); + + const bundleFileSet = new BundleFileSet(tmpdirUri); + + expect(bundleFileSet.getAbsolutePath("test.txt").fsPath).to.equal( + path.join(tmpdirUri.fsPath, "test.txt") + ); + + expect( + bundleFileSet.getAbsolutePath(Uri.file("test.txt")).fsPath + ).to.equal(path.join(tmpdirUri.fsPath, "test.txt")); + }); + + it("should find the correct root bundle yaml", async () => { + const tmpdirUri = Uri.file(tmpdir.path); + const bundleFileSet = new BundleFileSet(tmpdirUri); + + expect(await bundleFileSet.getRootFile()).to.be.undefined; + + await fs.writeFile(path.join(tmpdirUri.fsPath, "bundle.yaml"), ""); + + expect((await bundleFileSet.getRootFile())?.fsPath).to.equal( + path.join(tmpdirUri.fsPath, "bundle.yaml") + ); + }); + + it("should return undefined if more than one root bundle yaml is found", async () => { + const tmpdirUri = Uri.file(tmpdir.path); + const bundleFileSet = new BundleFileSet(tmpdirUri); + + await fs.writeFile(path.join(tmpdirUri.fsPath, "bundle.yaml"), ""); + await fs.writeFile(path.join(tmpdirUri.fsPath, "databricks.yaml"), ""); + + expect(await bundleFileSet.getRootFile()).to.be.undefined; + }); + + describe("file listing", async () => { + beforeEach(async () => { + const rootBundleData: BundleSchema = { + include: [ + "included.yaml", + path.join("includes", "**", "*.yaml"), + ], + }; + + await fs.writeFile( + path.join(tmpdir.path, "bundle.yaml"), + yaml.stringify(rootBundleData) + ); + + await fs.writeFile(path.join(tmpdir.path, 
"included.yaml"), ""); + await fs.writeFile(path.join(tmpdir.path, "notIncluded.yaml"), ""); + await fs.mkdir(path.join(tmpdir.path, "includes")); + await fs.writeFile( + path.join(tmpdir.path, "includes", "included.yaml"), + "" + ); + }); + + it("should return correct included files", async () => { + const tmpdirUri = Uri.file(tmpdir.path); + const bundleFileSet = new BundleFileSet(tmpdirUri); + + expect(await bundleFileSet.getIncludedFilesGlob()).to.equal( + `{included.yaml,${path.join("includes", "**", "*.yaml")}}` + ); + + const actual = (await bundleFileSet.getIncludedFiles())?.map( + (v) => v.fsPath + ); + const expected = [ + Uri.file(path.join(tmpdirUri.fsPath, "included.yaml")), + Uri.file( + path.join(tmpdirUri.fsPath, "includes", "included.yaml") + ), + ].map((v) => v.fsPath); + expect(actual).to.deep.equal(expected); + }); + + it("should return all bundle files", async () => { + const tmpdirUri = Uri.file(tmpdir.path); + const bundleFileSet = new BundleFileSet(tmpdirUri); + + const actual = (await bundleFileSet.allFiles()).map( + (v) => v.fsPath + ); + const expected = [ + Uri.joinPath(tmpdirUri, "bundle.yaml"), + Uri.joinPath(tmpdirUri, "included.yaml"), + Uri.joinPath(tmpdirUri, "includes", "included.yaml"), + ].map((v) => v.fsPath); + expect(actual).to.deep.equal(expected); + }); + + it("isRootBundleFile should return true only for root bundle file", async () => { + const tmpdirUri = Uri.file(tmpdir.path); + const bundleFileSet = new BundleFileSet(tmpdirUri); + + const possibleRoots = [ + "bundle.yaml", + "bundle.yml", + "databricks.yaml", + "databricks.yml", + ]; + + for (const root of possibleRoots) { + expect( + bundleFileSet.isRootBundleFile( + Uri.file(path.join(tmpdirUri.fsPath, root)) + ) + ).to.be.true; + } + + expect( + bundleFileSet.isRootBundleFile( + Uri.file(path.join(tmpdirUri.fsPath, "bundle-wrong.yaml")) + ) + ).to.be.false; + }); + + it("isIncludedBundleFile should return true only for included files", async () => { + const tmpdirUri = Uri.file(tmpdir.path); + const bundleFileSet = new BundleFileSet(tmpdirUri); + + expect( + await bundleFileSet.isIncludedBundleFile( + Uri.file(path.join(tmpdirUri.fsPath, "included.yaml")) + ) + ).to.be.true; + + expect( + await bundleFileSet.isIncludedBundleFile( + Uri.file( + path.join(tmpdirUri.fsPath, "includes", "included.yaml") + ) + ) + ).to.be.true; + + expect( + await bundleFileSet.isIncludedBundleFile( + Uri.file(path.join(tmpdirUri.fsPath, "notIncluded.yaml")) + ) + ).to.be.false; + }); + + it("isBundleFile should return true only for bundle files", async () => { + const tmpdirUri = Uri.file(tmpdir.path); + const bundleFileSet = new BundleFileSet(tmpdirUri); + + const possibleBundleFiles = [ + "bundle.yaml", + "bundle.yml", + "databricks.yaml", + "databricks.yml", + "included.yaml", + path.join("includes", "included.yaml"), + ]; + + for (const bundleFile of possibleBundleFiles) { + expect( + await bundleFileSet.isBundleFile( + Uri.file(path.join(tmpdirUri.fsPath, bundleFile)) + ) + ).to.be.true; + } + + expect( + await bundleFileSet.isBundleFile( + Uri.file(path.join(tmpdirUri.fsPath, "notIncluded.yaml")) + ) + ).to.be.false; + }); + }); +}); diff --git a/packages/databricks-vscode/src/bundle/BundleFileSet.ts b/packages/databricks-vscode/src/bundle/BundleFileSet.ts new file mode 100644 index 000000000..496da19af --- /dev/null +++ b/packages/databricks-vscode/src/bundle/BundleFileSet.ts @@ -0,0 +1,132 @@ +import {Uri} from "vscode"; +import * as glob from "glob"; +import {merge} from "lodash"; +import * as yaml from 
"yaml"; +import path from "path"; +import {BundleSchema} from "./BundleSchema"; +import {readFile, writeFile} from "fs/promises"; +import {CachedValue} from "../locking/CachedValue"; +import minimatch from "minimatch"; + +export async function parseBundleYaml(file: Uri) { + const data = yaml.parse(await readFile(file.fsPath, "utf-8")); + return data as BundleSchema; +} + +export async function writeBundleYaml(file: Uri, data: BundleSchema) { + await writeFile(file.fsPath, yaml.stringify(data)); +} + +function toGlobPath(path: string) { + if (process.platform === "win32") { + return path.replace(/\\/g, "/"); + } + return path; +} +export class BundleFileSet { + private rootFilePattern: string = "{bundle,databricks}.{yaml,yml}"; + public readonly bundleDataCache: CachedValue = + new CachedValue(async () => { + let bundle = {}; + await this.forEach(async (data) => { + bundle = merge(bundle, data); + }); + return bundle as BundleSchema; + }); + + constructor(private readonly workspaceRoot: Uri) {} + + getAbsolutePath(path: string | Uri) { + if (typeof path === "string") { + return Uri.joinPath(this.workspaceRoot, path); + } + return Uri.joinPath(this.workspaceRoot, path.fsPath); + } + + async getRootFile() { + const rootFile = await glob.glob( + toGlobPath(this.getAbsolutePath(this.rootFilePattern).fsPath), + {nocase: process.platform === "win32"} + ); + if (rootFile.length !== 1) { + return undefined; + } + return Uri.file(rootFile[0]); + } + + async getIncludedFilesGlob() { + const rootFile = await this.getRootFile(); + if (rootFile === undefined) { + return undefined; + } + const bundle = await parseBundleYaml(Uri.file(rootFile.fsPath)); + if (bundle?.include === undefined || bundle?.include.length === 0) { + return undefined; + } + if (bundle?.include.length === 1) { + return bundle.include[0]; + } + return `{${bundle.include.join(",")}}`; + } + + async getIncludedFiles() { + const includedFilesGlob = await this.getIncludedFilesGlob(); + if (includedFilesGlob !== undefined) { + return ( + await glob.glob( + toGlobPath( + path.join(this.workspaceRoot.fsPath, includedFilesGlob) + ), + {nocase: process.platform === "win32"} + ) + ).map((i) => Uri.file(i)); + } + } + + async allFiles() { + const rootFile = await this.getRootFile(); + if (rootFile === undefined) { + return []; + } + + return [rootFile, ...((await this.getIncludedFiles()) ?? 
[])]; + } + + async findFile( + predicate: (data: BundleSchema, file: Uri) => Promise + ) { + const matchedFiles: {data: BundleSchema; file: Uri}[] = []; + this.forEach(async (data, file) => { + if (await predicate(data, file)) { + matchedFiles.push({data, file}); + } + }); + return matchedFiles; + } + + async forEach(f: (data: BundleSchema, file: Uri) => Promise) { + for (const file of await this.allFiles()) { + await f(await parseBundleYaml(file), file); + } + } + + isRootBundleFile(e: Uri) { + return minimatch( + e.fsPath, + toGlobPath(this.getAbsolutePath(this.rootFilePattern).fsPath) + ); + } + + async isIncludedBundleFile(e: Uri) { + let includedFilesGlob = await this.getIncludedFilesGlob(); + if (includedFilesGlob === undefined) { + return false; + } + includedFilesGlob = this.getAbsolutePath(includedFilesGlob).fsPath; + return minimatch(e.fsPath, toGlobPath(includedFilesGlob)); + } + + async isBundleFile(e: Uri) { + return this.isRootBundleFile(e) || (await this.isIncludedBundleFile(e)); + } +} diff --git a/packages/databricks-vscode/src/bundle/BundleSchema.d.ts b/packages/databricks-vscode/src/bundle/BundleSchema.d.ts new file mode 100644 index 000000000..f1d1ef33b --- /dev/null +++ b/packages/databricks-vscode/src/bundle/BundleSchema.d.ts @@ -0,0 +1,5541 @@ +/* eslint-disable */ +/** + * This file was automatically generated by json-schema-to-typescript. + * DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file, + * and run json-schema-to-typescript to regenerate this file. + */ + +/** + * Root of the bundle config + */ +export interface BundleSchema { + /** + * A description of all code artifacts in this bundle. + */ + artifacts?: { + [k: string]: { + build: string; + files: { + source: string; + }[]; + path: string; + type: string; + }; + }; + /** + * The details for this bundle. + */ + bundle?: { + compute_id?: string; + git?: { + branch?: string; + origin_url?: string; + }; + /** + * The name of the bundle. 
+ */ + name: string; + }; + environments?: { + [k: string]: { + artifacts?: { + [k: string]: { + build: string; + files: { + source: string; + }[]; + path: string; + type: string; + }; + }; + bundle?: { + compute_id?: string; + git?: { + branch?: string; + origin_url?: string; + }; + name: string; + }; + compute_id?: string; + default?: boolean; + git?: { + branch?: string; + origin_url?: string; + }; + mode?: string; + resources?: { + experiments?: { + [k: string]: { + artifact_location?: string; + creation_time?: number; + experiment_id?: string; + last_update_time?: number; + lifecycle_stage?: string; + name?: string; + permissions?: { + group_name?: string; + level: string; + service_principal_name?: string; + user_name?: string; + }[]; + tags?: { + key?: string; + value?: string; + }[]; + }; + }; + jobs?: { + [k: string]: { + compute?: { + compute_key: string; + spec: { + kind?: string; + }; + }[]; + continuous?: { + pause_status?: string; + }; + email_notifications?: { + no_alert_for_skipped_runs?: boolean; + on_duration_warning_threshold_exceeded?: string[]; + on_failure?: string[]; + on_start?: string[]; + on_success?: string[]; + }; + format?: string; + git_source?: { + git_branch?: string; + git_commit?: string; + git_provider: string; + git_snapshot?: { + used_commit?: string; + }; + git_tag?: string; + git_url: string; + job_source?: { + dirty_state?: string; + import_from_git_branch: string; + job_config_path: string; + }; + }; + health?: { + rules?: { + metric?: string; + op?: string; + value?: number; + }[]; + }; + job_clusters?: { + job_cluster_key: string; + new_cluster?: { + autoscale?: { + max_workers: number; + min_workers: number; + }; + autotermination_minutes?: number; + aws_attributes?: { + availability?: string; + ebs_volume_count?: number; + ebs_volume_iops?: number; + ebs_volume_size?: number; + ebs_volume_throughput?: number; + ebs_volume_type?: string; + first_on_demand?: number; + instance_profile_arn?: string; + spot_bid_price_percent?: number; + zone_id?: string; + }; + azure_attributes?: { + availability?: string; + first_on_demand?: number; + log_analytics_info?: { + log_analytics_primary_key?: string; + log_analytics_workspace_id?: string; + }; + spot_bid_max_price?: number; + }; + cluster_log_conf?: { + dbfs?: { + destination?: string; + }; + s3?: { + canned_acl?: string; + destination?: string; + enable_encryption?: boolean; + encryption_type?: string; + endpoint?: string; + kms_key?: string; + region?: string; + }; + }; + cluster_name?: string; + cluster_source?: string; + custom_tags?: { + [k: string]: string; + }; + data_security_mode?: string; + docker_image?: { + basic_auth?: { + password?: string; + username?: string; + }; + url?: string; + }; + driver_instance_pool_id?: string; + driver_node_type_id?: string; + enable_elastic_disk?: boolean; + enable_local_disk_encryption?: boolean; + gcp_attributes?: { + availability?: string; + boot_disk_size?: number; + google_service_account?: string; + local_ssd_count?: number; + }; + init_scripts?: { + dbfs?: { + destination?: string; + }; + s3?: { + canned_acl?: string; + destination?: string; + enable_encryption?: boolean; + encryption_type?: string; + endpoint?: string; + kms_key?: string; + region?: string; + }; + volumes?: { + destination?: string; + }; + workspace?: { + destination?: string; + }; + }[]; + instance_pool_id?: string; + node_type_id?: string; + num_workers?: number; + policy_id?: string; + runtime_engine?: string; + single_user_name?: string; + spark_conf?: { + [k: string]: string; + }; 
+ spark_env_vars?: { + [k: string]: string; + }; + spark_version?: string; + ssh_public_keys?: string[]; + workload_type?: { + clients?: { + jobs?: boolean; + notebooks?: boolean; + }; + }; + }; + }[]; + max_concurrent_runs?: number; + name?: string; + notification_settings?: { + no_alert_for_canceled_runs?: boolean; + no_alert_for_skipped_runs?: boolean; + }; + parameters?: { + default: string; + name: string; + }[]; + permissions?: { + group_name?: string; + level: string; + service_principal_name?: string; + user_name?: string; + }[]; + run_as?: { + service_principal_name?: string; + user_name?: string; + }; + schedule?: { + pause_status?: string; + quartz_cron_expression: string; + timezone_id: string; + }; + tags?: { + [k: string]: string; + }; + tasks?: { + compute_key?: string; + condition_task?: { + left?: string; + op?: string; + right?: string; + }; + dbt_task?: { + catalog?: string; + commands: string[]; + profiles_directory?: string; + project_directory?: string; + schema?: string; + warehouse_id?: string; + }; + depends_on?: { + outcome?: string; + task_key: string; + }[]; + description?: string; + email_notifications?: { + on_duration_warning_threshold_exceeded?: string[]; + on_failure?: string[]; + on_start?: string[]; + on_success?: string[]; + }; + existing_cluster_id?: string; + health?: { + rules?: { + metric?: string; + op?: string; + value?: number; + }[]; + }; + job_cluster_key?: string; + libraries?: { + cran?: { + package: string; + repo?: string; + }; + egg?: string; + jar?: string; + maven?: { + coordinates: string; + exclusions?: string[]; + repo?: string; + }; + pypi?: { + package: string; + repo?: string; + }; + whl?: string; + }[]; + max_retries?: number; + min_retry_interval_millis?: number; + new_cluster?: { + autoscale?: { + max_workers: number; + min_workers: number; + }; + autotermination_minutes?: number; + aws_attributes?: { + availability?: string; + ebs_volume_count?: number; + ebs_volume_iops?: number; + ebs_volume_size?: number; + ebs_volume_throughput?: number; + ebs_volume_type?: string; + first_on_demand?: number; + instance_profile_arn?: string; + spot_bid_price_percent?: number; + zone_id?: string; + }; + azure_attributes?: { + availability?: string; + first_on_demand?: number; + log_analytics_info?: { + log_analytics_primary_key?: string; + log_analytics_workspace_id?: string; + }; + spot_bid_max_price?: number; + }; + cluster_log_conf?: { + dbfs?: { + destination?: string; + }; + s3?: { + canned_acl?: string; + destination?: string; + enable_encryption?: boolean; + encryption_type?: string; + endpoint?: string; + kms_key?: string; + region?: string; + }; + }; + cluster_name?: string; + cluster_source?: string; + custom_tags?: { + [k: string]: string; + }; + data_security_mode?: string; + docker_image?: { + basic_auth?: { + password?: string; + username?: string; + }; + url?: string; + }; + driver_instance_pool_id?: string; + driver_node_type_id?: string; + enable_elastic_disk?: boolean; + enable_local_disk_encryption?: boolean; + gcp_attributes?: { + availability?: string; + boot_disk_size?: number; + google_service_account?: string; + local_ssd_count?: number; + }; + init_scripts?: { + dbfs?: { + destination?: string; + }; + s3?: { + canned_acl?: string; + destination?: string; + enable_encryption?: boolean; + encryption_type?: string; + endpoint?: string; + kms_key?: string; + region?: string; + }; + volumes?: { + destination?: string; + }; + workspace?: { + destination?: string; + }; + }[]; + instance_pool_id?: string; + node_type_id?: 
string; + num_workers?: number; + policy_id?: string; + runtime_engine?: string; + single_user_name?: string; + spark_conf?: { + [k: string]: string; + }; + spark_env_vars?: { + [k: string]: string; + }; + spark_version?: string; + ssh_public_keys?: string[]; + workload_type?: { + clients?: { + jobs?: boolean; + notebooks?: boolean; + }; + }; + }; + notebook_task?: { + base_parameters?: { + [k: string]: string; + }; + notebook_path: string; + source?: string; + }; + notification_settings?: { + alert_on_last_attempt?: boolean; + no_alert_for_canceled_runs?: boolean; + no_alert_for_skipped_runs?: boolean; + }; + pipeline_task?: { + full_refresh?: boolean; + pipeline_id?: string; + }; + python_wheel_task?: { + entry_point?: string; + named_parameters?: { + [k: string]: string; + }; + package_name?: string; + parameters?: string[]; + }; + retry_on_timeout?: boolean; + run_if?: string; + run_job_task?: { + job_id: number; + job_parameters?: unknown; + }; + spark_jar_task?: { + jar_uri?: string; + main_class_name?: string; + parameters?: string[]; + }; + spark_python_task?: { + parameters?: string[]; + python_file: string; + source?: string; + }; + spark_submit_task?: { + parameters?: string[]; + }; + sql_task?: { + alert?: { + alert_id: string; + pause_subscriptions?: boolean; + subscriptions?: { + destination_id?: string; + user_name?: string; + }[]; + }; + dashboard?: { + custom_subject?: string; + dashboard_id: string; + pause_subscriptions?: boolean; + subscriptions?: { + destination_id?: string; + user_name?: string; + }[]; + }; + file?: { + path: string; + }; + parameters?: { + [k: string]: string; + }; + query?: { + query_id: string; + }; + warehouse_id: string; + }; + task_key: string; + timeout_seconds?: number; + }[]; + timeout_seconds?: number; + trigger?: { + file_arrival?: { + min_time_between_triggers_seconds?: number; + url?: string; + wait_after_last_change_seconds?: number; + }; + pause_status?: string; + }; + webhook_notifications?: { + on_duration_warning_threshold_exceeded?: { + id?: string; + }[]; + on_failure?: { + id?: string; + }[]; + on_start?: { + id?: string; + }[]; + on_success?: { + id?: string; + }[]; + }; + }; + }; + model_serving_endpoints?: { + [k: string]: { + config: { + served_models: { + environment_vars?: { + [k: string]: string; + }; + instance_profile_arn?: string; + model_name: string; + model_version: string; + name?: string; + scale_to_zero_enabled: boolean; + workload_size: string; + }[]; + traffic_config?: { + routes?: { + served_model_name: string; + traffic_percentage: number; + }[]; + }; + }; + name: string; + permissions?: { + group_name?: string; + level: string; + service_principal_name?: string; + user_name?: string; + }[]; + }; + }; + models?: { + [k: string]: { + creation_timestamp?: number; + description?: string; + last_updated_timestamp?: number; + latest_versions?: { + creation_timestamp?: number; + current_stage?: string; + description?: string; + last_updated_timestamp?: number; + name?: string; + run_id?: string; + run_link?: string; + source?: string; + status?: string; + status_message?: string; + tags?: { + key?: string; + value?: string; + }[]; + user_id?: string; + version?: string; + }[]; + name?: string; + permissions?: { + group_name?: string; + level: string; + service_principal_name?: string; + user_name?: string; + }[]; + tags?: { + key?: string; + value?: string; + }[]; + user_id?: string; + }; + }; + pipelines?: { + [k: string]: { + catalog?: string; + channel?: string; + clusters?: { + apply_policy_default_values?: 
boolean; + autoscale?: { + max_workers: number; + min_workers: number; + }; + aws_attributes?: { + availability?: string; + ebs_volume_count?: number; + ebs_volume_iops?: number; + ebs_volume_size?: number; + ebs_volume_throughput?: number; + ebs_volume_type?: string; + first_on_demand?: number; + instance_profile_arn?: string; + spot_bid_price_percent?: number; + zone_id?: string; + }; + azure_attributes?: { + availability?: string; + first_on_demand?: number; + log_analytics_info?: { + log_analytics_primary_key?: string; + log_analytics_workspace_id?: string; + }; + spot_bid_max_price?: number; + }; + cluster_log_conf?: { + dbfs?: { + destination?: string; + }; + s3?: { + canned_acl?: string; + destination?: string; + enable_encryption?: boolean; + encryption_type?: string; + endpoint?: string; + kms_key?: string; + region?: string; + }; + }; + custom_tags?: { + [k: string]: string; + }; + driver_instance_pool_id?: string; + driver_node_type_id?: string; + gcp_attributes?: { + availability?: string; + boot_disk_size?: number; + google_service_account?: string; + local_ssd_count?: number; + }; + instance_pool_id?: string; + label?: string; + node_type_id?: string; + num_workers?: number; + policy_id?: string; + spark_conf?: { + [k: string]: string; + }; + spark_env_vars?: { + [k: string]: string; + }; + ssh_public_keys?: string[]; + }[]; + configuration?: { + [k: string]: string; + }; + continuous?: boolean; + development?: boolean; + edition?: string; + filters?: { + exclude?: string[]; + include?: string[]; + }; + id?: string; + libraries?: { + file?: { + path?: string; + }; + jar?: string; + maven?: { + coordinates: string; + exclusions?: string[]; + repo?: string; + }; + notebook?: { + path?: string; + }; + }[]; + name?: string; + permissions?: { + group_name?: string; + level: string; + service_principal_name?: string; + user_name?: string; + }[]; + photon?: boolean; + serverless?: boolean; + storage?: string; + target?: string; + trigger?: { + cron?: { + quartz_cron_schedule?: string; + timezone_id?: string; + }; + manual?: unknown; + }; + }; + }; + }; + run_as?: { + service_principal_name?: string; + user_name?: string; + }; + variables?: { + [k: string]: string; + }; + workspace?: { + artifact_path?: string; + auth_type?: string; + azure_client_id?: string; + azure_environment?: string; + azure_login_app_id?: string; + azure_tenant_id?: string; + azure_use_msi?: boolean; + azure_workspace_resource_id?: string; + client_id?: string; + file_path?: string; + google_service_account?: string; + host?: string; + profile?: string; + root_path?: string; + state_path?: string; + }; + }; + }; + experimental?: { + python_wheel_wrapper?: boolean; + scripts?: { + [k: string]: string; + }; + }; + /** + * A list of glob patterns of files to load and merge into the this configuration. Defaults to no files being included. + */ + include?: string[]; + /** + * Collection of Databricks resources to deploy. + */ + resources?: { + /** + * List of MLflow experiments + */ + experiments?: { + [k: string]: { + /** + * Location where artifacts for the experiment are stored. + */ + artifact_location?: string; + /** + * Creation time + */ + creation_time?: number; + /** + * Unique identifier for the experiment. + */ + experiment_id?: string; + /** + * Last update time + */ + last_update_time?: number; + /** + * Current life cycle stage of the experiment: "active" or "deleted". + * Deleted experiments are not returned by APIs. 
+ */ + lifecycle_stage?: string; + /** + * Human readable name that identifies the experiment. + */ + name?: string; + permissions?: { + group_name?: string; + level: string; + service_principal_name?: string; + user_name?: string; + }[]; + /** + * Tags: Additional metadata key-value pairs. + */ + tags?: { + /** + * The tag key. + */ + key?: string; + /** + * The tag value. + */ + value?: string; + }[]; + }; + }; + /** + * List of Databricks jobs + */ + jobs?: { + [k: string]: { + /** + * A list of compute requirements that can be referenced by tasks of this job. + */ + compute?: { + /** + * A unique name for the compute requirement. This field is required and must be unique within the job. + * `JobTaskSettings` may refer to this field to determine the compute requirements for the task execution. + */ + compute_key: string; + spec: { + /** + * The kind of compute described by this compute specification. + */ + kind?: string; + }; + }[]; + /** + * An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. + */ + continuous?: { + /** + * Whether this trigger is paused or not. + */ + pause_status?: string; + }; + /** + * An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. The default behavior is to not send any emails. + */ + email_notifications?: { + /** + * If true, do not send email to recipients specified in `on_failure` if the run is skipped. + */ + no_alert_for_skipped_runs?: boolean; + on_duration_warning_threshold_exceeded?: string[]; + /** + * A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. + */ + on_failure?: string[]; + /** + * A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. + */ + on_start?: string[]; + /** + * A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. + */ + on_success?: string[]; + }; + /** + * Used to tell what is the format of the job. This field is ignored in Create/Update/Reset calls. When using the Jobs API 2.1 this value is always set to `"MULTI_TASK"`. + */ + format?: string; + /** + * An optional specification for a remote repository containing the notebooks used by this job's notebook tasks. + */ + git_source?: { + /** + * Name of the branch to be checked out and used by this job. + * This field cannot be specified in conjunction with git_tag or git_commit. + * + * The maximum length is 255 characters. + * + */ + git_branch?: string; + /** + * Commit to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_tag. + * The maximum length is 64 characters. + */ + git_commit?: string; + /** + * Unique identifier of the service used to host the Git repository. The value is case insensitive. 
+ */ + git_provider: string; + git_snapshot?: { + /** + * Commit that was used to execute the run. If git_branch was specified, this points to the HEAD of the branch at the time of the run; if git_tag was specified, this points to the commit the tag points to. + */ + used_commit?: string; + }; + /** + * Name of the tag to be checked out and used by this job. + * This field cannot be specified in conjunction with git_branch or git_commit. + * + * The maximum length is 255 characters. + * + */ + git_tag?: string; + /** + * URL of the repository to be cloned by this job. + * The maximum length is 300 characters. + */ + git_url: string; + job_source?: { + dirty_state?: string; + import_from_git_branch: string; + job_config_path: string; + }; + }; + health?: { + rules?: { + metric?: string; + op?: string; + value?: number; + }[]; + }; + /** + * A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. + */ + job_clusters?: { + /** + * A unique name for the job cluster. This field is required and must be unique within the job. + * `JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution. + */ + job_cluster_key: string; + /** + * If new_cluster, a description of a cluster that is created for only for this task. + */ + new_cluster?: { + /** + * Parameters needed in order to automatically scale clusters up and down based on load. + * Note: autoscaling works best with DB runtime versions 3.0 or later. + */ + autoscale?: { + /** + * The maximum number of workers to which the cluster can scale up when overloaded. + * Note that `max_workers` must be strictly greater than `min_workers`. + */ + max_workers: number; + /** + * The minimum number of workers to which the cluster can scale down when underutilized. + * It is also the initial number of workers the cluster will have after creation. + */ + min_workers: number; + }; + /** + * Automatically terminates the cluster after it is inactive for this time in minutes. If not set, + * this cluster will not be automatically terminated. If specified, the threshold must be between + * 10 and 10000 minutes. + * Users can also set this value to 0 to explicitly disable automatic termination. + */ + autotermination_minutes?: number; + /** + * Attributes related to clusters running on Amazon Web Services. + * If not specified at cluster creation, a set of default values will be used. + */ + aws_attributes?: { + availability?: string; + /** + * The number of volumes launched for each instance. Users can choose up to 10 volumes. + * This feature is only enabled for supported node types. Legacy node types cannot specify + * custom EBS volumes. + * For node types with no instance store, at least one EBS volume needs to be specified; + * otherwise, cluster creation will fail. + * + * These EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc. + * Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc. + * + * If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for + * scratch storage because heterogenously sized scratch devices can lead to inefficient disk + * utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance + * store volumes. + * + * Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` + * will be overridden. 
+ */ + ebs_volume_count?: number; + /** + * + */ + ebs_volume_iops?: number; + /** + * The size of each EBS volume (in GiB) launched for each instance. For general purpose + * SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, + * this value must be within the range 500 - 4096. + */ + ebs_volume_size?: number; + /** + * + */ + ebs_volume_throughput?: number; + ebs_volume_type?: string; + /** + * The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. + * If this value is greater than 0, the cluster driver node in particular will be placed on an + * on-demand instance. If this value is greater than or equal to the current cluster size, all + * nodes will be placed on on-demand instances. If this value is less than the current cluster + * size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will + * be placed on `availability` instances. Note that this value does not affect + * cluster size and cannot currently be mutated over the lifetime of a cluster. + */ + first_on_demand?: number; + /** + * Nodes for this cluster will only be placed on AWS instances with this instance profile. If + * ommitted, nodes will be placed on instances without an IAM instance profile. The instance + * profile must have previously been added to the Databricks environment by an account + * administrator. + * + * This feature may only be available to certain customer plans. + * + * If this field is ommitted, we will pull in the default from the conf if it exists. + */ + instance_profile_arn?: string; + /** + * The bid price for AWS spot instances, as a percentage of the corresponding instance type's + * on-demand price. + * For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot + * instance, then the bid price is half of the price of + * on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice + * the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. + * When spot instances are requested for this cluster, only spot instances whose bid price + * percentage matches this field will be considered. + * Note that, for safety, we enforce this field to be no more than 10000. + * + * The default value and documentation here should be kept consistent with + * CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent. + */ + spot_bid_price_percent?: number; + /** + * Identifier for the availability zone/datacenter in which the cluster resides. + * This string will be of a form like "us-west-2a". The provided availability + * zone must be in the same region as the Databricks deployment. For example, "us-west-2a" + * is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. + * This is an optional field at cluster creation, and if not specified, a default zone will be used. + * If the zone specified is "auto", will try to place cluster in a zone with high availability, + * and will retry placement in a different AZ if there is not enough capacity. + * See [[AutoAZHelper.scala]] for more details. + * The list of available zones as well as the default value can be found by using the + * `List Zones`_ method. + */ + zone_id?: string; + }; + /** + * Attributes related to clusters running on Microsoft Azure. + * If not specified at cluster creation, a set of default values will be used. 
+ */ + azure_attributes?: { + availability?: string; + /** + * The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. + * This value should be greater than 0, to make sure the cluster driver node is placed on an + * on-demand instance. If this value is greater than or equal to the current cluster size, all + * nodes will be placed on on-demand instances. If this value is less than the current cluster + * size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will + * be placed on `availability` instances. Note that this value does not affect + * cluster size and cannot currently be mutated over the lifetime of a cluster. + */ + first_on_demand?: number; + /** + * Defines values necessary to configure and run Azure Log Analytics agent + */ + log_analytics_info?: { + /** + * + */ + log_analytics_primary_key?: string; + /** + * + */ + log_analytics_workspace_id?: string; + }; + /** + * The max bid price to be used for Azure spot instances. + * The Max price for the bid cannot be higher than the on-demand price of the instance. + * If not specified, the default value is -1, which specifies that the instance cannot be evicted + * on the basis of price, and only on the basis of availability. Further, the value should > 0 or -1. + */ + spot_bid_max_price?: number; + }; + /** + * The configuration for delivering spark logs to a long-term storage destination. + * Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified + * for one cluster. If the conf is given, the logs will be delivered to the destination every + * `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while + * the destination of executor logs is `$destination/$clusterId/executor`. + */ + cluster_log_conf?: { + /** + * destination needs to be provided. e.g. + * `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + */ + dbfs?: { + /** + * dbfs destination, e.g. `dbfs:/my/path` + */ + destination?: string; + }; + /** + * destination and either the region or endpoint need to be provided. e.g. + * `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` + * Cluster iam role is used to access s3, please make sure the cluster iam role in + * `instance_profile_arn` has permission to write data to the s3 destination. + */ + s3?: { + /** + * (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. + * If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on + * the destination bucket and prefix. The full list of possible canned acl can be found at + * http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. + * Please also note that by default only the object owner gets full controls. If you are using cross account + * role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to + * read the logs. + */ + canned_acl?: string; + /** + * S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using + * cluster iam role, please make sure you set cluster iam role and the role has write access to the + * destination. Please also note that you cannot use AWS keys to deliver logs. + */ + destination?: string; + /** + * (Optional) Flag to enable server side encryption, `false` by default. + */ + enable_encryption?: boolean; + /** + * (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. 
It will be used only when + * encryption is enabled and the default type is `sse-s3`. + */ + encryption_type?: string; + /** + * S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. + * If both are set, endpoint will be used. + */ + endpoint?: string; + /** + * (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + */ + kms_key?: string; + /** + * S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, + * endpoint will be used. + */ + region?: string; + }; + }; + /** + * Cluster name requested by the user. This doesn't have to be unique. + * If not specified at creation, the cluster name will be an empty string. + * + */ + cluster_name?: string; + cluster_source?: string; + /** + * Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS + * instances and EBS volumes) with these tags in addition to `default_tags`. Notes: + * + * - Currently, Databricks allows at most 45 custom tags + * + * - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags + */ + custom_tags?: { + [k: string]: string; + }; + data_security_mode?: string; + docker_image?: { + basic_auth?: { + /** + * Password of the user + */ + password?: string; + /** + * Name of the user + */ + username?: string; + }; + /** + * URL of the docker image. + */ + url?: string; + }; + /** + * The optional ID of the instance pool for the driver of the cluster belongs. + * The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not + * assigned. + */ + driver_instance_pool_id?: string; + /** + * The node type of the Spark driver. Note that this field is optional; + * if unset, the driver node type will be set as the same value + * as `node_type_id` defined above. + * + */ + driver_node_type_id?: string; + /** + * Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk + * space when its Spark workers are running low on disk space. This feature requires specific AWS + * permissions to function correctly - refer to the User Guide for more details. + */ + enable_elastic_disk?: boolean; + /** + * Whether to enable LUKS on cluster VMs' local disks + */ + enable_local_disk_encryption?: boolean; + /** + * Attributes related to clusters running on Google Cloud Platform. + * If not specified at cluster creation, a set of default values will be used. + */ + gcp_attributes?: { + availability?: string; + /** + * boot disk size in GB + */ + boot_disk_size?: number; + /** + * If provided, the cluster will impersonate the google service account when accessing + * gcloud services (like GCS). The google service account + * must have previously been added to the Databricks environment by an account + * administrator. + */ + google_service_account?: string; + /** + * If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. + */ + local_ssd_count?: number; + }; + /** + * The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. 
+ */ + init_scripts?: { + /** + * destination needs to be provided. e.g. + * `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + */ + dbfs?: { + /** + * dbfs destination, e.g. `dbfs:/my/path` + */ + destination?: string; + }; + /** + * destination and either the region or endpoint need to be provided. e.g. + * `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` + * Cluster iam role is used to access s3, please make sure the cluster iam role in + * `instance_profile_arn` has permission to write data to the s3 destination. + */ + s3?: { + /** + * (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. + * If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on + * the destination bucket and prefix. The full list of possible canned acl can be found at + * http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. + * Please also note that by default only the object owner gets full controls. If you are using cross account + * role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to + * read the logs. + */ + canned_acl?: string; + /** + * S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using + * cluster iam role, please make sure you set cluster iam role and the role has write access to the + * destination. Please also note that you cannot use AWS keys to deliver logs. + */ + destination?: string; + /** + * (Optional) Flag to enable server side encryption, `false` by default. + */ + enable_encryption?: boolean; + /** + * (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when + * encryption is enabled and the default type is `sse-s3`. + */ + encryption_type?: string; + /** + * S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. + * If both are set, endpoint will be used. + */ + endpoint?: string; + /** + * (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + */ + kms_key?: string; + /** + * S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, + * endpoint will be used. + */ + region?: string; + }; + volumes?: { + destination?: string; + }; + /** + * destination needs to be provided. e.g. + * `{ "workspace" : { "destination" : "/Users/user1@databricks.com/my-init.sh" } }` + */ + workspace?: { + /** + * workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh` + */ + destination?: string; + }; + }[]; + /** + * The optional ID of the instance pool to which the cluster belongs. + */ + instance_pool_id?: string; + /** + * This field encodes, through a single value, the resources available to each of + * the Spark nodes in this cluster. For example, the Spark nodes can be provisioned + * and optimized for memory or compute intensive workloads. A list of available node + * types can be retrieved by using the :method:clusters/listNodeTypes API call. + * + */ + node_type_id?: string; + /** + * Number of worker nodes that this cluster should have. A cluster has one Spark Driver + * and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. + * + * Note: When reading the properties of a cluster, this field reflects the desired number + * of workers rather than the actual current number of workers. 
For instance, if a cluster + * is resized from 5 to 10 workers, this field will immediately be updated to reflect + * the target size of 10 workers, whereas the workers listed in `spark_info` will gradually + * increase from 5 to 10 as the new nodes are provisioned. + */ + num_workers?: number; + /** + * The ID of the cluster policy used to create the cluster if applicable. + */ + policy_id?: string; + runtime_engine?: string; + /** + * Single user name if data_security_mode is `SINGLE_USER` + */ + single_user_name?: string; + /** + * An object containing a set of optional, user-specified Spark configuration key-value pairs. + * Users can also pass in a string of extra JVM options to the driver and the executors via + * `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. + * + */ + spark_conf?: { + [k: string]: string; + }; + /** + * An object containing a set of optional, user-specified environment variable key-value pairs. + * Please note that key-value pair of the form (X,Y) will be exported as is (i.e., + * `export X='Y'`) while launching the driver and workers. + * + * In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending + * them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all + * default databricks managed environmental variables are included as well. + * + * Example Spark environment variables: + * `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or + * `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` + */ + spark_env_vars?: { + [k: string]: string; + }; + /** + * The Spark version of the cluster, e.g. `3.3.x-scala2.11`. + * A list of available Spark versions can be retrieved by using + * the :method:clusters/sparkVersions API call. + * + */ + spark_version?: string; + /** + * SSH public key contents that will be added to each Spark node in this cluster. The + * corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. + * Up to 10 keys can be specified. + */ + ssh_public_keys?: string[]; + workload_type?: { + /** + * defined what type of clients can use the cluster. E.g. Notebooks, Jobs + */ + clients?: { + /** + * With jobs set, the cluster can be used for jobs + */ + jobs?: boolean; + /** + * With notebooks set, this cluster can be used for notebooks + */ + notebooks?: boolean; + }; + }; + }; + }[]; + /** + * An optional maximum allowed number of concurrent runs of the job. + * + * Set this value if you want to be able to execute multiple runs of the same job concurrently. This is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters. + * + * This setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs. However, from then on, new runs are skipped unless there are fewer than 3 active runs. + * + * This value cannot exceed 1000\. Setting this value to 0 causes all new runs to be skipped. The default behavior is to allow only 1 concurrent run. + */ + max_concurrent_runs?: number; + /** + * An optional name for the job. + */ + name?: string; + /** + * Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. 
+ */ + notification_settings?: { + /** + * If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. + */ + no_alert_for_canceled_runs?: boolean; + /** + * If true, do not send notifications to recipients specified in `on_failure` if the run is skipped. + */ + no_alert_for_skipped_runs?: boolean; + }; + /** + * Job-level parameter definitions + */ + parameters?: { + /** + * Default value of the parameter. + */ + default: string; + /** + * The name of the defined parameter. May only contain alphanumeric characters, `_`, `-`, and `.` + */ + name: string; + }[]; + permissions?: { + group_name?: string; + level: string; + service_principal_name?: string; + user_name?: string; + }[]; + run_as?: { + /** + * Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. + */ + service_principal_name?: string; + /** + * The email of an active workspace user. Non-admin users can only set this field to their own email. + */ + user_name?: string; + }; + /** + * An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. + */ + schedule?: { + /** + * Whether this trigger is paused or not. + */ + pause_status?: string; + /** + * A Cron expression using Quartz syntax that describes the schedule for a job. + * See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) + * for details. This field is required." + * + */ + quartz_cron_expression: string; + /** + * A Java timezone ID. The schedule for a job is resolved with respect to this timezone. + * See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. + * This field is required. + * + */ + timezone_id: string; + }; + /** + * A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. + */ + tags?: { + [k: string]: string; + }; + /** + * A list of task specifications to be executed by this job. + */ + tasks?: { + /** + * The key of the compute requirement, specified in `job.settings.compute`, to use for execution of this task. + */ + compute_key?: string; + /** + * If condition_task, specifies a condition with an outcome that can be used to control the execution of other tasks. Does not require a cluster to execute and does not support retries or notifications. + */ + condition_task?: { + /** + * The left operand of the condition task. Can be either a string value or a job state or parameter reference. + */ + left?: string; + /** + * * `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`. + * * `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” >= “12”` will evaluate to `true`, `“10.0” >= “12”` will evaluate to `false`. + * + * The boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison. + * + */ + op?: string; + /** + * The right operand of the condition task. Can be either a string value or a job state or parameter reference. 
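For the `schedule` block documented above, a minimal sketch; the cron expression, timezone, and pause state are hypothetical choices:

```typescript
const schedule: {
    quartz_cron_expression: string;
    timezone_id: string;
    pause_status?: string;
} = {
    quartz_cron_expression: "0 0 6 * * ?", // 06:00 every day, Quartz syntax
    timezone_id: "Europe/Amsterdam",
    pause_status: "UNPAUSED",
};
```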
+ */ + right?: string; + }; + /** + * If dbt_task, indicates that this must execute a dbt task. It requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse. + */ + dbt_task?: { + /** + * Optional name of the catalog to use. The value is the top level in the 3-level namespace of Unity Catalog (catalog / schema / relation). The catalog value can only be specified if a warehouse_id is specified. Requires dbt-databricks >= 1.1.1. + */ + catalog?: string; + /** + * A list of dbt commands to execute. All commands must start with `dbt`. This parameter must not be empty. A maximum of up to 10 commands can be provided. + */ + commands: string[]; + /** + * Optional (relative) path to the profiles directory. Can only be specified if no warehouse_id is specified. If no warehouse_id is specified and this folder is unset, the root directory is used. + */ + profiles_directory?: string; + /** + * Optional (relative) path to the project directory, if no value is provided, the root of the git repository is used. + */ + project_directory?: string; + /** + * Optional schema to write to. This parameter is only used when a warehouse_id is also provided. If not provided, the `default` schema is used. + */ + schema?: string; + /** + * ID of the SQL warehouse to connect to. If provided, we automatically generate and provide the profile and connection details to dbt. It can be overridden on a per-command basis by using the `--profiles-dir` command line argument. + */ + warehouse_id?: string; + }; + /** + * An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete successfully before executing this task. + * The key is `task_key`, and the value is the name assigned to the dependent task. + * + */ + depends_on?: { + /** + * Can only be specified on condition task dependencies. The outcome of the dependent task that must be met for this task to run. + */ + outcome?: string; + /** + * The name of the task this task depends on. + */ + task_key: string; + }[]; + /** + * An optional description for this task. + * The maximum length is 4096 bytes. + */ + description?: string; + /** + * An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails. + */ + email_notifications?: { + on_duration_warning_threshold_exceeded?: string[]; + /** + * A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. + */ + on_failure?: string[]; + /** + * A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. + */ + on_start?: string[]; + /** + * A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. + */ + on_success?: string[]; + }; + /** + * If existing_cluster_id, the ID of an existing cluster that is used for all runs of this task. 
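A sketch of a `dbt_task` consistent with the constraints above (every command starts with `dbt`, at most 10 commands); the warehouse ID and schema are placeholders:

```typescript
const dbtTask: {
    commands: string[];
    warehouse_id?: string;
    schema?: string;
} = {
    commands: ["dbt deps", "dbt seed", "dbt run"],
    warehouse_id: "1234567890abcdef", // placeholder warehouse ID
    schema: "analytics", // placeholder target schema
};
```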
When running tasks on an existing cluster, you may need to manually restart the cluster if it stops responding. We suggest running jobs on new clusters for greater reliability. + */ + existing_cluster_id?: string; + health?: { + rules?: { + metric?: string; + op?: string; + value?: number; + }[]; + }; + /** + * If job_cluster_key, this task is executed reusing the cluster specified in `job.settings.job_clusters`. + */ + job_cluster_key?: string; + /** + * An optional list of libraries to be installed on the cluster that executes the task. The default value is an empty list. + */ + libraries?: { + /** + * Specification of a CRAN library to be installed as part of the library + */ + cran?: { + /** + * The name of the CRAN package to install. + */ + package: string; + /** + * The repository where the package can be found. If not specified, the default CRAN repo is used. + */ + repo?: string; + }; + /** + * URI of the egg to be installed. Currently only DBFS and S3 URIs are supported. + * For example: `{ "egg": "dbfs:/my/egg" }` or + * `{ "egg": "s3://my-bucket/egg" }`. + * If S3 is used, please make sure the cluster has read access on the library. You may need to + * launch the cluster with an IAM role to access the S3 URI. + */ + egg?: string; + /** + * URI of the jar to be installed. Currently only DBFS and S3 URIs are supported. + * For example: `{ "jar": "dbfs:/mnt/databricks/library.jar" }` or + * `{ "jar": "s3://my-bucket/library.jar" }`. + * If S3 is used, please make sure the cluster has read access on the library. You may need to + * launch the cluster with an IAM role to access the S3 URI. + */ + jar?: string; + /** + * Specification of a maven library to be installed. For example: + * `{ "coordinates": "org.jsoup:jsoup:1.7.2" }` + */ + maven?: { + /** + * Gradle-style maven coordinates. For example: "org.jsoup:jsoup:1.7.2". + */ + coordinates: string; + /** + * List of dependences to exclude. For example: `["slf4j:slf4j", "*:hadoop-client"]`. + * + * Maven dependency exclusions: + * https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html. + */ + exclusions?: string[]; + /** + * Maven repo to install the Maven package from. If omitted, both Maven Central Repository + * and Spark Packages are searched. + */ + repo?: string; + }; + /** + * Specification of a PyPi library to be installed. For example: + * `{ "package": "simplejson" }` + */ + pypi?: { + /** + * The name of the pypi package to install. An optional exact version specification is also + * supported. Examples: "simplejson" and "simplejson==3.8.0". + */ + package: string; + /** + * The repository where the package can be found. If not specified, the default pip index is + * used. + */ + repo?: string; + }; + /** + * URI of the wheel to be installed. + * For example: `{ "whl": "dbfs:/my/whl" }` or `{ "whl": "s3://my-bucket/whl" }`. + * If S3 is used, please make sure the cluster has read access on the library. You may need to + * launch the cluster with an IAM role to access the S3 URI. + */ + whl?: string; + }[]; + /** + * An optional maximum number of times to retry an unsuccessful run. A run is considered to be unsuccessful if it completes with the `FAILED` result_state or `INTERNAL_ERROR` `life_cycle_state`. The value -1 means to retry indefinitely and the value 0 means to never retry. The default behavior is to never retry. + */ + max_retries?: number; + /** + * An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. 
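The library variants documented above can be mixed within one `libraries` list; a sketch with hypothetical artifacts:

```typescript
const libraries: {
    pypi?: {package: string; repo?: string};
    maven?: {coordinates: string; exclusions?: string[]};
    whl?: string;
}[] = [
    {pypi: {package: "simplejson==3.8.0"}},
    {maven: {coordinates: "org.jsoup:jsoup:1.7.2", exclusions: ["slf4j:slf4j"]}},
    {whl: "dbfs:/my/whl"}, // placeholder wheel location
];
```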
The default behavior is that unsuccessful runs are immediately retried. + */ + min_retry_interval_millis?: number; + /** + * If new_cluster, a description of a cluster that is created for only for this task. + */ + new_cluster?: { + /** + * Parameters needed in order to automatically scale clusters up and down based on load. + * Note: autoscaling works best with DB runtime versions 3.0 or later. + */ + autoscale?: { + /** + * The maximum number of workers to which the cluster can scale up when overloaded. + * Note that `max_workers` must be strictly greater than `min_workers`. + */ + max_workers: number; + /** + * The minimum number of workers to which the cluster can scale down when underutilized. + * It is also the initial number of workers the cluster will have after creation. + */ + min_workers: number; + }; + /** + * Automatically terminates the cluster after it is inactive for this time in minutes. If not set, + * this cluster will not be automatically terminated. If specified, the threshold must be between + * 10 and 10000 minutes. + * Users can also set this value to 0 to explicitly disable automatic termination. + */ + autotermination_minutes?: number; + /** + * Attributes related to clusters running on Amazon Web Services. + * If not specified at cluster creation, a set of default values will be used. + */ + aws_attributes?: { + availability?: string; + /** + * The number of volumes launched for each instance. Users can choose up to 10 volumes. + * This feature is only enabled for supported node types. Legacy node types cannot specify + * custom EBS volumes. + * For node types with no instance store, at least one EBS volume needs to be specified; + * otherwise, cluster creation will fail. + * + * These EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc. + * Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc. + * + * If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for + * scratch storage because heterogenously sized scratch devices can lead to inefficient disk + * utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance + * store volumes. + * + * Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` + * will be overridden. + */ + ebs_volume_count?: number; + /** + * + */ + ebs_volume_iops?: number; + /** + * The size of each EBS volume (in GiB) launched for each instance. For general purpose + * SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, + * this value must be within the range 500 - 4096. + */ + ebs_volume_size?: number; + /** + * + */ + ebs_volume_throughput?: number; + ebs_volume_type?: string; + /** + * The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. + * If this value is greater than 0, the cluster driver node in particular will be placed on an + * on-demand instance. If this value is greater than or equal to the current cluster size, all + * nodes will be placed on on-demand instances. If this value is less than the current cluster + * size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will + * be placed on `availability` instances. Note that this value does not affect + * cluster size and cannot currently be mutated over the lifetime of a cluster. + */ + first_on_demand?: number; + /** + * Nodes for this cluster will only be placed on AWS instances with this instance profile. 
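A sketch of the `autoscale` and `aws_attributes` fragments described above: `max_workers` must be strictly greater than `min_workers`, and `first_on_demand: 1` keeps the driver on an on-demand instance. All numbers are illustrative:

```typescript
const newClusterFragment: {
    autoscale?: {min_workers: number; max_workers: number};
    autotermination_minutes?: number;
    aws_attributes?: {
        first_on_demand?: number;
        ebs_volume_count?: number;
        ebs_volume_size?: number;
    };
} = {
    autoscale: {min_workers: 2, max_workers: 8},
    autotermination_minutes: 60,
    aws_attributes: {first_on_demand: 1, ebs_volume_count: 1, ebs_volume_size: 100},
};
```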
If + * ommitted, nodes will be placed on instances without an IAM instance profile. The instance + * profile must have previously been added to the Databricks environment by an account + * administrator. + * + * This feature may only be available to certain customer plans. + * + * If this field is ommitted, we will pull in the default from the conf if it exists. + */ + instance_profile_arn?: string; + /** + * The bid price for AWS spot instances, as a percentage of the corresponding instance type's + * on-demand price. + * For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot + * instance, then the bid price is half of the price of + * on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice + * the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. + * When spot instances are requested for this cluster, only spot instances whose bid price + * percentage matches this field will be considered. + * Note that, for safety, we enforce this field to be no more than 10000. + * + * The default value and documentation here should be kept consistent with + * CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent. + */ + spot_bid_price_percent?: number; + /** + * Identifier for the availability zone/datacenter in which the cluster resides. + * This string will be of a form like "us-west-2a". The provided availability + * zone must be in the same region as the Databricks deployment. For example, "us-west-2a" + * is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. + * This is an optional field at cluster creation, and if not specified, a default zone will be used. + * If the zone specified is "auto", will try to place cluster in a zone with high availability, + * and will retry placement in a different AZ if there is not enough capacity. + * See [[AutoAZHelper.scala]] for more details. + * The list of available zones as well as the default value can be found by using the + * `List Zones`_ method. + */ + zone_id?: string; + }; + /** + * Attributes related to clusters running on Microsoft Azure. + * If not specified at cluster creation, a set of default values will be used. + */ + azure_attributes?: { + availability?: string; + /** + * The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. + * This value should be greater than 0, to make sure the cluster driver node is placed on an + * on-demand instance. If this value is greater than or equal to the current cluster size, all + * nodes will be placed on on-demand instances. If this value is less than the current cluster + * size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will + * be placed on `availability` instances. Note that this value does not affect + * cluster size and cannot currently be mutated over the lifetime of a cluster. + */ + first_on_demand?: number; + /** + * Defines values necessary to configure and run Azure Log Analytics agent + */ + log_analytics_info?: { + /** + * + */ + log_analytics_primary_key?: string; + /** + * + */ + log_analytics_workspace_id?: string; + }; + /** + * The max bid price to be used for Azure spot instances. + * The Max price for the bid cannot be higher than the on-demand price of the instance. + * If not specified, the default value is -1, which specifies that the instance cannot be evicted + * on the basis of price, and only on the basis of availability. 
Further, the value should > 0 or -1. + */ + spot_bid_max_price?: number; + }; + /** + * The configuration for delivering spark logs to a long-term storage destination. + * Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified + * for one cluster. If the conf is given, the logs will be delivered to the destination every + * `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while + * the destination of executor logs is `$destination/$clusterId/executor`. + */ + cluster_log_conf?: { + /** + * destination needs to be provided. e.g. + * `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + */ + dbfs?: { + /** + * dbfs destination, e.g. `dbfs:/my/path` + */ + destination?: string; + }; + /** + * destination and either the region or endpoint need to be provided. e.g. + * `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` + * Cluster iam role is used to access s3, please make sure the cluster iam role in + * `instance_profile_arn` has permission to write data to the s3 destination. + */ + s3?: { + /** + * (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. + * If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on + * the destination bucket and prefix. The full list of possible canned acl can be found at + * http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. + * Please also note that by default only the object owner gets full controls. If you are using cross account + * role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to + * read the logs. + */ + canned_acl?: string; + /** + * S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using + * cluster iam role, please make sure you set cluster iam role and the role has write access to the + * destination. Please also note that you cannot use AWS keys to deliver logs. + */ + destination?: string; + /** + * (Optional) Flag to enable server side encryption, `false` by default. + */ + enable_encryption?: boolean; + /** + * (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when + * encryption is enabled and the default type is `sse-s3`. + */ + encryption_type?: string; + /** + * S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. + * If both are set, endpoint will be used. + */ + endpoint?: string; + /** + * (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + */ + kms_key?: string; + /** + * S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, + * endpoint will be used. + */ + region?: string; + }; + }; + /** + * Cluster name requested by the user. This doesn't have to be unique. + * If not specified at creation, the cluster name will be an empty string. + * + */ + cluster_name?: string; + cluster_source?: string; + /** + * Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS + * instances and EBS volumes) with these tags in addition to `default_tags`. 
Notes: + * + * - Currently, Databricks allows at most 45 custom tags + * + * - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags + */ + custom_tags?: { + [k: string]: string; + }; + data_security_mode?: string; + docker_image?: { + basic_auth?: { + /** + * Password of the user + */ + password?: string; + /** + * Name of the user + */ + username?: string; + }; + /** + * URL of the docker image. + */ + url?: string; + }; + /** + * The optional ID of the instance pool for the driver of the cluster belongs. + * The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not + * assigned. + */ + driver_instance_pool_id?: string; + /** + * The node type of the Spark driver. Note that this field is optional; + * if unset, the driver node type will be set as the same value + * as `node_type_id` defined above. + * + */ + driver_node_type_id?: string; + /** + * Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk + * space when its Spark workers are running low on disk space. This feature requires specific AWS + * permissions to function correctly - refer to the User Guide for more details. + */ + enable_elastic_disk?: boolean; + /** + * Whether to enable LUKS on cluster VMs' local disks + */ + enable_local_disk_encryption?: boolean; + /** + * Attributes related to clusters running on Google Cloud Platform. + * If not specified at cluster creation, a set of default values will be used. + */ + gcp_attributes?: { + availability?: string; + /** + * boot disk size in GB + */ + boot_disk_size?: number; + /** + * If provided, the cluster will impersonate the google service account when accessing + * gcloud services (like GCS). The google service account + * must have previously been added to the Databricks environment by an account + * administrator. + */ + google_service_account?: string; + /** + * If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. + */ + local_ssd_count?: number; + }; + /** + * The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. + */ + init_scripts?: { + /** + * destination needs to be provided. e.g. + * `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + */ + dbfs?: { + /** + * dbfs destination, e.g. `dbfs:/my/path` + */ + destination?: string; + }; + /** + * destination and either the region or endpoint need to be provided. e.g. + * `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` + * Cluster iam role is used to access s3, please make sure the cluster iam role in + * `instance_profile_arn` has permission to write data to the s3 destination. + */ + s3?: { + /** + * (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. + * If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on + * the destination bucket and prefix. The full list of possible canned acl can be found at + * http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. 
+ * Please also note that by default only the object owner gets full controls. If you are using cross account + * role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to + * read the logs. + */ + canned_acl?: string; + /** + * S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using + * cluster iam role, please make sure you set cluster iam role and the role has write access to the + * destination. Please also note that you cannot use AWS keys to deliver logs. + */ + destination?: string; + /** + * (Optional) Flag to enable server side encryption, `false` by default. + */ + enable_encryption?: boolean; + /** + * (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when + * encryption is enabled and the default type is `sse-s3`. + */ + encryption_type?: string; + /** + * S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. + * If both are set, endpoint will be used. + */ + endpoint?: string; + /** + * (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + */ + kms_key?: string; + /** + * S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, + * endpoint will be used. + */ + region?: string; + }; + volumes?: { + destination?: string; + }; + /** + * destination needs to be provided. e.g. + * `{ "workspace" : { "destination" : "/Users/user1@databricks.com/my-init.sh" } }` + */ + workspace?: { + /** + * workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh` + */ + destination?: string; + }; + }[]; + /** + * The optional ID of the instance pool to which the cluster belongs. + */ + instance_pool_id?: string; + /** + * This field encodes, through a single value, the resources available to each of + * the Spark nodes in this cluster. For example, the Spark nodes can be provisioned + * and optimized for memory or compute intensive workloads. A list of available node + * types can be retrieved by using the :method:clusters/listNodeTypes API call. + * + */ + node_type_id?: string; + /** + * Number of worker nodes that this cluster should have. A cluster has one Spark Driver + * and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. + * + * Note: When reading the properties of a cluster, this field reflects the desired number + * of workers rather than the actual current number of workers. For instance, if a cluster + * is resized from 5 to 10 workers, this field will immediately be updated to reflect + * the target size of 10 workers, whereas the workers listed in `spark_info` will gradually + * increase from 5 to 10 as the new nodes are provisioned. + */ + num_workers?: number; + /** + * The ID of the cluster policy used to create the cluster if applicable. + */ + policy_id?: string; + runtime_engine?: string; + /** + * Single user name if data_security_mode is `SINGLE_USER` + */ + single_user_name?: string; + /** + * An object containing a set of optional, user-specified Spark configuration key-value pairs. + * Users can also pass in a string of extra JVM options to the driver and the executors via + * `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. + * + */ + spark_conf?: { + [k: string]: string; + }; + /** + * An object containing a set of optional, user-specified environment variable key-value pairs. 
+ * Please note that key-value pair of the form (X,Y) will be exported as is (i.e., + * `export X='Y'`) while launching the driver and workers. + * + * In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending + * them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all + * default databricks managed environmental variables are included as well. + * + * Example Spark environment variables: + * `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or + * `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` + */ + spark_env_vars?: { + [k: string]: string; + }; + /** + * The Spark version of the cluster, e.g. `3.3.x-scala2.11`. + * A list of available Spark versions can be retrieved by using + * the :method:clusters/sparkVersions API call. + * + */ + spark_version?: string; + /** + * SSH public key contents that will be added to each Spark node in this cluster. The + * corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. + * Up to 10 keys can be specified. + */ + ssh_public_keys?: string[]; + workload_type?: { + /** + * defined what type of clients can use the cluster. E.g. Notebooks, Jobs + */ + clients?: { + /** + * With jobs set, the cluster can be used for jobs + */ + jobs?: boolean; + /** + * With notebooks set, this cluster can be used for notebooks + */ + notebooks?: boolean; + }; + }; + }; + /** + * If notebook_task, indicates that this task must run a notebook. This field may not be specified in conjunction with spark_jar_task. + */ + notebook_task?: { + /** + * Base parameters to be used for each run of this job. If the run is initiated by a call to + * :method:jobs/runNow with parameters specified, the two parameters maps are merged. If the same key is specified in + * `base_parameters` and in `run-now`, the value from `run-now` is used. + * + * Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. + * + * If the notebook takes a parameter that is not specified in the job’s `base_parameters` or the `run-now` override parameters, + * the default value from the notebook is used. + * + * Retrieve these parameters in a notebook using [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-widgets). + * + */ + base_parameters?: { + [k: string]: string; + }; + /** + * The path of the notebook to be run in the Databricks workspace or remote repository. + * For notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash. + * For notebooks stored in a remote repository, the path must be relative. This field is required. + * + */ + notebook_path: string; + /** + * Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved + * from the local workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`, + * the Python file will be retrieved from a Git repository defined in `git_source`. + * + * * `WORKSPACE`: The Python file is located in a workspace or at a cloud filesystem URI. + * * `GIT`: The Python file is located in a remote Git repository. + * + */ + source?: string; + }; + /** + * Optional notification settings that are used when sending notifications to each of the `email_notifications` for this task. 
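A sketch of a `notebook_task` per the documentation above; `base_parameters` are merged with, and overridden by, parameters passed via run-now. The path and values are hypothetical:

```typescript
const notebookTask: {
    notebook_path: string;
    base_parameters?: {[k: string]: string};
    source?: string;
} = {
    notebook_path: "/Workspace/Users/user1@databricks.com/etl", // placeholder
    base_parameters: {env: "dev"},
    source: "WORKSPACE",
};
```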
+ */ + notification_settings?: { + /** + * If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run. + */ + alert_on_last_attempt?: boolean; + /** + * If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. + */ + no_alert_for_canceled_runs?: boolean; + /** + * If true, do not send notifications to recipients specified in `on_failure` if the run is skipped. + */ + no_alert_for_skipped_runs?: boolean; + }; + /** + * If pipeline_task, indicates that this task must execute a Pipeline. + */ + pipeline_task?: { + /** + * If true, a full refresh will be triggered on the delta live table. + */ + full_refresh?: boolean; + /** + * The full name of the pipeline task to execute. + */ + pipeline_id?: string; + }; + /** + * If python_wheel_task, indicates that this job must execute a PythonWheel. + */ + python_wheel_task?: { + /** + * Named entry point to use, if it does not exist in the metadata of the package it executes the function from the package directly using `$packageName.$entryPoint()` + */ + entry_point?: string; + /** + * Command-line parameters passed to Python wheel task in the form of `["--name=task", "--data=dbfs:/path/to/data.json"]`. Leave it empty if `parameters` is not null. + */ + named_parameters?: { + [k: string]: string; + }; + /** + * Name of the package to execute + */ + package_name?: string; + /** + * Command-line parameters passed to Python wheel task. Leave it empty if `named_parameters` is not null. + */ + parameters?: string[]; + }; + /** + * An optional policy to specify whether to retry a task when it times out. The default behavior is to not retry on timeout. + */ + retry_on_timeout?: boolean; + /** + * An optional value specifying the condition determining whether the task is run once its dependencies have been completed. When omitted, defaults to `ALL_SUCCESS`. + * + * * `ALL_SUCCESS`: All dependencies have executed and succeeded + * * `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded + * * `NONE_FAILED`: None of the dependencies have failed and at least one was executed + * * `ALL_DONE`: All dependencies completed and at least one was executed + * * `AT_LEAST_ONE_FAILED`: At least one dependency failed + * * `ALL_FAILED`: ALl dependencies have failed + * + */ + run_if?: string; + run_job_task?: { + job_id: number; + job_parameters?: unknown; + }; + /** + * If spark_jar_task, indicates that this task must run a JAR. + */ + spark_jar_task?: { + /** + * Deprecated since 04/2016. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create. + * + */ + jar_uri?: string; + /** + * The full name of the class containing the main method to be executed. This class must be contained in a JAR provided as a library. + * + * The code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail. + */ + main_class_name?: string; + /** + * Parameters passed to the main method. + * + * Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. + * + */ + parameters?: string[]; + }; + /** + * If spark_python_task, indicates that this task must run a Python file. + */ + spark_python_task?: { + /** + * Command line parameters passed to the Python file. 
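A sketch of a `python_wheel_task`; since `named_parameters` and `parameters` are mutually exclusive per the docs above, only one is set here. Package and parameter names are hypothetical:

```typescript
const pythonWheelTask: {
    package_name?: string;
    entry_point?: string;
    named_parameters?: {[k: string]: string};
    parameters?: string[];
} = {
    package_name: "my_package", // placeholder
    entry_point: "main",
    named_parameters: {name: "task", data: "dbfs:/path/to/data.json"},
};
```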
+ * + * Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. + * + */ + parameters?: string[]; + /** + * The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required. + */ + python_file: string; + /** + * Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved + * from the local workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`, + * the Python file will be retrieved from a Git repository defined in `git_source`. + * + * * `WORKSPACE`: The Python file is located in a workspace or at a cloud filesystem URI. + * * `GIT`: The Python file is located in a remote Git repository. + * + */ + source?: string; + }; + /** + * If spark_submit_task, indicates that this task must be launched by the spark submit script. This task can run only on new clusters. + */ + spark_submit_task?: { + /** + * Command-line parameters passed to spark submit. + * + * Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. + * + */ + parameters?: string[]; + }; + /** + * If sql_task, indicates that this job must execute a SQL task. + */ + sql_task?: { + /** + * If alert, indicates that this job must refresh a SQL alert. + */ + alert?: { + /** + * The canonical identifier of the SQL alert. + */ + alert_id: string; + /** + * If true, the alert notifications are not sent to subscribers. + */ + pause_subscriptions?: boolean; + /** + * If specified, alert notifications are sent to subscribers. + */ + subscriptions?: { + /** + * The canonical identifier of the destination to receive email notification. + */ + destination_id?: string; + /** + * The user name to receive the subscription email. + */ + user_name?: string; + }[]; + }; + /** + * If dashboard, indicates that this job must refresh a SQL dashboard. + */ + dashboard?: { + /** + * Subject of the email sent to subscribers of this task. + */ + custom_subject?: string; + /** + * The canonical identifier of the SQL dashboard. + */ + dashboard_id: string; + /** + * If true, the dashboard snapshot is not taken, and emails are not sent to subscribers. + */ + pause_subscriptions?: boolean; + /** + * If specified, dashboard snapshots are sent to subscriptions. + */ + subscriptions?: { + /** + * The canonical identifier of the destination to receive email notification. + */ + destination_id?: string; + /** + * The user name to receive the subscription email. + */ + user_name?: string; + }[]; + }; + /** + * If file, indicates that this job runs a SQL file in a remote Git repository. Only one SQL statement is supported in a file. Multiple SQL statements separated by semicolons (;) are not permitted. + */ + file?: { + /** + * Relative path of the SQL file in the remote Git repository. + */ + path: string; + }; + /** + * Parameters to be used for each run of this job. The SQL alert task does not support custom parameters. + */ + parameters?: { + [k: string]: string; + }; + /** + * If query, indicates that this job must execute a SQL query. + */ + query?: { + /** + * The canonical identifier of the SQL query. 
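A sketch of a `sql_task` that runs a saved query on a SQL warehouse, using the surrounding `sql_task` fields; both identifiers are placeholders:

```typescript
const sqlTask: {
    query?: {query_id: string};
    warehouse_id: string;
    parameters?: {[k: string]: string};
} = {
    query: {query_id: "00000000-0000-0000-0000-000000000000"}, // placeholder
    warehouse_id: "1234567890abcdef", // placeholder
    parameters: {run_date: "2023-11-11"},
};
```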
+ */ + query_id: string; + }; + /** + * The canonical identifier of the SQL warehouse. Only serverless and pro SQL warehouses are supported. + */ + warehouse_id: string; + }; + /** + * A unique name for the task. This field is used to refer to this task from other tasks. + * This field is required and must be unique within its parent job. + * On Update or Reset, this field is used to reference the tasks to be updated or reset. + * The maximum length is 100 characters. + */ + task_key: string; + /** + * An optional timeout applied to each run of this job task. The default behavior is to have no timeout. + */ + timeout_seconds?: number; + }[]; + /** + * An optional timeout applied to each run of this job. The default behavior is to have no timeout. + */ + timeout_seconds?: number; + /** + * Trigger settings for the job. Can be used to trigger a run when new files arrive in an external location. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. + */ + trigger?: { + /** + * File arrival trigger settings. + */ + file_arrival?: { + /** + * If set, the trigger starts a run only after the specified amount of time passed since + * the last time the trigger fired. The minimum allowed value is 60 seconds + * + */ + min_time_between_triggers_seconds?: number; + /** + * URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location. + */ + url?: string; + /** + * If set, the trigger starts a run only after no file activity has occurred for the specified amount of time. + * This makes it possible to wait for a batch of incoming files to arrive before triggering a run. The + * minimum allowed value is 60 seconds. + * + */ + wait_after_last_change_seconds?: number; + }; + /** + * Whether this trigger is paused or not. + */ + pause_status?: string; + }; + /** + * A collection of system notification IDs to notify when the run begins or completes. The default behavior is to not send any system notifications. + */ + webhook_notifications?: { + on_duration_warning_threshold_exceeded?: { + id?: string; + }[]; + /** + * An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. + */ + on_failure?: { + id?: string; + }[]; + /** + * An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. + */ + on_start?: { + id?: string; + }[]; + /** + * An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. + */ + on_success?: { + id?: string; + }[]; + }; + }; + }; + model_serving_endpoints?: { + [k: string]: { + config: { + served_models: { + environment_vars?: { + [k: string]: string; + }; + instance_profile_arn?: string; + model_name: string; + model_version: string; + name?: string; + scale_to_zero_enabled: boolean; + workload_size: string; + }[]; + traffic_config?: { + routes?: { + served_model_name: string; + traffic_percentage: number; + }[]; + }; + }; + name: string; + permissions?: { + group_name?: string; + level: string; + service_principal_name?: string; + user_name?: string; + }[]; + }; + }; + /** + * List of MLflow models + */ + models?: { + [k: string]: { + /** + * Timestamp recorded when this `registered_model` was created. 
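A sketch of a file-arrival `trigger` using the knobs documented above; the URL is a placeholder external-location path, and both intervals respect the documented 60-second minimum:

```typescript
const trigger: {
    file_arrival?: {
        url?: string;
        min_time_between_triggers_seconds?: number;
        wait_after_last_change_seconds?: number;
    };
    pause_status?: string;
} = {
    file_arrival: {
        url: "s3://landing-bucket/incoming/", // placeholder
        min_time_between_triggers_seconds: 120,
        wait_after_last_change_seconds: 60,
    },
    pause_status: "UNPAUSED",
};
```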
+ */ + creation_timestamp?: number; + /** + * Description of this `registered_model`. + */ + description?: string; + /** + * Timestamp recorded when metadata for this `registered_model` was last updated. + */ + last_updated_timestamp?: number; + /** + * Collection of latest model versions for each stage. + * Only contains models with current `READY` status. + */ + latest_versions?: { + /** + * Timestamp recorded when this `model_version` was created. + */ + creation_timestamp?: number; + /** + * Current stage for this `model_version`. + */ + current_stage?: string; + /** + * Description of this `model_version`. + */ + description?: string; + /** + * Timestamp recorded when metadata for this `model_version` was last updated. + */ + last_updated_timestamp?: number; + /** + * Unique name of the model + */ + name?: string; + /** + * MLflow run ID used when creating `model_version`, if `source` was generated by an + * experiment run stored in MLflow tracking server. + */ + run_id?: string; + /** + * Run Link: Direct link to the run that generated this version + */ + run_link?: string; + /** + * URI indicating the location of the source model artifacts, used when creating `model_version` + */ + source?: string; + /** + * Current status of `model_version` + */ + status?: string; + /** + * Details on current `status`, if it is pending or failed. + */ + status_message?: string; + /** + * Tags: Additional metadata key-value pairs for this `model_version`. + */ + tags?: { + /** + * The tag key. + */ + key?: string; + /** + * The tag value. + */ + value?: string; + }[]; + /** + * User that created this `model_version`. + */ + user_id?: string; + /** + * Model's version number. + */ + version?: string; + }[]; + /** + * Unique name for the model. + */ + name?: string; + permissions?: { + group_name?: string; + level: string; + service_principal_name?: string; + user_name?: string; + }[]; + /** + * Tags: Additional metadata key-value pairs for this `registered_model`. + */ + tags?: { + /** + * The tag key. + */ + key?: string; + /** + * The tag value. + */ + value?: string; + }[]; + /** + * User that created this `registered_model` + */ + user_id?: string; + }; + }; + /** + * List of DLT pipelines + */ + pipelines?: { + [k: string]: { + /** + * A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog. + */ + catalog?: string; + /** + * DLT Release Channel that specifies which version to use. + */ + channel?: string; + /** + * Cluster settings for this pipeline deployment. + */ + clusters?: { + /** + * Note: This field won't be persisted. Only API users will check this field. + */ + apply_policy_default_values?: boolean; + /** + * Parameters needed in order to automatically scale clusters up and down based on load. + * Note: autoscaling works best with DB runtime versions 3.0 or later. + */ + autoscale?: { + /** + * The maximum number of workers to which the cluster can scale up when overloaded. + * Note that `max_workers` must be strictly greater than `min_workers`. + */ + max_workers: number; + /** + * The minimum number of workers to which the cluster can scale down when underutilized. + * It is also the initial number of workers the cluster will have after creation. + */ + min_workers: number; + }; + /** + * Attributes related to clusters running on Amazon Web Services. 
+ * If not specified at cluster creation, a set of default values will be used. + */ + aws_attributes?: { + availability?: string; + /** + * The number of volumes launched for each instance. Users can choose up to 10 volumes. + * This feature is only enabled for supported node types. Legacy node types cannot specify + * custom EBS volumes. + * For node types with no instance store, at least one EBS volume needs to be specified; + * otherwise, cluster creation will fail. + * + * These EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc. + * Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc. + * + * If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for + * scratch storage because heterogenously sized scratch devices can lead to inefficient disk + * utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance + * store volumes. + * + * Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` + * will be overridden. + */ + ebs_volume_count?: number; + /** + * + */ + ebs_volume_iops?: number; + /** + * The size of each EBS volume (in GiB) launched for each instance. For general purpose + * SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, + * this value must be within the range 500 - 4096. + */ + ebs_volume_size?: number; + /** + * + */ + ebs_volume_throughput?: number; + ebs_volume_type?: string; + /** + * The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. + * If this value is greater than 0, the cluster driver node in particular will be placed on an + * on-demand instance. If this value is greater than or equal to the current cluster size, all + * nodes will be placed on on-demand instances. If this value is less than the current cluster + * size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will + * be placed on `availability` instances. Note that this value does not affect + * cluster size and cannot currently be mutated over the lifetime of a cluster. + */ + first_on_demand?: number; + /** + * Nodes for this cluster will only be placed on AWS instances with this instance profile. If + * ommitted, nodes will be placed on instances without an IAM instance profile. The instance + * profile must have previously been added to the Databricks environment by an account + * administrator. + * + * This feature may only be available to certain customer plans. + * + * If this field is ommitted, we will pull in the default from the conf if it exists. + */ + instance_profile_arn?: string; + /** + * The bid price for AWS spot instances, as a percentage of the corresponding instance type's + * on-demand price. + * For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot + * instance, then the bid price is half of the price of + * on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice + * the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. + * When spot instances are requested for this cluster, only spot instances whose bid price + * percentage matches this field will be considered. + * Note that, for safety, we enforce this field to be no more than 10000. + * + * The default value and documentation here should be kept consistent with + * CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent. 
+ */ + spot_bid_price_percent?: number; + /** + * Identifier for the availability zone/datacenter in which the cluster resides. + * This string will be of a form like "us-west-2a". The provided availability + * zone must be in the same region as the Databricks deployment. For example, "us-west-2a" + * is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. + * This is an optional field at cluster creation, and if not specified, a default zone will be used. + * If the zone specified is "auto", will try to place cluster in a zone with high availability, + * and will retry placement in a different AZ if there is not enough capacity. + * See [[AutoAZHelper.scala]] for more details. + * The list of available zones as well as the default value can be found by using the + * `List Zones`_ method. + */ + zone_id?: string; + }; + /** + * Attributes related to clusters running on Microsoft Azure. + * If not specified at cluster creation, a set of default values will be used. + */ + azure_attributes?: { + availability?: string; + /** + * The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. + * This value should be greater than 0, to make sure the cluster driver node is placed on an + * on-demand instance. If this value is greater than or equal to the current cluster size, all + * nodes will be placed on on-demand instances. If this value is less than the current cluster + * size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will + * be placed on `availability` instances. Note that this value does not affect + * cluster size and cannot currently be mutated over the lifetime of a cluster. + */ + first_on_demand?: number; + /** + * Defines values necessary to configure and run Azure Log Analytics agent + */ + log_analytics_info?: { + /** + * + */ + log_analytics_primary_key?: string; + /** + * + */ + log_analytics_workspace_id?: string; + }; + /** + * The max bid price to be used for Azure spot instances. + * The Max price for the bid cannot be higher than the on-demand price of the instance. + * If not specified, the default value is -1, which specifies that the instance cannot be evicted + * on the basis of price, and only on the basis of availability. Further, the value should > 0 or -1. + */ + spot_bid_max_price?: number; + }; + /** + * The configuration for delivering spark logs to a long-term storage destination. + * Only dbfs destinations are supported. Only one destination can be specified + * for one cluster. If the conf is given, the logs will be delivered to the destination every + * `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while + * the destination of executor logs is `$destination/$clusterId/executor`. + * + */ + cluster_log_conf?: { + /** + * destination needs to be provided. e.g. + * `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + */ + dbfs?: { + /** + * dbfs destination, e.g. `dbfs:/my/path` + */ + destination?: string; + }; + /** + * destination and either the region or endpoint need to be provided. e.g. + * `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` + * Cluster iam role is used to access s3, please make sure the cluster iam role in + * `instance_profile_arn` has permission to write data to the s3 destination. + */ + s3?: { + /** + * (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. 
+ * If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on + * the destination bucket and prefix. The full list of possible canned acl can be found at + * http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. + * Please also note that by default only the object owner gets full controls. If you are using cross account + * role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to + * read the logs. + */ + canned_acl?: string; + /** + * S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using + * cluster iam role, please make sure you set cluster iam role and the role has write access to the + * destination. Please also note that you cannot use AWS keys to deliver logs. + */ + destination?: string; + /** + * (Optional) Flag to enable server side encryption, `false` by default. + */ + enable_encryption?: boolean; + /** + * (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when + * encryption is enabled and the default type is `sse-s3`. + */ + encryption_type?: string; + /** + * S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. + * If both are set, endpoint will be used. + */ + endpoint?: string; + /** + * (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + */ + kms_key?: string; + /** + * S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, + * endpoint will be used. + */ + region?: string; + }; + }; + /** + * Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS + * instances and EBS volumes) with these tags in addition to `default_tags`. Notes: + * + * - Currently, Databricks allows at most 45 custom tags + * + * - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags + */ + custom_tags?: { + [k: string]: string; + }; + /** + * The optional ID of the instance pool for the driver of the cluster belongs. + * The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not + * assigned. + */ + driver_instance_pool_id?: string; + /** + * The node type of the Spark driver. + * Note that this field is optional; if unset, the driver node type will be set as the same value + * as `node_type_id` defined above. + */ + driver_node_type_id?: string; + /** + * Attributes related to clusters running on Google Cloud Platform. + * If not specified at cluster creation, a set of default values will be used. + */ + gcp_attributes?: { + availability?: string; + /** + * boot disk size in GB + */ + boot_disk_size?: number; + /** + * If provided, the cluster will impersonate the google service account when accessing + * gcloud services (like GCS). The google service account + * must have previously been added to the Databricks environment by an account + * administrator. + */ + google_service_account?: string; + /** + * If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. + */ + local_ssd_count?: number; + }; + /** + * The optional ID of the instance pool to which the cluster belongs. 
+ */ + instance_pool_id?: string; + /** + * A label for the cluster specification, either `default` to configure the default cluster, or `maintenance` to configure the maintenance cluster. This field is optional. The default value is `default`. + */ + label?: string; + /** + * This field encodes, through a single value, the resources available to each of + * the Spark nodes in this cluster. For example, the Spark nodes can be provisioned + * and optimized for memory or compute intensive workloads. A list of available node + * types can be retrieved by using the :method:clusters/listNodeTypes API call. + * + */ + node_type_id?: string; + /** + * Number of worker nodes that this cluster should have. A cluster has one Spark Driver + * and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. + * + * Note: When reading the properties of a cluster, this field reflects the desired number + * of workers rather than the actual current number of workers. For instance, if a cluster + * is resized from 5 to 10 workers, this field will immediately be updated to reflect + * the target size of 10 workers, whereas the workers listed in `spark_info` will gradually + * increase from 5 to 10 as the new nodes are provisioned. + */ + num_workers?: number; + /** + * The ID of the cluster policy used to create the cluster if applicable. + */ + policy_id?: string; + /** + * An object containing a set of optional, user-specified Spark configuration key-value pairs. + * See :method:clusters/create for more details. + * + */ + spark_conf?: { + [k: string]: string; + }; + /** + * An object containing a set of optional, user-specified environment variable key-value pairs. + * Please note that key-value pair of the form (X,Y) will be exported as is (i.e., + * `export X='Y'`) while launching the driver and workers. + * + * In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending + * them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all + * default databricks managed environmental variables are included as well. + * + * Example Spark environment variables: + * `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or + * `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` + */ + spark_env_vars?: { + [k: string]: string; + }; + /** + * SSH public key contents that will be added to each Spark node in this cluster. The + * corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. + * Up to 10 keys can be specified. + */ + ssh_public_keys?: string[]; + }[]; + /** + * String-String configuration for this pipeline execution. + */ + configuration?: { + [k: string]: string; + }; + /** + * Whether the pipeline is continuous or triggered. This replaces `trigger`. + */ + continuous?: boolean; + /** + * Whether the pipeline is in Development mode. Defaults to false. + */ + development?: boolean; + /** + * Pipeline product edition. + */ + edition?: string; + /** + * Filters on which Pipeline packages to include in the deployed graph. + */ + filters?: { + /** + * Paths to exclude. + */ + exclude?: string[]; + /** + * Paths to include. + */ + include?: string[]; + }; + /** + * Unique identifier for this pipeline. + */ + id?: string; + /** + * Libraries or code needed by this deployment. + */ + libraries?: { + /** + * The path to a file that defines a pipeline and is stored in the Databricks Repos. + * + */ + file?: { + /** + * The absolute path of the file. 
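Drawing on the pipeline fields documented above, a sketch of a minimal DLT pipeline fragment; the catalog, configuration keys, and file path are hypothetical:

```typescript
const pipelineFragment: {
    catalog?: string;
    configuration?: {[k: string]: string};
    continuous?: boolean;
    development?: boolean;
    libraries?: {file?: {path?: string}}[];
} = {
    catalog: "main", // placeholder catalog
    configuration: {"my_pipeline.input_path": "/Volumes/main/raw"}, // placeholder
    continuous: false,
    development: true,
    libraries: [{file: {path: "/Workspace/Users/user1@databricks.com/dlt/pipeline.sql"}}],
};
```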
+ */ + path?: string; + }; + /** + * URI of the jar to be installed. Currently only DBFS is supported. + * + */ + jar?: string; + /** + * Specification of a maven library to be installed. + * + */ + maven?: { + /** + * Gradle-style maven coordinates. For example: "org.jsoup:jsoup:1.7.2". + */ + coordinates: string; + /** + * List of dependences to exclude. For example: `["slf4j:slf4j", "*:hadoop-client"]`. + * + * Maven dependency exclusions: + * https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html. + */ + exclusions?: string[]; + /** + * Maven repo to install the Maven package from. If omitted, both Maven Central Repository + * and Spark Packages are searched. + */ + repo?: string; + }; + /** + * The path to a notebook that defines a pipeline and is stored in the workspace. + * + */ + notebook?: { + /** + * The absolute path of the notebook. + */ + path?: string; + }; + }[]; + /** + * Friendly identifier for this pipeline. + */ + name?: string; + permissions?: { + group_name?: string; + level: string; + service_principal_name?: string; + user_name?: string; + }[]; + /** + * Whether Photon is enabled for this pipeline. + */ + photon?: boolean; + /** + * Whether serverless compute is enabled for this pipeline. + */ + serverless?: boolean; + /** + * DBFS root directory for storing checkpoints and tables. + */ + storage?: string; + /** + * Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`. + */ + target?: string; + /** + * Which pipeline trigger to use. Deprecated: Use `continuous` instead. + */ + trigger?: { + cron?: { + quartz_cron_schedule?: string; + timezone_id?: string; + }; + manual?: unknown; + }; + }; + }; + }; + run_as?: { + service_principal_name?: string; + user_name?: string; + }; + sync?: { + exclude?: string[]; + include?: string[]; + }; + targets?: { + [k: string]: { + /** + * A description of all code artifacts in this bundle. + */ + artifacts?: { + [k: string]: { + build: string; + files: { + source: string; + }[]; + path: string; + type: string; + }; + }; + /** + * The details for this bundle. + */ + bundle?: { + compute_id?: string; + git?: { + branch?: string; + origin_url?: string; + }; + /** + * The name of the bundle. + */ + name: string; + }; + compute_id?: string; + default?: boolean; + git?: { + branch?: string; + origin_url?: string; + }; + mode?: string; + /** + * Collection of Databricks resources to deploy. + */ + resources?: { + /** + * List of MLflow experiments + */ + experiments?: { + [k: string]: { + /** + * Location where artifacts for the experiment are stored. + */ + artifact_location?: string; + /** + * Creation time + */ + creation_time?: number; + /** + * Unique identifier for the experiment. + */ + experiment_id?: string; + /** + * Last update time + */ + last_update_time?: number; + /** + * Current life cycle stage of the experiment: "active" or "deleted". + * Deleted experiments are not returned by APIs. + */ + lifecycle_stage?: string; + /** + * Human readable name that identifies the experiment. + */ + name?: string; + permissions?: { + group_name?: string; + level: string; + service_principal_name?: string; + user_name?: string; + }[]; + /** + * Tags: Additional metadata key-value pairs. + */ + tags?: { + /** + * The tag key. + */ + key?: string; + /** + * The tag value. 
+ */ + value?: string; + }[]; + }; + }; + /** + * List of Databricks jobs + */ + jobs?: { + [k: string]: { + /** + * A list of compute requirements that can be referenced by tasks of this job. + */ + compute?: { + /** + * A unique name for the compute requirement. This field is required and must be unique within the job. + * `JobTaskSettings` may refer to this field to determine the compute requirements for the task execution. + */ + compute_key: string; + spec: { + /** + * The kind of compute described by this compute specification. + */ + kind?: string; + }; + }[]; + /** + * An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used. + */ + continuous?: { + /** + * Whether this trigger is paused or not. + */ + pause_status?: string; + }; + /** + * An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. The default behavior is to not send any emails. + */ + email_notifications?: { + /** + * If true, do not send email to recipients specified in `on_failure` if the run is skipped. + */ + no_alert_for_skipped_runs?: boolean; + on_duration_warning_threshold_exceeded?: string[]; + /** + * A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. + */ + on_failure?: string[]; + /** + * A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. + */ + on_start?: string[]; + /** + * A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. + */ + on_success?: string[]; + }; + /** + * Used to tell what is the format of the job. This field is ignored in Create/Update/Reset calls. When using the Jobs API 2.1 this value is always set to `"MULTI_TASK"`. + */ + format?: string; + /** + * An optional specification for a remote repository containing the notebooks used by this job's notebook tasks. + */ + git_source?: { + /** + * Name of the branch to be checked out and used by this job. + * This field cannot be specified in conjunction with git_tag or git_commit. + * + * The maximum length is 255 characters. + * + */ + git_branch?: string; + /** + * Commit to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_tag. + * The maximum length is 64 characters. + */ + git_commit?: string; + /** + * Unique identifier of the service used to host the Git repository. The value is case insensitive. + */ + git_provider: string; + git_snapshot?: { + /** + * Commit that was used to execute the run. If git_branch was specified, this points to the HEAD of the branch at the time of the run; if git_tag was specified, this points to the commit the tag points to. + */ + used_commit?: string; + }; + /** + * Name of the tag to be checked out and used by this job. + * This field cannot be specified in conjunction with git_branch or git_commit. 
+ * + * The maximum length is 255 characters. + * + */ + git_tag?: string; + /** + * URL of the repository to be cloned by this job. + * The maximum length is 300 characters. + */ + git_url: string; + job_source?: { + dirty_state?: string; + import_from_git_branch: string; + job_config_path: string; + }; + }; + health?: { + rules?: { + metric?: string; + op?: string; + value?: number; + }[]; + }; + /** + * A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings. + */ + job_clusters?: { + /** + * A unique name for the job cluster. This field is required and must be unique within the job. + * `JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution. + */ + job_cluster_key: string; + /** + * If new_cluster, a description of a cluster that is created for only for this task. + */ + new_cluster?: { + /** + * Parameters needed in order to automatically scale clusters up and down based on load. + * Note: autoscaling works best with DB runtime versions 3.0 or later. + */ + autoscale?: { + /** + * The maximum number of workers to which the cluster can scale up when overloaded. + * Note that `max_workers` must be strictly greater than `min_workers`. + */ + max_workers: number; + /** + * The minimum number of workers to which the cluster can scale down when underutilized. + * It is also the initial number of workers the cluster will have after creation. + */ + min_workers: number; + }; + /** + * Automatically terminates the cluster after it is inactive for this time in minutes. If not set, + * this cluster will not be automatically terminated. If specified, the threshold must be between + * 10 and 10000 minutes. + * Users can also set this value to 0 to explicitly disable automatic termination. + */ + autotermination_minutes?: number; + /** + * Attributes related to clusters running on Amazon Web Services. + * If not specified at cluster creation, a set of default values will be used. + */ + aws_attributes?: { + availability?: string; + /** + * The number of volumes launched for each instance. Users can choose up to 10 volumes. + * This feature is only enabled for supported node types. Legacy node types cannot specify + * custom EBS volumes. + * For node types with no instance store, at least one EBS volume needs to be specified; + * otherwise, cluster creation will fail. + * + * These EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc. + * Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc. + * + * If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for + * scratch storage because heterogenously sized scratch devices can lead to inefficient disk + * utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance + * store volumes. + * + * Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` + * will be overridden. + */ + ebs_volume_count?: number; + /** + * + */ + ebs_volume_iops?: number; + /** + * The size of each EBS volume (in GiB) launched for each instance. For general purpose + * SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, + * this value must be within the range 500 - 4096. 
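+ *
+ * Illustrative sketch (hypothetical sizes; the volume type string is an
+ * assumption, it is not documented in this schema): two 100 GiB general
+ * purpose SSD volumes per node could be requested with
+ *
+ * ```ts
+ * const ebs = {
+ *     ebs_volume_count: 2,
+ *     ebs_volume_type: "GENERAL_PURPOSE_SSD",
+ *     ebs_volume_size: 100,
+ * };
+ * ```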
+ */ + ebs_volume_size?: number; + /** + * + */ + ebs_volume_throughput?: number; + ebs_volume_type?: string; + /** + * The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. + * If this value is greater than 0, the cluster driver node in particular will be placed on an + * on-demand instance. If this value is greater than or equal to the current cluster size, all + * nodes will be placed on on-demand instances. If this value is less than the current cluster + * size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will + * be placed on `availability` instances. Note that this value does not affect + * cluster size and cannot currently be mutated over the lifetime of a cluster. + */ + first_on_demand?: number; + /** + * Nodes for this cluster will only be placed on AWS instances with this instance profile. If + * ommitted, nodes will be placed on instances without an IAM instance profile. The instance + * profile must have previously been added to the Databricks environment by an account + * administrator. + * + * This feature may only be available to certain customer plans. + * + * If this field is ommitted, we will pull in the default from the conf if it exists. + */ + instance_profile_arn?: string; + /** + * The bid price for AWS spot instances, as a percentage of the corresponding instance type's + * on-demand price. + * For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot + * instance, then the bid price is half of the price of + * on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice + * the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. + * When spot instances are requested for this cluster, only spot instances whose bid price + * percentage matches this field will be considered. + * Note that, for safety, we enforce this field to be no more than 10000. + * + * The default value and documentation here should be kept consistent with + * CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent. + */ + spot_bid_price_percent?: number; + /** + * Identifier for the availability zone/datacenter in which the cluster resides. + * This string will be of a form like "us-west-2a". The provided availability + * zone must be in the same region as the Databricks deployment. For example, "us-west-2a" + * is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. + * This is an optional field at cluster creation, and if not specified, a default zone will be used. + * If the zone specified is "auto", will try to place cluster in a zone with high availability, + * and will retry placement in a different AZ if there is not enough capacity. + * See [[AutoAZHelper.scala]] for more details. + * The list of available zones as well as the default value can be found by using the + * `List Zones`_ method. + */ + zone_id?: string; + }; + /** + * Attributes related to clusters running on Microsoft Azure. + * If not specified at cluster creation, a set of default values will be used. + */ + azure_attributes?: { + availability?: string; + /** + * The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. + * This value should be greater than 0, to make sure the cluster driver node is placed on an + * on-demand instance. If this value is greater than or equal to the current cluster size, all + * nodes will be placed on on-demand instances. 
If this value is less than the current cluster + * size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will + * be placed on `availability` instances. Note that this value does not affect + * cluster size and cannot currently be mutated over the lifetime of a cluster. + */ + first_on_demand?: number; + /** + * Defines values necessary to configure and run Azure Log Analytics agent + */ + log_analytics_info?: { + /** + * + */ + log_analytics_primary_key?: string; + /** + * + */ + log_analytics_workspace_id?: string; + }; + /** + * The max bid price to be used for Azure spot instances. + * The Max price for the bid cannot be higher than the on-demand price of the instance. + * If not specified, the default value is -1, which specifies that the instance cannot be evicted + * on the basis of price, and only on the basis of availability. Further, the value should > 0 or -1. + */ + spot_bid_max_price?: number; + }; + /** + * The configuration for delivering spark logs to a long-term storage destination. + * Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified + * for one cluster. If the conf is given, the logs will be delivered to the destination every + * `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while + * the destination of executor logs is `$destination/$clusterId/executor`. + */ + cluster_log_conf?: { + /** + * destination needs to be provided. e.g. + * `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + */ + dbfs?: { + /** + * dbfs destination, e.g. `dbfs:/my/path` + */ + destination?: string; + }; + /** + * destination and either the region or endpoint need to be provided. e.g. + * `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` + * Cluster iam role is used to access s3, please make sure the cluster iam role in + * `instance_profile_arn` has permission to write data to the s3 destination. + */ + s3?: { + /** + * (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. + * If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on + * the destination bucket and prefix. The full list of possible canned acl can be found at + * http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. + * Please also note that by default only the object owner gets full controls. If you are using cross account + * role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to + * read the logs. + */ + canned_acl?: string; + /** + * S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using + * cluster iam role, please make sure you set cluster iam role and the role has write access to the + * destination. Please also note that you cannot use AWS keys to deliver logs. + */ + destination?: string; + /** + * (Optional) Flag to enable server side encryption, `false` by default. + */ + enable_encryption?: boolean; + /** + * (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when + * encryption is enabled and the default type is `sse-s3`. + */ + encryption_type?: string; + /** + * S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. + * If both are set, endpoint will be used. + */ + endpoint?: string; + /** + * (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. 
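+ *
+ * Illustrative sketch (hypothetical bucket name): an S3 log delivery
+ * configuration using the fields documented above could look like
+ *
+ * ```ts
+ * const s3LogConf = {
+ *     destination: "s3://my-bucket/cluster-logs",
+ *     region: "us-west-2",
+ *     enable_encryption: true,
+ *     encryption_type: "sse-s3",
+ * };
+ * ```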
+ */ + kms_key?: string; + /** + * S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, + * endpoint will be used. + */ + region?: string; + }; + }; + /** + * Cluster name requested by the user. This doesn't have to be unique. + * If not specified at creation, the cluster name will be an empty string. + * + */ + cluster_name?: string; + cluster_source?: string; + /** + * Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS + * instances and EBS volumes) with these tags in addition to `default_tags`. Notes: + * + * - Currently, Databricks allows at most 45 custom tags + * + * - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags + */ + custom_tags?: { + [k: string]: string; + }; + data_security_mode?: string; + docker_image?: { + basic_auth?: { + /** + * Password of the user + */ + password?: string; + /** + * Name of the user + */ + username?: string; + }; + /** + * URL of the docker image. + */ + url?: string; + }; + /** + * The optional ID of the instance pool for the driver of the cluster belongs. + * The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not + * assigned. + */ + driver_instance_pool_id?: string; + /** + * The node type of the Spark driver. Note that this field is optional; + * if unset, the driver node type will be set as the same value + * as `node_type_id` defined above. + * + */ + driver_node_type_id?: string; + /** + * Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk + * space when its Spark workers are running low on disk space. This feature requires specific AWS + * permissions to function correctly - refer to the User Guide for more details. + */ + enable_elastic_disk?: boolean; + /** + * Whether to enable LUKS on cluster VMs' local disks + */ + enable_local_disk_encryption?: boolean; + /** + * Attributes related to clusters running on Google Cloud Platform. + * If not specified at cluster creation, a set of default values will be used. + */ + gcp_attributes?: { + availability?: string; + /** + * boot disk size in GB + */ + boot_disk_size?: number; + /** + * If provided, the cluster will impersonate the google service account when accessing + * gcloud services (like GCS). The google service account + * must have previously been added to the Databricks environment by an account + * administrator. + */ + google_service_account?: string; + /** + * If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. + */ + local_ssd_count?: number; + }; + /** + * The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. + */ + init_scripts?: { + /** + * destination needs to be provided. e.g. + * `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + */ + dbfs?: { + /** + * dbfs destination, e.g. `dbfs:/my/path` + */ + destination?: string; + }; + /** + * destination and either the region or endpoint need to be provided. e.g. 
+ * `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` + * Cluster iam role is used to access s3, please make sure the cluster iam role in + * `instance_profile_arn` has permission to write data to the s3 destination. + */ + s3?: { + /** + * (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. + * If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on + * the destination bucket and prefix. The full list of possible canned acl can be found at + * http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. + * Please also note that by default only the object owner gets full controls. If you are using cross account + * role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to + * read the logs. + */ + canned_acl?: string; + /** + * S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using + * cluster iam role, please make sure you set cluster iam role and the role has write access to the + * destination. Please also note that you cannot use AWS keys to deliver logs. + */ + destination?: string; + /** + * (Optional) Flag to enable server side encryption, `false` by default. + */ + enable_encryption?: boolean; + /** + * (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when + * encryption is enabled and the default type is `sse-s3`. + */ + encryption_type?: string; + /** + * S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. + * If both are set, endpoint will be used. + */ + endpoint?: string; + /** + * (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + */ + kms_key?: string; + /** + * S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, + * endpoint will be used. + */ + region?: string; + }; + volumes?: { + destination?: string; + }; + /** + * destination needs to be provided. e.g. + * `{ "workspace" : { "destination" : "/Users/user1@databricks.com/my-init.sh" } }` + */ + workspace?: { + /** + * workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh` + */ + destination?: string; + }; + }[]; + /** + * The optional ID of the instance pool to which the cluster belongs. + */ + instance_pool_id?: string; + /** + * This field encodes, through a single value, the resources available to each of + * the Spark nodes in this cluster. For example, the Spark nodes can be provisioned + * and optimized for memory or compute intensive workloads. A list of available node + * types can be retrieved by using the :method:clusters/listNodeTypes API call. + * + */ + node_type_id?: string; + /** + * Number of worker nodes that this cluster should have. A cluster has one Spark Driver + * and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. + * + * Note: When reading the properties of a cluster, this field reflects the desired number + * of workers rather than the actual current number of workers. For instance, if a cluster + * is resized from 5 to 10 workers, this field will immediately be updated to reflect + * the target size of 10 workers, whereas the workers listed in `spark_info` will gradually + * increase from 5 to 10 as the new nodes are provisioned. + */ + num_workers?: number; + /** + * The ID of the cluster policy used to create the cluster if applicable. 
+ */ + policy_id?: string; + runtime_engine?: string; + /** + * Single user name if data_security_mode is `SINGLE_USER` + */ + single_user_name?: string; + /** + * An object containing a set of optional, user-specified Spark configuration key-value pairs. + * Users can also pass in a string of extra JVM options to the driver and the executors via + * `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. + * + */ + spark_conf?: { + [k: string]: string; + }; + /** + * An object containing a set of optional, user-specified environment variable key-value pairs. + * Please note that key-value pair of the form (X,Y) will be exported as is (i.e., + * `export X='Y'`) while launching the driver and workers. + * + * In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending + * them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all + * default databricks managed environmental variables are included as well. + * + * Example Spark environment variables: + * `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or + * `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` + */ + spark_env_vars?: { + [k: string]: string; + }; + /** + * The Spark version of the cluster, e.g. `3.3.x-scala2.11`. + * A list of available Spark versions can be retrieved by using + * the :method:clusters/sparkVersions API call. + * + */ + spark_version?: string; + /** + * SSH public key contents that will be added to each Spark node in this cluster. The + * corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. + * Up to 10 keys can be specified. + */ + ssh_public_keys?: string[]; + workload_type?: { + /** + * defined what type of clients can use the cluster. E.g. Notebooks, Jobs + */ + clients?: { + /** + * With jobs set, the cluster can be used for jobs + */ + jobs?: boolean; + /** + * With notebooks set, this cluster can be used for notebooks + */ + notebooks?: boolean; + }; + }; + }; + }[]; + /** + * An optional maximum allowed number of concurrent runs of the job. + * + * Set this value if you want to be able to execute multiple runs of the same job concurrently. This is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters. + * + * This setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs. However, from then on, new runs are skipped unless there are fewer than 3 active runs. + * + * This value cannot exceed 1000\. Setting this value to 0 causes all new runs to be skipped. The default behavior is to allow only 1 concurrent run. + */ + max_concurrent_runs?: number; + /** + * An optional name for the job. + */ + name?: string; + /** + * Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job. + */ + notification_settings?: { + /** + * If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. + */ + no_alert_for_canceled_runs?: boolean; + /** + * If true, do not send notifications to recipients specified in `on_failure` if the run is skipped. 
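+ *
+ * Illustrative sketch (assumed values): a job-level `notification_settings` of
+ *
+ * ```ts
+ * const notificationSettings = {
+ *     no_alert_for_canceled_runs: true,
+ *     no_alert_for_skipped_runs: true,
+ * };
+ * ```
+ *
+ * suppresses `on_failure` notifications for both canceled and skipped runs.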
+ */ + no_alert_for_skipped_runs?: boolean; + }; + /** + * Job-level parameter definitions + */ + parameters?: { + /** + * Default value of the parameter. + */ + default: string; + /** + * The name of the defined parameter. May only contain alphanumeric characters, `_`, `-`, and `.` + */ + name: string; + }[]; + permissions?: { + group_name?: string; + level: string; + service_principal_name?: string; + user_name?: string; + }[]; + run_as?: { + /** + * Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role. + */ + service_principal_name?: string; + /** + * The email of an active workspace user. Non-admin users can only set this field to their own email. + */ + user_name?: string; + }; + /** + * An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. + */ + schedule?: { + /** + * Whether this trigger is paused or not. + */ + pause_status?: string; + /** + * A Cron expression using Quartz syntax that describes the schedule for a job. + * See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) + * for details. This field is required." + * + */ + quartz_cron_expression: string; + /** + * A Java timezone ID. The schedule for a job is resolved with respect to this timezone. + * See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. + * This field is required. + * + */ + timezone_id: string; + }; + /** + * A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job. + */ + tags?: { + [k: string]: string; + }; + /** + * A list of task specifications to be executed by this job. + */ + tasks?: { + /** + * The key of the compute requirement, specified in `job.settings.compute`, to use for execution of this task. + */ + compute_key?: string; + /** + * If condition_task, specifies a condition with an outcome that can be used to control the execution of other tasks. Does not require a cluster to execute and does not support retries or notifications. + */ + condition_task?: { + /** + * The left operand of the condition task. Can be either a string value or a job state or parameter reference. + */ + left?: string; + /** + * * `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`. + * * `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” >= “12”` will evaluate to `true`, `“10.0” >= “12”` will evaluate to `false`. + * + * The boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison. + * + */ + op?: string; + /** + * The right operand of the condition task. Can be either a string value or a job state or parameter reference. + */ + right?: string; + }; + /** + * If dbt_task, indicates that this must execute a dbt task. It requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse. + */ + dbt_task?: { + /** + * Optional name of the catalog to use. The value is the top level in the 3-level namespace of Unity Catalog (catalog / schema / relation). 
The catalog value can only be specified if a warehouse_id is specified. Requires dbt-databricks >= 1.1.1. + */ + catalog?: string; + /** + * A list of dbt commands to execute. All commands must start with `dbt`. This parameter must not be empty. A maximum of up to 10 commands can be provided. + */ + commands: string[]; + /** + * Optional (relative) path to the profiles directory. Can only be specified if no warehouse_id is specified. If no warehouse_id is specified and this folder is unset, the root directory is used. + */ + profiles_directory?: string; + /** + * Optional (relative) path to the project directory, if no value is provided, the root of the git repository is used. + */ + project_directory?: string; + /** + * Optional schema to write to. This parameter is only used when a warehouse_id is also provided. If not provided, the `default` schema is used. + */ + schema?: string; + /** + * ID of the SQL warehouse to connect to. If provided, we automatically generate and provide the profile and connection details to dbt. It can be overridden on a per-command basis by using the `--profiles-dir` command line argument. + */ + warehouse_id?: string; + }; + /** + * An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete successfully before executing this task. + * The key is `task_key`, and the value is the name assigned to the dependent task. + * + */ + depends_on?: { + /** + * Can only be specified on condition task dependencies. The outcome of the dependent task that must be met for this task to run. + */ + outcome?: string; + /** + * The name of the task this task depends on. + */ + task_key: string; + }[]; + /** + * An optional description for this task. + * The maximum length is 4096 bytes. + */ + description?: string; + /** + * An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails. + */ + email_notifications?: { + on_duration_warning_threshold_exceeded?: string[]; + /** + * A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent. + */ + on_failure?: string[]; + /** + * A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. + */ + on_start?: string[]; + /** + * A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent. + */ + on_success?: string[]; + }; + /** + * If existing_cluster_id, the ID of an existing cluster that is used for all runs of this task. When running tasks on an existing cluster, you may need to manually restart the cluster if it stops responding. We suggest running jobs on new clusters for greater reliability. + */ + existing_cluster_id?: string; + health?: { + rules?: { + metric?: string; + op?: string; + value?: number; + }[]; + }; + /** + * If job_cluster_key, this task is executed reusing the cluster specified in `job.settings.job_clusters`. 
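+ *
+ * Illustrative sketch (hypothetical names): a task that reuses a shared job
+ * cluster declared under `job_clusters` could look like
+ *
+ * ```ts
+ * const task = {
+ *     task_key: "ingest",
+ *     job_cluster_key: "shared_cluster",
+ *     notebook_task: {notebook_path: "/Users/someone@example.com/ingest"},
+ * };
+ * ```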
+ */ + job_cluster_key?: string; + /** + * An optional list of libraries to be installed on the cluster that executes the task. The default value is an empty list. + */ + libraries?: { + /** + * Specification of a CRAN library to be installed as part of the library + */ + cran?: { + /** + * The name of the CRAN package to install. + */ + package: string; + /** + * The repository where the package can be found. If not specified, the default CRAN repo is used. + */ + repo?: string; + }; + /** + * URI of the egg to be installed. Currently only DBFS and S3 URIs are supported. + * For example: `{ "egg": "dbfs:/my/egg" }` or + * `{ "egg": "s3://my-bucket/egg" }`. + * If S3 is used, please make sure the cluster has read access on the library. You may need to + * launch the cluster with an IAM role to access the S3 URI. + */ + egg?: string; + /** + * URI of the jar to be installed. Currently only DBFS and S3 URIs are supported. + * For example: `{ "jar": "dbfs:/mnt/databricks/library.jar" }` or + * `{ "jar": "s3://my-bucket/library.jar" }`. + * If S3 is used, please make sure the cluster has read access on the library. You may need to + * launch the cluster with an IAM role to access the S3 URI. + */ + jar?: string; + /** + * Specification of a maven library to be installed. For example: + * `{ "coordinates": "org.jsoup:jsoup:1.7.2" }` + */ + maven?: { + /** + * Gradle-style maven coordinates. For example: "org.jsoup:jsoup:1.7.2". + */ + coordinates: string; + /** + * List of dependences to exclude. For example: `["slf4j:slf4j", "*:hadoop-client"]`. + * + * Maven dependency exclusions: + * https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html. + */ + exclusions?: string[]; + /** + * Maven repo to install the Maven package from. If omitted, both Maven Central Repository + * and Spark Packages are searched. + */ + repo?: string; + }; + /** + * Specification of a PyPi library to be installed. For example: + * `{ "package": "simplejson" }` + */ + pypi?: { + /** + * The name of the pypi package to install. An optional exact version specification is also + * supported. Examples: "simplejson" and "simplejson==3.8.0". + */ + package: string; + /** + * The repository where the package can be found. If not specified, the default pip index is + * used. + */ + repo?: string; + }; + /** + * URI of the wheel to be installed. + * For example: `{ "whl": "dbfs:/my/whl" }` or `{ "whl": "s3://my-bucket/whl" }`. + * If S3 is used, please make sure the cluster has read access on the library. You may need to + * launch the cluster with an IAM role to access the S3 URI. + */ + whl?: string; + }[]; + /** + * An optional maximum number of times to retry an unsuccessful run. A run is considered to be unsuccessful if it completes with the `FAILED` result_state or `INTERNAL_ERROR` `life_cycle_state`. The value -1 means to retry indefinitely and the value 0 means to never retry. The default behavior is to never retry. + */ + max_retries?: number; + /** + * An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried. + */ + min_retry_interval_millis?: number; + /** + * If new_cluster, a description of a cluster that is created for only for this task. + */ + new_cluster?: { + /** + * Parameters needed in order to automatically scale clusters up and down based on load. + * Note: autoscaling works best with DB runtime versions 3.0 or later. 
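+ *
+ * Illustrative sketch (hypothetical sizes): a cluster that keeps at least two
+ * workers and can scale up to eight under load:
+ *
+ * ```ts
+ * const autoscale = {min_workers: 2, max_workers: 8};
+ * ```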
+ */ + autoscale?: { + /** + * The maximum number of workers to which the cluster can scale up when overloaded. + * Note that `max_workers` must be strictly greater than `min_workers`. + */ + max_workers: number; + /** + * The minimum number of workers to which the cluster can scale down when underutilized. + * It is also the initial number of workers the cluster will have after creation. + */ + min_workers: number; + }; + /** + * Automatically terminates the cluster after it is inactive for this time in minutes. If not set, + * this cluster will not be automatically terminated. If specified, the threshold must be between + * 10 and 10000 minutes. + * Users can also set this value to 0 to explicitly disable automatic termination. + */ + autotermination_minutes?: number; + /** + * Attributes related to clusters running on Amazon Web Services. + * If not specified at cluster creation, a set of default values will be used. + */ + aws_attributes?: { + availability?: string; + /** + * The number of volumes launched for each instance. Users can choose up to 10 volumes. + * This feature is only enabled for supported node types. Legacy node types cannot specify + * custom EBS volumes. + * For node types with no instance store, at least one EBS volume needs to be specified; + * otherwise, cluster creation will fail. + * + * These EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc. + * Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc. + * + * If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for + * scratch storage because heterogenously sized scratch devices can lead to inefficient disk + * utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance + * store volumes. + * + * Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` + * will be overridden. + */ + ebs_volume_count?: number; + /** + * + */ + ebs_volume_iops?: number; + /** + * The size of each EBS volume (in GiB) launched for each instance. For general purpose + * SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, + * this value must be within the range 500 - 4096. + */ + ebs_volume_size?: number; + /** + * + */ + ebs_volume_throughput?: number; + ebs_volume_type?: string; + /** + * The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. + * If this value is greater than 0, the cluster driver node in particular will be placed on an + * on-demand instance. If this value is greater than or equal to the current cluster size, all + * nodes will be placed on on-demand instances. If this value is less than the current cluster + * size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will + * be placed on `availability` instances. Note that this value does not affect + * cluster size and cannot currently be mutated over the lifetime of a cluster. + */ + first_on_demand?: number; + /** + * Nodes for this cluster will only be placed on AWS instances with this instance profile. If + * ommitted, nodes will be placed on instances without an IAM instance profile. The instance + * profile must have previously been added to the Databricks environment by an account + * administrator. + * + * This feature may only be available to certain customer plans. + * + * If this field is ommitted, we will pull in the default from the conf if it exists. 
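+ *
+ * Illustrative sketch (the ARN is a placeholder, not a real profile): AWS
+ * attributes that keep the driver on an on-demand instance, let Databricks
+ * pick the zone, and attach an instance profile could look like
+ *
+ * ```ts
+ * const awsAttributes = {
+ *     first_on_demand: 1,
+ *     zone_id: "auto",
+ *     spot_bid_price_percent: 100,
+ *     instance_profile_arn:
+ *         "arn:aws:iam::123456789012:instance-profile/my-profile",
+ * };
+ * ```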
+ */ + instance_profile_arn?: string; + /** + * The bid price for AWS spot instances, as a percentage of the corresponding instance type's + * on-demand price. + * For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot + * instance, then the bid price is half of the price of + * on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice + * the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. + * When spot instances are requested for this cluster, only spot instances whose bid price + * percentage matches this field will be considered. + * Note that, for safety, we enforce this field to be no more than 10000. + * + * The default value and documentation here should be kept consistent with + * CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent. + */ + spot_bid_price_percent?: number; + /** + * Identifier for the availability zone/datacenter in which the cluster resides. + * This string will be of a form like "us-west-2a". The provided availability + * zone must be in the same region as the Databricks deployment. For example, "us-west-2a" + * is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. + * This is an optional field at cluster creation, and if not specified, a default zone will be used. + * If the zone specified is "auto", will try to place cluster in a zone with high availability, + * and will retry placement in a different AZ if there is not enough capacity. + * See [[AutoAZHelper.scala]] for more details. + * The list of available zones as well as the default value can be found by using the + * `List Zones`_ method. + */ + zone_id?: string; + }; + /** + * Attributes related to clusters running on Microsoft Azure. + * If not specified at cluster creation, a set of default values will be used. + */ + azure_attributes?: { + availability?: string; + /** + * The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. + * This value should be greater than 0, to make sure the cluster driver node is placed on an + * on-demand instance. If this value is greater than or equal to the current cluster size, all + * nodes will be placed on on-demand instances. If this value is less than the current cluster + * size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will + * be placed on `availability` instances. Note that this value does not affect + * cluster size and cannot currently be mutated over the lifetime of a cluster. + */ + first_on_demand?: number; + /** + * Defines values necessary to configure and run Azure Log Analytics agent + */ + log_analytics_info?: { + /** + * + */ + log_analytics_primary_key?: string; + /** + * + */ + log_analytics_workspace_id?: string; + }; + /** + * The max bid price to be used for Azure spot instances. + * The Max price for the bid cannot be higher than the on-demand price of the instance. + * If not specified, the default value is -1, which specifies that the instance cannot be evicted + * on the basis of price, and only on the basis of availability. Further, the value should > 0 or -1. + */ + spot_bid_max_price?: number; + }; + /** + * The configuration for delivering spark logs to a long-term storage destination. + * Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified + * for one cluster. If the conf is given, the logs will be delivered to the destination every + * `5 mins`. 
The destination of driver logs is `$destination/$clusterId/driver`, while + * the destination of executor logs is `$destination/$clusterId/executor`. + */ + cluster_log_conf?: { + /** + * destination needs to be provided. e.g. + * `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + */ + dbfs?: { + /** + * dbfs destination, e.g. `dbfs:/my/path` + */ + destination?: string; + }; + /** + * destination and either the region or endpoint need to be provided. e.g. + * `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` + * Cluster iam role is used to access s3, please make sure the cluster iam role in + * `instance_profile_arn` has permission to write data to the s3 destination. + */ + s3?: { + /** + * (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. + * If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on + * the destination bucket and prefix. The full list of possible canned acl can be found at + * http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. + * Please also note that by default only the object owner gets full controls. If you are using cross account + * role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to + * read the logs. + */ + canned_acl?: string; + /** + * S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using + * cluster iam role, please make sure you set cluster iam role and the role has write access to the + * destination. Please also note that you cannot use AWS keys to deliver logs. + */ + destination?: string; + /** + * (Optional) Flag to enable server side encryption, `false` by default. + */ + enable_encryption?: boolean; + /** + * (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when + * encryption is enabled and the default type is `sse-s3`. + */ + encryption_type?: string; + /** + * S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. + * If both are set, endpoint will be used. + */ + endpoint?: string; + /** + * (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + */ + kms_key?: string; + /** + * S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, + * endpoint will be used. + */ + region?: string; + }; + }; + /** + * Cluster name requested by the user. This doesn't have to be unique. + * If not specified at creation, the cluster name will be an empty string. + * + */ + cluster_name?: string; + cluster_source?: string; + /** + * Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS + * instances and EBS volumes) with these tags in addition to `default_tags`. Notes: + * + * - Currently, Databricks allows at most 45 custom tags + * + * - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags + */ + custom_tags?: { + [k: string]: string; + }; + data_security_mode?: string; + docker_image?: { + basic_auth?: { + /** + * Password of the user + */ + password?: string; + /** + * Name of the user + */ + username?: string; + }; + /** + * URL of the docker image. + */ + url?: string; + }; + /** + * The optional ID of the instance pool for the driver of the cluster belongs. + * The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not + * assigned. 
+ */ + driver_instance_pool_id?: string; + /** + * The node type of the Spark driver. Note that this field is optional; + * if unset, the driver node type will be set as the same value + * as `node_type_id` defined above. + * + */ + driver_node_type_id?: string; + /** + * Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk + * space when its Spark workers are running low on disk space. This feature requires specific AWS + * permissions to function correctly - refer to the User Guide for more details. + */ + enable_elastic_disk?: boolean; + /** + * Whether to enable LUKS on cluster VMs' local disks + */ + enable_local_disk_encryption?: boolean; + /** + * Attributes related to clusters running on Google Cloud Platform. + * If not specified at cluster creation, a set of default values will be used. + */ + gcp_attributes?: { + availability?: string; + /** + * boot disk size in GB + */ + boot_disk_size?: number; + /** + * If provided, the cluster will impersonate the google service account when accessing + * gcloud services (like GCS). The google service account + * must have previously been added to the Databricks environment by an account + * administrator. + */ + google_service_account?: string; + /** + * If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. + */ + local_ssd_count?: number; + }; + /** + * The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`. + */ + init_scripts?: { + /** + * destination needs to be provided. e.g. + * `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + */ + dbfs?: { + /** + * dbfs destination, e.g. `dbfs:/my/path` + */ + destination?: string; + }; + /** + * destination and either the region or endpoint need to be provided. e.g. + * `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` + * Cluster iam role is used to access s3, please make sure the cluster iam role in + * `instance_profile_arn` has permission to write data to the s3 destination. + */ + s3?: { + /** + * (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. + * If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on + * the destination bucket and prefix. The full list of possible canned acl can be found at + * http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. + * Please also note that by default only the object owner gets full controls. If you are using cross account + * role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to + * read the logs. + */ + canned_acl?: string; + /** + * S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using + * cluster iam role, please make sure you set cluster iam role and the role has write access to the + * destination. Please also note that you cannot use AWS keys to deliver logs. + */ + destination?: string; + /** + * (Optional) Flag to enable server side encryption, `false` by default. 
+ */ + enable_encryption?: boolean; + /** + * (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when + * encryption is enabled and the default type is `sse-s3`. + */ + encryption_type?: string; + /** + * S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. + * If both are set, endpoint will be used. + */ + endpoint?: string; + /** + * (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + */ + kms_key?: string; + /** + * S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, + * endpoint will be used. + */ + region?: string; + }; + volumes?: { + destination?: string; + }; + /** + * destination needs to be provided. e.g. + * `{ "workspace" : { "destination" : "/Users/user1@databricks.com/my-init.sh" } }` + */ + workspace?: { + /** + * workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh` + */ + destination?: string; + }; + }[]; + /** + * The optional ID of the instance pool to which the cluster belongs. + */ + instance_pool_id?: string; + /** + * This field encodes, through a single value, the resources available to each of + * the Spark nodes in this cluster. For example, the Spark nodes can be provisioned + * and optimized for memory or compute intensive workloads. A list of available node + * types can be retrieved by using the :method:clusters/listNodeTypes API call. + * + */ + node_type_id?: string; + /** + * Number of worker nodes that this cluster should have. A cluster has one Spark Driver + * and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. + * + * Note: When reading the properties of a cluster, this field reflects the desired number + * of workers rather than the actual current number of workers. For instance, if a cluster + * is resized from 5 to 10 workers, this field will immediately be updated to reflect + * the target size of 10 workers, whereas the workers listed in `spark_info` will gradually + * increase from 5 to 10 as the new nodes are provisioned. + */ + num_workers?: number; + /** + * The ID of the cluster policy used to create the cluster if applicable. + */ + policy_id?: string; + runtime_engine?: string; + /** + * Single user name if data_security_mode is `SINGLE_USER` + */ + single_user_name?: string; + /** + * An object containing a set of optional, user-specified Spark configuration key-value pairs. + * Users can also pass in a string of extra JVM options to the driver and the executors via + * `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively. + * + */ + spark_conf?: { + [k: string]: string; + }; + /** + * An object containing a set of optional, user-specified environment variable key-value pairs. + * Please note that key-value pair of the form (X,Y) will be exported as is (i.e., + * `export X='Y'`) while launching the driver and workers. + * + * In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending + * them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all + * default databricks managed environmental variables are included as well. + * + * Example Spark environment variables: + * `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or + * `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` + */ + spark_env_vars?: { + [k: string]: string; + }; + /** + * The Spark version of the cluster, e.g. 
`3.3.x-scala2.11`. + * A list of available Spark versions can be retrieved by using + * the :method:clusters/sparkVersions API call. + * + */ + spark_version?: string; + /** + * SSH public key contents that will be added to each Spark node in this cluster. The + * corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. + * Up to 10 keys can be specified. + */ + ssh_public_keys?: string[]; + workload_type?: { + /** + * defined what type of clients can use the cluster. E.g. Notebooks, Jobs + */ + clients?: { + /** + * With jobs set, the cluster can be used for jobs + */ + jobs?: boolean; + /** + * With notebooks set, this cluster can be used for notebooks + */ + notebooks?: boolean; + }; + }; + }; + /** + * If notebook_task, indicates that this task must run a notebook. This field may not be specified in conjunction with spark_jar_task. + */ + notebook_task?: { + /** + * Base parameters to be used for each run of this job. If the run is initiated by a call to + * :method:jobs/runNow with parameters specified, the two parameters maps are merged. If the same key is specified in + * `base_parameters` and in `run-now`, the value from `run-now` is used. + * + * Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. + * + * If the notebook takes a parameter that is not specified in the job’s `base_parameters` or the `run-now` override parameters, + * the default value from the notebook is used. + * + * Retrieve these parameters in a notebook using [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-widgets). + * + */ + base_parameters?: { + [k: string]: string; + }; + /** + * The path of the notebook to be run in the Databricks workspace or remote repository. + * For notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash. + * For notebooks stored in a remote repository, the path must be relative. This field is required. + * + */ + notebook_path: string; + /** + * Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved + * from the local workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`, + * the Python file will be retrieved from a Git repository defined in `git_source`. + * + * * `WORKSPACE`: The Python file is located in a workspace or at a cloud filesystem URI. + * * `GIT`: The Python file is located in a remote Git repository. + * + */ + source?: string; + }; + /** + * Optional notification settings that are used when sending notifications to each of the `email_notifications` for this task. + */ + notification_settings?: { + /** + * If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run. + */ + alert_on_last_attempt?: boolean; + /** + * If true, do not send notifications to recipients specified in `on_failure` if the run is canceled. + */ + no_alert_for_canceled_runs?: boolean; + /** + * If true, do not send notifications to recipients specified in `on_failure` if the run is skipped. + */ + no_alert_for_skipped_runs?: boolean; + }; + /** + * If pipeline_task, indicates that this task must execute a Pipeline. + */ + pipeline_task?: { + /** + * If true, a full refresh will be triggered on the delta live table. 
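+ *
+ * Illustrative sketch (hypothetical pipeline ID): a task that triggers a full
+ * refresh of a Delta Live Tables pipeline:
+ *
+ * ```ts
+ * const pipelineTask = {
+ *     pipeline_id: "1234-5678-abcd",
+ *     full_refresh: true,
+ * };
+ * ```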
+ */ + full_refresh?: boolean; + /** + * The full name of the pipeline task to execute. + */ + pipeline_id?: string; + }; + /** + * If python_wheel_task, indicates that this job must execute a PythonWheel. + */ + python_wheel_task?: { + /** + * Named entry point to use, if it does not exist in the metadata of the package it executes the function from the package directly using `$packageName.$entryPoint()` + */ + entry_point?: string; + /** + * Command-line parameters passed to Python wheel task in the form of `["--name=task", "--data=dbfs:/path/to/data.json"]`. Leave it empty if `parameters` is not null. + */ + named_parameters?: { + [k: string]: string; + }; + /** + * Name of the package to execute + */ + package_name?: string; + /** + * Command-line parameters passed to Python wheel task. Leave it empty if `named_parameters` is not null. + */ + parameters?: string[]; + }; + /** + * An optional policy to specify whether to retry a task when it times out. The default behavior is to not retry on timeout. + */ + retry_on_timeout?: boolean; + /** + * An optional value specifying the condition determining whether the task is run once its dependencies have been completed. When omitted, defaults to `ALL_SUCCESS`. + * + * * `ALL_SUCCESS`: All dependencies have executed and succeeded + * * `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded + * * `NONE_FAILED`: None of the dependencies have failed and at least one was executed + * * `ALL_DONE`: All dependencies completed and at least one was executed + * * `AT_LEAST_ONE_FAILED`: At least one dependency failed + * * `ALL_FAILED`: ALl dependencies have failed + * + */ + run_if?: string; + run_job_task?: { + job_id: number; + job_parameters?: unknown; + }; + /** + * If spark_jar_task, indicates that this task must run a JAR. + */ + spark_jar_task?: { + /** + * Deprecated since 04/2016. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create. + * + */ + jar_uri?: string; + /** + * The full name of the class containing the main method to be executed. This class must be contained in a JAR provided as a library. + * + * The code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail. + */ + main_class_name?: string; + /** + * Parameters passed to the main method. + * + * Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. + * + */ + parameters?: string[]; + }; + /** + * If spark_python_task, indicates that this task must run a Python file. + */ + spark_python_task?: { + /** + * Command line parameters passed to the Python file. + * + * Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. + * + */ + parameters?: string[]; + /** + * The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required. + */ + python_file: string; + /** + * Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved + * from the local workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`, + * the Python file will be retrieved from a Git repository defined in `git_source`. 
+ * + * * `WORKSPACE`: The Python file is located in a workspace or at a cloud filesystem URI. + * * `GIT`: The Python file is located in a remote Git repository. + * + */ + source?: string; + }; + /** + * If spark_submit_task, indicates that this task must be launched by the spark submit script. This task can run only on new clusters. + */ + spark_submit_task?: { + /** + * Command-line parameters passed to spark submit. + * + * Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs. + * + */ + parameters?: string[]; + }; + /** + * If sql_task, indicates that this job must execute a SQL task. + */ + sql_task?: { + /** + * If alert, indicates that this job must refresh a SQL alert. + */ + alert?: { + /** + * The canonical identifier of the SQL alert. + */ + alert_id: string; + /** + * If true, the alert notifications are not sent to subscribers. + */ + pause_subscriptions?: boolean; + /** + * If specified, alert notifications are sent to subscribers. + */ + subscriptions?: { + /** + * The canonical identifier of the destination to receive email notification. + */ + destination_id?: string; + /** + * The user name to receive the subscription email. + */ + user_name?: string; + }[]; + }; + /** + * If dashboard, indicates that this job must refresh a SQL dashboard. + */ + dashboard?: { + /** + * Subject of the email sent to subscribers of this task. + */ + custom_subject?: string; + /** + * The canonical identifier of the SQL dashboard. + */ + dashboard_id: string; + /** + * If true, the dashboard snapshot is not taken, and emails are not sent to subscribers. + */ + pause_subscriptions?: boolean; + /** + * If specified, dashboard snapshots are sent to subscriptions. + */ + subscriptions?: { + /** + * The canonical identifier of the destination to receive email notification. + */ + destination_id?: string; + /** + * The user name to receive the subscription email. + */ + user_name?: string; + }[]; + }; + /** + * If file, indicates that this job runs a SQL file in a remote Git repository. Only one SQL statement is supported in a file. Multiple SQL statements separated by semicolons (;) are not permitted. + */ + file?: { + /** + * Relative path of the SQL file in the remote Git repository. + */ + path: string; + }; + /** + * Parameters to be used for each run of this job. The SQL alert task does not support custom parameters. + */ + parameters?: { + [k: string]: string; + }; + /** + * If query, indicates that this job must execute a SQL query. + */ + query?: { + /** + * The canonical identifier of the SQL query. + */ + query_id: string; + }; + /** + * The canonical identifier of the SQL warehouse. Only serverless and pro SQL warehouses are supported. + */ + warehouse_id: string; + }; + /** + * A unique name for the task. This field is used to refer to this task from other tasks. + * This field is required and must be unique within its parent job. + * On Update or Reset, this field is used to reference the tasks to be updated or reset. + * The maximum length is 100 characters. + */ + task_key: string; + /** + * An optional timeout applied to each run of this job task. The default behavior is to have no timeout. + */ + timeout_seconds?: number; + }[]; + /** + * An optional timeout applied to each run of this job. The default behavior is to have no timeout. + */ + timeout_seconds?: number; + /** + * Trigger settings for the job. Can be used to trigger a run when new files arrive in an external location. 
The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`. + */ + trigger?: { + /** + * File arrival trigger settings. + */ + file_arrival?: { + /** + * If set, the trigger starts a run only after the specified amount of time passed since + * the last time the trigger fired. The minimum allowed value is 60 seconds + * + */ + min_time_between_triggers_seconds?: number; + /** + * URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location. + */ + url?: string; + /** + * If set, the trigger starts a run only after no file activity has occurred for the specified amount of time. + * This makes it possible to wait for a batch of incoming files to arrive before triggering a run. The + * minimum allowed value is 60 seconds. + * + */ + wait_after_last_change_seconds?: number; + }; + /** + * Whether this trigger is paused or not. + */ + pause_status?: string; + }; + /** + * A collection of system notification IDs to notify when the run begins or completes. The default behavior is to not send any system notifications. + */ + webhook_notifications?: { + on_duration_warning_threshold_exceeded?: { + id?: string; + }[]; + /** + * An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property. + */ + on_failure?: { + id?: string; + }[]; + /** + * An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property. + */ + on_start?: { + id?: string; + }[]; + /** + * An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property. + */ + on_success?: { + id?: string; + }[]; + }; + }; + }; + model_serving_endpoints?: { + [k: string]: { + config: { + served_models: { + environment_vars?: { + [k: string]: string; + }; + instance_profile_arn?: string; + model_name: string; + model_version: string; + name?: string; + scale_to_zero_enabled: boolean; + workload_size: string; + }[]; + traffic_config?: { + routes?: { + served_model_name: string; + traffic_percentage: number; + }[]; + }; + }; + name: string; + permissions?: { + group_name?: string; + level: string; + service_principal_name?: string; + user_name?: string; + }[]; + }; + }; + /** + * List of MLflow models + */ + models?: { + [k: string]: { + /** + * Timestamp recorded when this `registered_model` was created. + */ + creation_timestamp?: number; + /** + * Description of this `registered_model`. + */ + description?: string; + /** + * Timestamp recorded when metadata for this `registered_model` was last updated. + */ + last_updated_timestamp?: number; + /** + * Collection of latest model versions for each stage. + * Only contains models with current `READY` status. + */ + latest_versions?: { + /** + * Timestamp recorded when this `model_version` was created. + */ + creation_timestamp?: number; + /** + * Current stage for this `model_version`. + */ + current_stage?: string; + /** + * Description of this `model_version`. + */ + description?: string; + /** + * Timestamp recorded when metadata for this `model_version` was last updated. + */ + last_updated_timestamp?: number; + /** + * Unique name of the model + */ + name?: string; + /** + * MLflow run ID used when creating `model_version`, if `source` was generated by an + * experiment run stored in MLflow tracking server. 
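To make the `trigger` and `webhook_notifications` blocks documented above concrete, here is a small hypothetical fragment: a file-arrival trigger with both debounce fields at their documented 60-second minimum and a single failure webhook. The URL and destination ID are placeholders.

```typescript
// Hypothetical trigger and webhook settings using the fields documented above.
const exampleJobTriggerSettings = {
    trigger: {
        file_arrival: {
            // External location (root or a subpath) to watch for new files.
            url: "s3://landing-bucket/incoming/",
            // Both fields have a documented minimum of 60 seconds.
            min_time_between_triggers_seconds: 60,
            wait_after_last_change_seconds: 60,
        },
    },
    webhook_notifications: {
        // At most 3 destinations can be specified per property.
        on_failure: [{id: "notification-destination-id"}],
    },
};
```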
+ */ + run_id?: string; + /** + * Run Link: Direct link to the run that generated this version + */ + run_link?: string; + /** + * URI indicating the location of the source model artifacts, used when creating `model_version` + */ + source?: string; + /** + * Current status of `model_version` + */ + status?: string; + /** + * Details on current `status`, if it is pending or failed. + */ + status_message?: string; + /** + * Tags: Additional metadata key-value pairs for this `model_version`. + */ + tags?: { + /** + * The tag key. + */ + key?: string; + /** + * The tag value. + */ + value?: string; + }[]; + /** + * User that created this `model_version`. + */ + user_id?: string; + /** + * Model's version number. + */ + version?: string; + }[]; + /** + * Unique name for the model. + */ + name?: string; + permissions?: { + group_name?: string; + level: string; + service_principal_name?: string; + user_name?: string; + }[]; + /** + * Tags: Additional metadata key-value pairs for this `registered_model`. + */ + tags?: { + /** + * The tag key. + */ + key?: string; + /** + * The tag value. + */ + value?: string; + }[]; + /** + * User that created this `registered_model` + */ + user_id?: string; + }; + }; + /** + * List of DLT pipelines + */ + pipelines?: { + [k: string]: { + /** + * A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog. + */ + catalog?: string; + /** + * DLT Release Channel that specifies which version to use. + */ + channel?: string; + /** + * Cluster settings for this pipeline deployment. + */ + clusters?: { + /** + * Note: This field won't be persisted. Only API users will check this field. + */ + apply_policy_default_values?: boolean; + /** + * Parameters needed in order to automatically scale clusters up and down based on load. + * Note: autoscaling works best with DB runtime versions 3.0 or later. + */ + autoscale?: { + /** + * The maximum number of workers to which the cluster can scale up when overloaded. + * Note that `max_workers` must be strictly greater than `min_workers`. + */ + max_workers: number; + /** + * The minimum number of workers to which the cluster can scale down when underutilized. + * It is also the initial number of workers the cluster will have after creation. + */ + min_workers: number; + }; + /** + * Attributes related to clusters running on Amazon Web Services. + * If not specified at cluster creation, a set of default values will be used. + */ + aws_attributes?: { + availability?: string; + /** + * The number of volumes launched for each instance. Users can choose up to 10 volumes. + * This feature is only enabled for supported node types. Legacy node types cannot specify + * custom EBS volumes. + * For node types with no instance store, at least one EBS volume needs to be specified; + * otherwise, cluster creation will fail. + * + * These EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc. + * Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc. + * + * If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for + * scratch storage because heterogenously sized scratch devices can lead to inefficient disk + * utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance + * store volumes. 
+ * + * Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir` + * will be overridden. + */ + ebs_volume_count?: number; + /** + * + */ + ebs_volume_iops?: number; + /** + * The size of each EBS volume (in GiB) launched for each instance. For general purpose + * SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, + * this value must be within the range 500 - 4096. + */ + ebs_volume_size?: number; + /** + * + */ + ebs_volume_throughput?: number; + ebs_volume_type?: string; + /** + * The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. + * If this value is greater than 0, the cluster driver node in particular will be placed on an + * on-demand instance. If this value is greater than or equal to the current cluster size, all + * nodes will be placed on on-demand instances. If this value is less than the current cluster + * size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will + * be placed on `availability` instances. Note that this value does not affect + * cluster size and cannot currently be mutated over the lifetime of a cluster. + */ + first_on_demand?: number; + /** + * Nodes for this cluster will only be placed on AWS instances with this instance profile. If + * ommitted, nodes will be placed on instances without an IAM instance profile. The instance + * profile must have previously been added to the Databricks environment by an account + * administrator. + * + * This feature may only be available to certain customer plans. + * + * If this field is ommitted, we will pull in the default from the conf if it exists. + */ + instance_profile_arn?: string; + /** + * The bid price for AWS spot instances, as a percentage of the corresponding instance type's + * on-demand price. + * For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot + * instance, then the bid price is half of the price of + * on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice + * the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. + * When spot instances are requested for this cluster, only spot instances whose bid price + * percentage matches this field will be considered. + * Note that, for safety, we enforce this field to be no more than 10000. + * + * The default value and documentation here should be kept consistent with + * CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent. + */ + spot_bid_price_percent?: number; + /** + * Identifier for the availability zone/datacenter in which the cluster resides. + * This string will be of a form like "us-west-2a". The provided availability + * zone must be in the same region as the Databricks deployment. For example, "us-west-2a" + * is not a valid zone id if the Databricks deployment resides in the "us-east-1" region. + * This is an optional field at cluster creation, and if not specified, a default zone will be used. + * If the zone specified is "auto", will try to place cluster in a zone with high availability, + * and will retry placement in a different AZ if there is not enough capacity. + * See [[AutoAZHelper.scala]] for more details. + * The list of available zones as well as the default value can be found by using the + * `List Zones`_ method. + */ + zone_id?: string; + }; + /** + * Attributes related to clusters running on Microsoft Azure. 
+ * If not specified at cluster creation, a set of default values will be used. + */ + azure_attributes?: { + availability?: string; + /** + * The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. + * This value should be greater than 0, to make sure the cluster driver node is placed on an + * on-demand instance. If this value is greater than or equal to the current cluster size, all + * nodes will be placed on on-demand instances. If this value is less than the current cluster + * size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will + * be placed on `availability` instances. Note that this value does not affect + * cluster size and cannot currently be mutated over the lifetime of a cluster. + */ + first_on_demand?: number; + /** + * Defines values necessary to configure and run Azure Log Analytics agent + */ + log_analytics_info?: { + /** + * + */ + log_analytics_primary_key?: string; + /** + * + */ + log_analytics_workspace_id?: string; + }; + /** + * The max bid price to be used for Azure spot instances. + * The Max price for the bid cannot be higher than the on-demand price of the instance. + * If not specified, the default value is -1, which specifies that the instance cannot be evicted + * on the basis of price, and only on the basis of availability. Further, the value should > 0 or -1. + */ + spot_bid_max_price?: number; + }; + /** + * The configuration for delivering spark logs to a long-term storage destination. + * Only dbfs destinations are supported. Only one destination can be specified + * for one cluster. If the conf is given, the logs will be delivered to the destination every + * `5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while + * the destination of executor logs is `$destination/$clusterId/executor`. + * + */ + cluster_log_conf?: { + /** + * destination needs to be provided. e.g. + * `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }` + */ + dbfs?: { + /** + * dbfs destination, e.g. `dbfs:/my/path` + */ + destination?: string; + }; + /** + * destination and either the region or endpoint need to be provided. e.g. + * `{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` + * Cluster iam role is used to access s3, please make sure the cluster iam role in + * `instance_profile_arn` has permission to write data to the s3 destination. + */ + s3?: { + /** + * (Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`. + * If `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on + * the destination bucket and prefix. The full list of possible canned acl can be found at + * http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl. + * Please also note that by default only the object owner gets full controls. If you are using cross account + * role for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to + * read the logs. + */ + canned_acl?: string; + /** + * S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using + * cluster iam role, please make sure you set cluster iam role and the role has write access to the + * destination. Please also note that you cannot use AWS keys to deliver logs. + */ + destination?: string; + /** + * (Optional) Flag to enable server side encryption, `false` by default. 
+ */ + enable_encryption?: boolean; + /** + * (Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when + * encryption is enabled and the default type is `sse-s3`. + */ + encryption_type?: string; + /** + * S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set. + * If both are set, endpoint will be used. + */ + endpoint?: string; + /** + * (Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`. + */ + kms_key?: string; + /** + * S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set, + * endpoint will be used. + */ + region?: string; + }; + }; + /** + * Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS + * instances and EBS volumes) with these tags in addition to `default_tags`. Notes: + * + * - Currently, Databricks allows at most 45 custom tags + * + * - Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags + */ + custom_tags?: { + [k: string]: string; + }; + /** + * The optional ID of the instance pool for the driver of the cluster belongs. + * The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not + * assigned. + */ + driver_instance_pool_id?: string; + /** + * The node type of the Spark driver. + * Note that this field is optional; if unset, the driver node type will be set as the same value + * as `node_type_id` defined above. + */ + driver_node_type_id?: string; + /** + * Attributes related to clusters running on Google Cloud Platform. + * If not specified at cluster creation, a set of default values will be used. + */ + gcp_attributes?: { + availability?: string; + /** + * boot disk size in GB + */ + boot_disk_size?: number; + /** + * If provided, the cluster will impersonate the google service account when accessing + * gcloud services (like GCS). The google service account + * must have previously been added to the Databricks environment by an account + * administrator. + */ + google_service_account?: string; + /** + * If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type. + */ + local_ssd_count?: number; + }; + /** + * The optional ID of the instance pool to which the cluster belongs. + */ + instance_pool_id?: string; + /** + * A label for the cluster specification, either `default` to configure the default cluster, or `maintenance` to configure the maintenance cluster. This field is optional. The default value is `default`. + */ + label?: string; + /** + * This field encodes, through a single value, the resources available to each of + * the Spark nodes in this cluster. For example, the Spark nodes can be provisioned + * and optimized for memory or compute intensive workloads. A list of available node + * types can be retrieved by using the :method:clusters/listNodeTypes API call. + * + */ + node_type_id?: string; + /** + * Number of worker nodes that this cluster should have. A cluster has one Spark Driver + * and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes. + * + * Note: When reading the properties of a cluster, this field reflects the desired number + * of workers rather than the actual current number of workers. 
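Tying together the pipeline cluster fields documented above (autoscaling, AWS spot placement, and S3 log delivery), the sketch below shows a minimal hypothetical cluster entry. The bucket, instance profile ARN, and sizes are placeholders; the comments restate the schema's constraints.

```typescript
// Hypothetical pipeline cluster fragment using fields documented above.
const examplePipelineCluster = {
    label: "default",
    // max_workers must be strictly greater than min_workers; min_workers is also
    // the initial size after creation.
    autoscale: {min_workers: 1, max_workers: 4},
    aws_attributes: {
        // The driver lands on on-demand capacity; remaining nodes follow `availability`.
        first_on_demand: 1,
        // Percentage of the on-demand price; capped at 10000.
        spot_bid_price_percent: 100,
        zone_id: "auto",
        instance_profile_arn:
            "arn:aws:iam::123456789012:instance-profile/pipelines",
    },
    cluster_log_conf: {
        // Only one destination may be set; the instance profile must be able to
        // write to this bucket (and set ACLs if `canned_acl` is used).
        s3: {
            destination: "s3://cluster-log-bucket/prefix",
            region: "us-west-2", // endpoint wins if both region and endpoint are set
        },
    },
};
```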
For instance, if a cluster + * is resized from 5 to 10 workers, this field will immediately be updated to reflect + * the target size of 10 workers, whereas the workers listed in `spark_info` will gradually + * increase from 5 to 10 as the new nodes are provisioned. + */ + num_workers?: number; + /** + * The ID of the cluster policy used to create the cluster if applicable. + */ + policy_id?: string; + /** + * An object containing a set of optional, user-specified Spark configuration key-value pairs. + * See :method:clusters/create for more details. + * + */ + spark_conf?: { + [k: string]: string; + }; + /** + * An object containing a set of optional, user-specified environment variable key-value pairs. + * Please note that key-value pair of the form (X,Y) will be exported as is (i.e., + * `export X='Y'`) while launching the driver and workers. + * + * In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending + * them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all + * default databricks managed environmental variables are included as well. + * + * Example Spark environment variables: + * `{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or + * `{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}` + */ + spark_env_vars?: { + [k: string]: string; + }; + /** + * SSH public key contents that will be added to each Spark node in this cluster. The + * corresponding private keys can be used to login with the user name `ubuntu` on port `2200`. + * Up to 10 keys can be specified. + */ + ssh_public_keys?: string[]; + }[]; + /** + * String-String configuration for this pipeline execution. + */ + configuration?: { + [k: string]: string; + }; + /** + * Whether the pipeline is continuous or triggered. This replaces `trigger`. + */ + continuous?: boolean; + /** + * Whether the pipeline is in Development mode. Defaults to false. + */ + development?: boolean; + /** + * Pipeline product edition. + */ + edition?: string; + /** + * Filters on which Pipeline packages to include in the deployed graph. + */ + filters?: { + /** + * Paths to exclude. + */ + exclude?: string[]; + /** + * Paths to include. + */ + include?: string[]; + }; + /** + * Unique identifier for this pipeline. + */ + id?: string; + /** + * Libraries or code needed by this deployment. + */ + libraries?: { + /** + * The path to a file that defines a pipeline and is stored in the Databricks Repos. + * + */ + file?: { + /** + * The absolute path of the file. + */ + path?: string; + }; + /** + * URI of the jar to be installed. Currently only DBFS is supported. + * + */ + jar?: string; + /** + * Specification of a maven library to be installed. + * + */ + maven?: { + /** + * Gradle-style maven coordinates. For example: "org.jsoup:jsoup:1.7.2". + */ + coordinates: string; + /** + * List of dependences to exclude. For example: `["slf4j:slf4j", "*:hadoop-client"]`. + * + * Maven dependency exclusions: + * https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html. + */ + exclusions?: string[]; + /** + * Maven repo to install the Maven package from. If omitted, both Maven Central Repository + * and Spark Packages are searched. + */ + repo?: string; + }; + /** + * The path to a notebook that defines a pipeline and is stored in the workspace. + * + */ + notebook?: { + /** + * The absolute path of the notebook. 
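The pipeline-level settings above (continuous versus triggered execution, development mode, configuration, and libraries) can likewise be illustrated with a small hypothetical fragment; the catalog name, configuration key, and paths are placeholders.

```typescript
// Hypothetical DLT pipeline fragment using fields documented above. `continuous`
// replaces the deprecated `trigger` block.
const examplePipeline = {
    catalog: "main",
    continuous: false,
    development: true,
    configuration: {"my.pipeline.setting": "value"},
    libraries: [
        // A workspace file that defines part of the pipeline.
        {file: {path: "/Repos/project/pipelines/ingest.py"}},
        // A notebook that defines part of the pipeline.
        {notebook: {path: "/Users/user1@databricks.com/dlt/transform"}},
    ],
};
```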
+ */ + path?: string; + }; + }[]; + /** + * Friendly identifier for this pipeline. + */ + name?: string; + permissions?: { + group_name?: string; + level: string; + service_principal_name?: string; + user_name?: string; + }[]; + /** + * Whether Photon is enabled for this pipeline. + */ + photon?: boolean; + /** + * Whether serverless compute is enabled for this pipeline. + */ + serverless?: boolean; + /** + * DBFS root directory for storing checkpoints and tables. + */ + storage?: string; + /** + * Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`. + */ + target?: string; + /** + * Which pipeline trigger to use. Deprecated: Use `continuous` instead. + */ + trigger?: { + cron?: { + quartz_cron_schedule?: string; + timezone_id?: string; + }; + manual?: unknown; + }; + }; + }; + }; + run_as?: { + service_principal_name?: string; + user_name?: string; + }; + variables?: { + [k: string]: string; + }; + /** + * Configures which workspace to connect to and locations for files, state, and similar locations within the workspace file tree. + */ + workspace?: { + /** + * The remote path to synchronize build artifacts to. This defaults to `${workspace.root}/artifacts` + */ + artifact_path?: string; + auth_type?: string; + azure_client_id?: string; + /** + * Azure environment, one of (Public, UsGov, China, Germany). + */ + azure_environment?: string; + /** + * Azure Login Application ID. + */ + azure_login_app_id?: string; + azure_tenant_id?: string; + azure_use_msi?: boolean; + /** + * Azure Resource Manager ID for Azure Databricks workspace. + */ + azure_workspace_resource_id?: string; + client_id?: string; + /** + * The remote path to synchronize local files artifacts to. This defaults to `${workspace.root}/files` + */ + file_path?: string; + google_service_account?: string; + /** + * Host url of the workspace. + */ + host?: string; + /** + * Connection profile to use. By default profiles are specified in ~/.databrickscfg. + */ + profile?: string; + /** + * The base location for synchronizing files, artifacts and state. Defaults to `/Users/jane@doe.com/.bundle/${bundle.name}/${bundle.target}` + */ + root_path?: string; + /** + * The remote path to synchronize bundle state to. This defaults to `${workspace.root}/state` + */ + state_path?: string; + }; + }; + }; + variables?: { + [k: string]: { + default?: string; + description?: string; + }; + }; + /** + * Configures which workspace to connect to and locations for files, state, and similar locations within the workspace file tree. + */ + workspace?: { + /** + * The remote path to synchronize build artifacts to. This defaults to `${workspace.root}/artifacts` + */ + artifact_path?: string; + auth_type?: string; + azure_client_id?: string; + /** + * Azure environment, one of (Public, UsGov, China, Germany). + */ + azure_environment?: string; + /** + * Azure Login Application ID. + */ + azure_login_app_id?: string; + azure_tenant_id?: string; + azure_use_msi?: boolean; + /** + * Azure Resource Manager ID for Azure Databricks workspace. + */ + azure_workspace_resource_id?: string; + client_id?: string; + /** + * The remote path to synchronize local files artifacts to. This defaults to `${workspace.root}/files` + */ + file_path?: string; + google_service_account?: string; + /** + * Host url of the workspace. + */ + host?: string; + /** + * Connection profile to use. 
By default profiles are specified in ~/.databrickscfg. + */ + profile?: string; + /** + * The base location for synchronizing files, artifacts and state. Defaults to `/Users/jane@doe.com/.bundle/${bundle.name}/${bundle.target}` + */ + root_path?: string; + /** + * The remote path to synchronize bundle state to. This defaults to `${workspace.root}/state` + */ + state_path?: string; + }; +} diff --git a/packages/databricks-vscode/src/bundle/GenerateBundle.ts b/packages/databricks-vscode/src/bundle/GenerateBundle.ts deleted file mode 100644 index 55a948ef0..000000000 --- a/packages/databricks-vscode/src/bundle/GenerateBundle.ts +++ /dev/null @@ -1,46 +0,0 @@ -import {CliWrapper} from "../cli/CliWrapper"; -import {extensions, Uri} from "vscode"; -import path from "node:path"; - -export async function generateBundleSchema(cli: CliWrapper) { - // get freshly generated bundle schema - const bundleSchema = await cli.getBundleSchema(); - - // URI scheme for DABs JSON schemas - const dabsUriScheme = "dabs"; - - // URI for bundle root config json schema - const rootConfigSchemaUri = `${dabsUriScheme}:///root.json`; - - const extensionYaml = extensions.getExtension("redhat.vscode-yaml"); - if (extensionYaml) { - const redHatYamlSchemaApi = await extensionYaml.activate(); - - // We use the API exposed from teh activate() function of the redhat.vscode-yaml - // extension to registor a custom schema provider - redHatYamlSchemaApi.registerContributor( - "dabs", - (resource: string) => { - const validFileNames: string[] = [ - "databricks.yml", - "databricks.yaml", - "bundle.yml", - "bundle.yaml", - ]; - for (const name of validFileNames) { - if (path.basename(resource) === name) { - return rootConfigSchemaUri; - } - } - return undefined; - }, - (uri: string) => { - // Any JSON schemas with URI scheme = "dabs" resolves here - const parsedUri = Uri.parse(uri); - if (parsedUri.scheme === dabsUriScheme) { - return bundleSchema; - } - } - ); - } -} diff --git a/packages/databricks-vscode/src/bundle/bundleAutocompleteProvider.ts b/packages/databricks-vscode/src/bundle/bundleAutocompleteProvider.ts new file mode 100644 index 000000000..1cf24acc7 --- /dev/null +++ b/packages/databricks-vscode/src/bundle/bundleAutocompleteProvider.ts @@ -0,0 +1,65 @@ +import {CliWrapper} from "../cli/CliWrapper"; +import {ExtensionContext, extensions, Uri} from "vscode"; +import {BundleFileSet} from "./BundleFileSet"; +import {BundleWatcher} from "../file-managers/BundleWatcher"; + +export async function registerBundleAutocompleteProvider( + cli: CliWrapper, + bundleFileSet: BundleFileSet, + bundleWatcher: BundleWatcher, + context: ExtensionContext +) { + // get freshly generated bundle schema + const bundleSchema = await cli.getBundleSchema(); + + // URI scheme for DABs JSON schemas + const dabsUriScheme = "dabs"; + + // URI for bundle root config json schema + const rootConfigSchemaUri = `${dabsUriScheme}:///databricks-asset-bundles.json`; + + const extensionYaml = extensions.getExtension("redhat.vscode-yaml"); + if (extensionYaml) { + const redHatYamlSchemaApi = await extensionYaml.activate(); + + // We use the API exposed from teh activate() function of the redhat.vscode-yaml + // extension to registor a custom schema provider + let bundleFileList = await bundleFileSet.allFiles(); + context.subscriptions.push( + bundleWatcher.onDidChangeRootFile(async () => { + bundleFileList = await bundleFileSet.allFiles(); + }), + bundleWatcher.onDidCreate(async (e) => { + bundleFileList.push(e); + }), + bundleWatcher.onDidDelete(async (e) 
=> { + const idx = bundleFileList.findIndex( + (v) => v.fsPath === e.fsPath + ); + if (idx !== -1) { + bundleFileList.splice(idx, 1); + } + }) + ); + redHatYamlSchemaApi.registerContributor( + "dabs", + (resource: string) => { + const resourceUri = Uri.parse(resource); + if ( + bundleFileList.find( + (i) => i.fsPath === resourceUri.fsPath + ) !== undefined + ) { + return rootConfigSchemaUri; + } + }, + (uri: string) => { + // Any JSON schemas with URI scheme = "dabs" resolves here + const parsedUri = Uri.parse(uri); + if (parsedUri.scheme === dabsUriScheme) { + return bundleSchema; + } + } + ); + } +} diff --git a/packages/databricks-vscode/src/bundle/types.ts b/packages/databricks-vscode/src/bundle/types.ts new file mode 100644 index 000000000..2a320a1c5 --- /dev/null +++ b/packages/databricks-vscode/src/bundle/types.ts @@ -0,0 +1,3 @@ +import {BundleSchema} from "./BundleSchema"; + +export type BundleTarget = Required["targets"][string]; diff --git a/packages/databricks-vscode/src/cli/DatabricksCliSyncParser.ts b/packages/databricks-vscode/src/cli/DatabricksCliSyncParser.ts index f87fd0f24..9d9efa3b3 100644 --- a/packages/databricks-vscode/src/cli/DatabricksCliSyncParser.ts +++ b/packages/databricks-vscode/src/cli/DatabricksCliSyncParser.ts @@ -99,6 +99,7 @@ export class DatabricksCliSyncParser { return; } } + this.syncStateCallback("ERROR", data); } private matchForErrors(line: string) { diff --git a/packages/databricks-vscode/src/configuration/BundleConfigReaderWriter.ts b/packages/databricks-vscode/src/configuration/BundleConfigReaderWriter.ts new file mode 100644 index 000000000..32b86a552 --- /dev/null +++ b/packages/databricks-vscode/src/configuration/BundleConfigReaderWriter.ts @@ -0,0 +1,210 @@ +import {Uri} from "vscode"; +import { + BundleFileSet, + parseBundleYaml, + writeBundleYaml, +} from "../bundle/BundleFileSet"; +import {BundleTarget} from "../bundle/types"; +import {Mutex} from "../locking"; +import {RemoteUri} from "../sync/SyncDestination"; +import {BundleConfigs, isBundleConfig} from "./types"; + +export class BundleConfigReaderWriter { + private readonly writeMutex = new Mutex(); + + private readonly writerMapping: Record< + keyof BundleConfigs, + (t: BundleTarget, v: any) => BundleTarget + > = { + clusterId: this.setClusterId, + authType: this.setAuthType, + mode: this.setMode, + host: this.setHost, + workspaceFsPath: this.setWorkspaceFsPath, + }; + + private readonly readerMapping: Record< + keyof BundleConfigs, + ( + t?: BundleTarget + ) => Promise + > = { + clusterId: this.getClusterId, + authType: this.getAuthType, + mode: this.getMode, + host: this.getHost, + workspaceFsPath: this.getWorkspaceFsPath, + }; + + constructor(private readonly bundleFileSet: BundleFileSet) {} + + public async getHost(target?: BundleTarget) { + return target?.workspace?.host; + } + public setHost(target: BundleTarget, value: BundleConfigs["host"]) { + target = {...target}; // create an explicit copy so as to not modify the original object + target.workspace = {...target.workspace, host: value}; + return target; + } + + public async getMode(target?: BundleTarget) { + return target?.mode; + } + public setMode(target: BundleTarget, value: BundleConfigs["mode"]) { + target = {...target}; + target.mode = value; + return target; + } + + public async getClusterId(target?: BundleTarget) { + return target?.compute_id; + } + public setClusterId( + target: BundleTarget, + value: BundleConfigs["clusterId"] + ) { + target = {...target}; + target.compute_id = value; + return target; + } + + public 
async getWorkspaceFsPath(target?: BundleTarget) { + const filePath = target?.workspace?.file_path; + return filePath ? new RemoteUri(filePath) : undefined; + } + public setWorkspaceFsPath( + target: BundleTarget, + value: BundleConfigs["workspaceFsPath"] + ) { + target = {...target}; + target.workspace = { + ...target.workspace, + // eslint-disable-next-line @typescript-eslint/naming-convention + file_path: value?.path, + }; + return target; + } + + public async getAuthType(target?: BundleTarget) { + return target?.workspace?.auth_type; + } + public setAuthType(target: BundleTarget, value: BundleConfigs["authType"]) { + target = {...target}; + target.workspace = { + ...target.workspace, + // eslint-disable-next-line @typescript-eslint/naming-convention + auth_type: value, + }; + return target; + } + + get targets() { + return this.bundleFileSet.bundleDataCache.value.then( + (data) => data?.targets + ); + } + + get defaultTarget() { + return this.targets.then((targets) => { + if (targets === undefined) { + return undefined; + } + const defaultTarget = Object.keys(targets).find( + (target) => targets[target].default + ); + return ( + defaultTarget ?? + Object.keys(targets).find( + (target) => targets[target].mode === "dev" + ) + ); + }); + } + + async getFileToWrite( + key: T, + target: string + ) { + const priorityList: {uri: Uri; priority: number}[] = []; + await this.bundleFileSet.forEach(async (data, file) => { + // try to find a file which has the config + if ( + (await this.readerMapping[key](data.targets?.[target])) !== + undefined + ) { + priorityList.push({ + uri: file, + priority: 1, + }); + return; + } + + // If no file has the config, try to find a file which has the target + if (data.targets?.[target] !== undefined) { + priorityList.push({ + uri: file, + priority: 2, + }); + return; + } + }); + priorityList.sort((a, b) => a.priority - b.priority); + + return priorityList.length > 0 ? priorityList[0].uri : undefined; + } + + /** + * Write the value to the bundle. This is silent (writes value to a bundle without prompting the user) + * @param key the key to write + * @param target the bundle target to write to + * @param value the value to write. If undefined the config is removed. + * @returns status of the write + */ + @Mutex.synchronise("writeMutex") + async write( + key: T, + target: string, + value?: BundleConfigs[T] + ) { + const file = await this.getFileToWrite(key, target); + if (file === undefined) { + return false; + } + const data = await parseBundleYaml(file); + const targetData = data.targets?.[target]; + if (targetData === undefined) { + return false; + } + + const newTargetData = this.writerMapping[key](targetData, value); + data.targets = {...data.targets, [target]: newTargetData}; + await writeBundleYaml(file, data); + + return true; + } + + /** + * Read the config from the bundle. 
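Because `BundleConfigReaderWriter` is consumed indirectly through `ConfigModel`, a short hypothetical usage sketch may help; `bundleFileSet` is assumed to be the `BundleFileSet` for the open workspace folder, and the cluster ID is a placeholder. `write` resolves the best file for a key (files that already set the key are preferred over files that merely define the target) and rewrites it under the mutex, while `read` goes through the cached bundle data.

```typescript
import {BundleFileSet} from "../bundle/BundleFileSet";
import {BundleConfigReaderWriter} from "./BundleConfigReaderWriter";

// Hypothetical usage sketch of BundleConfigReaderWriter.
async function bundleConfigSketch(bundleFileSet: BundleFileSet) {
    const readerWriter = new BundleConfigReaderWriter(bundleFileSet);

    // Falls back to a target in "dev" mode when none is marked as default;
    // the final "dev" literal is an assumption for this sketch.
    const target = (await readerWriter.defaultTarget) ?? "dev";

    const host = await readerWriter.read("host", target);

    // Returns false when no bundle file defines the target yet.
    const wrote = await readerWriter.write(
        "clusterId",
        target,
        "1234-567890-abcde123"
    );
    return {host, wrote};
}
```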
+ * @param key config key to reead + * @param target target to read from + * @returns value of the config + */ + async read(key: T, target: string) { + const targetObject = (await this.bundleFileSet.bundleDataCache.value) + .targets?.[target]; + return (await this.readerMapping[key](targetObject)) as + | BundleConfigs[T] + | undefined; + } + + async readAll(target: string) { + const configs = {} as any; + for (const key of Object.keys(this.readerMapping)) { + if (!isBundleConfig(key)) { + continue; + } + configs[key] = await this.read(key, target); + } + return configs as BundleConfigs; + } +} diff --git a/packages/databricks-vscode/src/configuration/ConfigModel.ts b/packages/databricks-vscode/src/configuration/ConfigModel.ts new file mode 100644 index 000000000..83a372129 --- /dev/null +++ b/packages/databricks-vscode/src/configuration/ConfigModel.ts @@ -0,0 +1,167 @@ +import {Disposable, EventEmitter, Uri} from "vscode"; +import { + BundleConfigs, + DatabricksConfigs, + isBundleConfig, + isOverrideableConfig, +} from "./types"; +import {ConfigOverrideReaderWriter} from "./ConfigOverrideReaderWriter"; +import {BundleConfigReaderWriter} from "./BundleConfigReaderWriter"; +import {Mutex} from "../locking"; +import {BundleWatcher} from "../file-managers/BundleWatcher"; +import {CachedValue} from "../locking/CachedValue"; +import {StateStorage} from "../vscode-objs/StateStorage"; + +function isDirectToBundleConfig( + key: keyof BundleConfigs, + mode?: BundleConfigs["mode"] +) { + const directToBundleConfigs: (keyof BundleConfigs)[] = []; + if (mode !== undefined) { + // filter by mode + } + return directToBundleConfigs.includes(key); +} + +const defaults: DatabricksConfigs = { + mode: "dev", +}; +/** + * In memory view of the databricks configs loaded from overrides and bundle. + */ +export class ConfigModel implements Disposable { + private disposables: Disposable[] = []; + + private readonly configsMutex = new Mutex(); + private readonly configCache = new CachedValue( + async (oldValue) => { + if (this.target === undefined) { + return {}; + } + const overrides = this.overrideReaderWriter.readAll(this.target); + const bundleConfigs = await this.bundleConfigReaderWriter.readAll( + this.target + ); + const newValue = {...bundleConfigs, ...overrides}; + + if (JSON.stringify(oldValue) !== JSON.stringify(newValue)) { + this.onDidChangeEmitter.fire(); + } + + return newValue; + } + ); + + private readonly onDidChangeEmitter = new EventEmitter(); + public readonly onDidChange = this.onDidChangeEmitter.event; + + private _target: string | undefined; + + constructor( + public readonly overrideReaderWriter: ConfigOverrideReaderWriter, + public readonly bundleConfigReaderWriter: BundleConfigReaderWriter, + private readonly stateStorage: StateStorage, + private readonly bundleWatcher: BundleWatcher + ) { + this.disposables.push( + this.overrideReaderWriter.onDidChange(async () => { + await this.configCache.invalidate(); + //try to access the value to trigger cache update and onDidChange event + this.configCache.value; + }), + this.bundleWatcher.onDidChange(async () => { + await this.readTarget(); + await this.configCache.invalidate(); + //try to access the value to trigger cache update and onDidChange event + this.configCache.value; + }) + ); + } + + public async readTarget() { + const targets = Object.keys( + (await this.bundleConfigReaderWriter.targets) ?? {} + ); + if (targets.includes(this.target ?? 
"")) { + return; + } + + let savedTarget: string | undefined; + await this.configsMutex.synchronise(async () => { + savedTarget = this.stateStorage.get("databricks.bundle.target"); + + if (savedTarget !== undefined && targets.includes(savedTarget)) { + return; + } + savedTarget = await this.bundleConfigReaderWriter.defaultTarget; + }); + await this.setTarget(savedTarget); + } + + public get target() { + return this._target; + } + + public async setTarget(target: string | undefined) { + if (target === this._target) { + return; + } + + await this.configsMutex.synchronise(async () => { + this._target = target; + await this.stateStorage.set("databricks.bundle.target", target); + }); + await this.configCache.invalidate(); + this.onDidChangeEmitter.fire(); + } + + public async get( + key: T + ): Promise { + return (await this.configCache.value)[key] ?? defaults[key]; + } + + @Mutex.synchronise("configsMutex") + public async set( + key: T, + value?: DatabricksConfigs[T], + handleInteractiveWrite?: (file: Uri | undefined) => any + ): Promise { + // We work with 1 set of configs throughout the function. + // No changes to the cache can happen when the global mutex is held. + // The assumption is that user doesn't change the target mode in the middle of + // writing a new config. + const {mode} = {...(await this.configCache.value)}; + + if (this.target === undefined) { + return false; + } + if (isOverrideableConfig(key)) { + return this.overrideReaderWriter.write(key, this.target, value); + } else if (isBundleConfig(key)) { + const isInteractive = handleInteractiveWrite !== undefined; + + // write to bundle if not interactive and the config can be safely written to bundle + if (!isInteractive && isDirectToBundleConfig(key, mode)) { + return await this.bundleConfigReaderWriter.write( + key, + this.target, + value + ); + } + + if (isInteractive) { + const file = await this.bundleConfigReaderWriter.getFileToWrite( + key, + this.target + ); + handleInteractiveWrite(file); + } + } + return true; + } + + dispose() { + this.disposables.forEach((d) => d.dispose()); + } +} diff --git a/packages/databricks-vscode/src/configuration/ConfigOverrideReaderWriter.ts b/packages/databricks-vscode/src/configuration/ConfigOverrideReaderWriter.ts new file mode 100644 index 000000000..c76f308fa --- /dev/null +++ b/packages/databricks-vscode/src/configuration/ConfigOverrideReaderWriter.ts @@ -0,0 +1,55 @@ +import {EventEmitter} from "vscode"; +import {Mutex} from "../locking"; +import {StateStorage} from "../vscode-objs/StateStorage"; +import {OverrideableConfigs} from "./types"; + +export class ConfigOverrideReaderWriter { + private writeMutex = new Mutex(); + private onDidChangeEmitter = new EventEmitter(); + public readonly onDidChange = this.onDidChangeEmitter.event; + + constructor(private readonly storage: StateStorage) {} + + /** + * Write the config as an override to the bundle. + * @param key the key to write + * @param target the bundle target to write to + * @param value the value to write. If undefined, the override is removed. 
+ * @returns status of the write + */ + @Mutex.synchronise("writeMutex") + async write( + key: T, + target: string, + value?: OverrideableConfigs[T] + ) { + const data = this.storage.get("databricks.bundle.overrides"); + if (data[target]?.[key] !== undefined) { + if (data[target] === undefined) { + data[target] = {}; + } + data[target][key] = value; + await this.storage.set("databricks.bundle.overrides", data); + this.onDidChangeEmitter.fire(); + } + return true; + } + + /** + * Read the value from storage overrides. + * @param key the key to read + * @param target the bundle target to read from + * @returns the value id override + */ + + async readAll(target: string) { + return this.storage.get("databricks.bundle.overrides")[target]; + } + + async read( + key: T, + target: string + ): Promise { + return this.storage.get("databricks.bundle.overrides")[target]?.[key]; + } +} diff --git a/packages/databricks-vscode/src/configuration/ConnectionManager.ts b/packages/databricks-vscode/src/configuration/ConnectionManager.ts index afae2fb22..f7c20d363 100644 --- a/packages/databricks-vscode/src/configuration/ConnectionManager.ts +++ b/packages/databricks-vscode/src/configuration/ConnectionManager.ts @@ -142,7 +142,7 @@ export class ConnectionManager { } } - if (!(await projectConfigFile.authProvider.check(true))) { + if (!(await projectConfigFile.authProvider.check())) { throw new Error( `Can't login with ${projectConfigFile.authProvider.describe()}.` ); @@ -277,7 +277,7 @@ export class ConnectionManager { return; } - if (!(await config.authProvider.check(false))) { + if (!(await config.authProvider.check())) { return; } diff --git a/packages/databricks-vscode/src/configuration/auth/AuthProvider.ts b/packages/databricks-vscode/src/configuration/auth/AuthProvider.ts index a9ec660e6..66ae81b6f 100644 --- a/packages/databricks-vscode/src/configuration/auth/AuthProvider.ts +++ b/packages/databricks-vscode/src/configuration/auth/AuthProvider.ts @@ -3,7 +3,9 @@ import { Config, ProductVersion, WorkspaceClient, + logging, } from "@databricks/databricks-sdk"; +import {window} from "vscode"; import {normalizeHost} from "../../utils/urlUtils"; import {workspaceConfigs} from "../../vscode-objs/WorkspaceConfigs"; @@ -13,6 +15,7 @@ const extensionVersion = require("../../../package.json") import {AzureCliCheck} from "./AzureCliCheck"; import {DatabricksCliCheck} from "./DatabricksCliCheck"; +import {Loggers} from "../../logger"; // TODO: Resolve this with SDK's AuthType. export type AuthType = "azure-cli" | "google-id" | "databricks-cli" | "profile"; @@ -47,10 +50,12 @@ export abstract class AuthProvider { }); } - // eslint-disable-next-line @typescript-eslint/no-unused-vars - async check(silent: boolean): Promise { - return true; - } + /** + * Check if the currently selected auth method can be used to login to Databricks. + * This function should not throw an error and each implementing class must + * handle it's own error messages and retry loops. 
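Since the new contract is stated only in prose, here is a minimal sketch of the retry-and-surface pattern it asks of implementers, written as a standalone helper rather than a concrete `AuthProvider` subclass; `tryLogin`, `description`, and the attempt count are placeholders.

```typescript
import {window} from "vscode";

// A minimal sketch of the pattern check() implementations are expected to follow:
// never throw, own the retry loop, and surface errors to the user directly.
async function checkWithRetries(
    tryLogin: () => Promise<boolean>,
    description: string,
    maxAttempts = 3
): Promise<boolean> {
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
        try {
            if (await tryLogin()) {
                return true;
            }
        } catch {
            // Swallow and retry; the failure is reported below rather than rethrown.
        }
    }
    window.showErrorMessage(`Can't login with ${description}.`);
    return false;
}
```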
+ */ + abstract check(): Promise; protected abstract getSdkConfig(): Config; @@ -129,6 +134,25 @@ export class ProfileAuthProvider extends AuthProvider { env: {}, }); } + + async check() { + try { + const workspaceClient = this.getWorkspaceClient(); + await workspaceClient.currentUser.me(); + return true; + } catch (e) { + let message: string = `Can't login with config profile ${this.profile}`; + if (e instanceof Error) { + message = `Can't login with config profile ${this.profile}: ${e.message}`; + } + logging.NamedLogger.getOrCreate(Loggers.Extension).error( + message, + e + ); + window.showErrorMessage(message); + return false; + } + } } export class DatabricksCliAuthProvider extends AuthProvider { @@ -167,9 +191,9 @@ export class DatabricksCliAuthProvider extends AuthProvider { }; } - async check(silent: boolean): Promise { + async check(): Promise { const databricksCliCheck = new DatabricksCliCheck(this); - return databricksCliCheck.check(silent); + return databricksCliCheck.check(); } } @@ -225,9 +249,9 @@ export class AzureCliAuthProvider extends AuthProvider { return envVars; } - async check(silent: boolean): Promise { + async check(): Promise { const cliCheck = new AzureCliCheck(this); - const result = await cliCheck.check(silent); + const result = await cliCheck.check(); this._tenantId = cliCheck.tenantId; this._appId = cliCheck.azureLoginAppId; return result; diff --git a/packages/databricks-vscode/src/configuration/auth/AzureCliCheck.ts b/packages/databricks-vscode/src/configuration/auth/AzureCliCheck.ts index 52e85751f..051343e96 100644 --- a/packages/databricks-vscode/src/configuration/auth/AzureCliCheck.ts +++ b/packages/databricks-vscode/src/configuration/auth/AzureCliCheck.ts @@ -46,7 +46,7 @@ export class AzureCliCheck implements Disposable { this.disposables = []; } - public async check(silent = false): Promise { + public async check(): Promise { this.tenantId = this.authProvider.tenantId; let loginAttempts = 0; @@ -156,11 +156,12 @@ export class AzureCliCheck implements Disposable { message = e.message; } + NamedLogger.getOrCreate(Loggers.Extension).error(message, e); window.showErrorMessage(message); return false; } - if (result && !silent) { + if (result) { window.showInformationMessage( "Databricks: Successfully logged in with Azure CLI" ); diff --git a/packages/databricks-vscode/src/configuration/auth/DatabricksCliCheck.ts b/packages/databricks-vscode/src/configuration/auth/DatabricksCliCheck.ts index a190b7086..278dfa688 100644 --- a/packages/databricks-vscode/src/configuration/auth/DatabricksCliCheck.ts +++ b/packages/databricks-vscode/src/configuration/auth/DatabricksCliCheck.ts @@ -2,10 +2,12 @@ import { ExecUtils, ProductVersion, WorkspaceClient, + logging, } from "@databricks/databricks-sdk"; import {Disposable, window} from "vscode"; import {DatabricksCliAuthProvider} from "./AuthProvider"; import {orchestrate, OrchestrationLoopError, Step} from "./orchestrate"; +import {Loggers} from "../../logger"; // eslint-disable-next-line @typescript-eslint/no-var-requires const extensionVersion = require("../../../package.json") @@ -23,7 +25,7 @@ export class DatabricksCliCheck implements Disposable { this.disposables = []; } - async check(silent: boolean): Promise { + async check(): Promise { const steps: Record> = { tryLogin: async () => { if (await this.tryLogin()) { @@ -55,12 +57,15 @@ export class DatabricksCliCheck implements Disposable { } else { message = e.message; } - + logging.NamedLogger.getOrCreate(Loggers.Extension).error( + message, + e + ); 
window.showErrorMessage(message); return false; } - if (result && !silent) { + if (result) { window.showInformationMessage( "Databricks: Successfully logged in with Databricks CLI" ); diff --git a/packages/databricks-vscode/src/configuration/auth/orchestrate.ts b/packages/databricks-vscode/src/configuration/auth/orchestrate.ts index 1f262f0d4..022b8a347 100644 --- a/packages/databricks-vscode/src/configuration/auth/orchestrate.ts +++ b/packages/databricks-vscode/src/configuration/auth/orchestrate.ts @@ -42,7 +42,7 @@ export async function orchestrate( throw new OrchestrationLoopError(); } const result: StepResult = await steps[step](); - logger?.info(`Azire CLI check: ${step}`, result); + logger?.info(`Auth check: ${step}`, result); if (result.type === "error") { throw result.error; diff --git a/packages/databricks-vscode/src/configuration/types.ts b/packages/databricks-vscode/src/configuration/types.ts new file mode 100644 index 000000000..cd77c79a0 --- /dev/null +++ b/packages/databricks-vscode/src/configuration/types.ts @@ -0,0 +1,47 @@ +import {RemoteUri} from "../sync/SyncDestination"; + +export type DatabricksConfigs = { + host?: string; + + // reconcile with actual mode and auth type enums from bundle + mode?: "dev" | "staging" | "prod"; + authType?: string; + + clusterId?: string; + workspaceFsPath?: RemoteUri; +}; + +export const OVERRIDEABLE_CONFIGS = [ + "clusterId", + "authType", + "workspaceFsPath", +] as const; + +export type OverrideableConfigs = Pick< + DatabricksConfigs, + (typeof OVERRIDEABLE_CONFIGS)[number] +>; + +export const BUNDLE_CONFIGS = [ + "clusterId", + "authType", + "workspaceFsPath", + "mode", + "host", +] as const; + +/** These are configs which can be loaded from the bundle */ +export type BundleConfigs = Pick< + DatabricksConfigs, + (typeof BUNDLE_CONFIGS)[number] +>; + +export function isOverrideableConfig( + key: any +): key is keyof OverrideableConfigs { + return OVERRIDEABLE_CONFIGS.includes(key); +} + +export function isBundleConfig(key: any): key is keyof BundleConfigs { + return BUNDLE_CONFIGS.includes(key); +} diff --git a/packages/databricks-vscode/src/extension.ts b/packages/databricks-vscode/src/extension.ts index 5893a1619..4085ba61d 100644 --- a/packages/databricks-vscode/src/extension.ts +++ b/packages/databricks-vscode/src/extension.ts @@ -32,7 +32,7 @@ import { WorkspaceFsCommands, WorkspaceFsDataProvider, } from "./workspace-fs"; -import {generateBundleSchema} from "./bundle/GenerateBundle"; +import {registerBundleAutocompleteProvider} from "./bundle/bundleAutocompleteProvider"; import {CustomWhenContext} from "./vscode-objs/CustomWhenContext"; import {StateStorage} from "./vscode-objs/StateStorage"; import path from "node:path"; @@ -50,6 +50,8 @@ import {DbConnectStatusBarButton} from "./language/DbConnectStatusBarButton"; import {NotebookAccessVerifier} from "./language/notebooks/NotebookAccessVerifier"; import {NotebookInitScriptManager} from "./language/notebooks/NotebookInitScriptManager"; import {showRestartNotebookDialogue} from "./language/notebooks/restartNotebookDialogue"; +import {BundleWatcher} from "./file-managers/BundleWatcher"; +import {BundleFileSet} from "./bundle/BundleFileSet"; import {showWhatsNewPopup} from "./whatsNewPopup"; export async function activate( @@ -541,9 +543,18 @@ export async function activate( }) ); + const bundleFileSet = new BundleFileSet(workspace.workspaceFolders[0].uri); + const bundleFileWatcher = new BundleWatcher(bundleFileSet); + context.subscriptions.push(bundleFileWatcher); + // generate a 
json schema for bundle root and load a custom provider into // redhat.vscode-yaml extension to validate bundle config files with this schema - generateBundleSchema(cli).catch((e) => { + registerBundleAutocompleteProvider( + cli, + bundleFileSet, + bundleFileWatcher, + context + ).catch((e) => { logging.NamedLogger.getOrCreate("Extension").error( "Failed to load bundle schema: ", e diff --git a/packages/databricks-vscode/src/file-managers/BundleWatcher.ts b/packages/databricks-vscode/src/file-managers/BundleWatcher.ts new file mode 100644 index 000000000..2df088f03 --- /dev/null +++ b/packages/databricks-vscode/src/file-managers/BundleWatcher.ts @@ -0,0 +1,72 @@ +import {Disposable, EventEmitter, Uri, workspace} from "vscode"; +import {BundleFileSet} from "../bundle/BundleFileSet"; +import {WithMutex} from "../locking"; +import path from "path"; + +export class BundleWatcher implements Disposable { + private disposables: Disposable[] = []; + + private readonly _onDidChange = new EventEmitter(); + public readonly onDidChange = this._onDidChange.event; + + private readonly _onDidChangeRootFile = new EventEmitter(); + public readonly onDidChangeRootFile = this._onDidChangeRootFile.event; + + private readonly _onDidCreate = new EventEmitter(); + public readonly onDidCreate = this._onDidCreate.event; + + private readonly _onDidDelete = new EventEmitter(); + public readonly onDidDelete = this._onDidDelete.event; + + private bundleFileSet: WithMutex; + + constructor(bundleFileSet: BundleFileSet) { + this.bundleFileSet = new WithMutex(bundleFileSet); + const yamlWatcher = workspace.createFileSystemWatcher( + this.bundleFileSet.value.getAbsolutePath( + path.join("**", "*.{yaml,yml}") + ).fsPath + ); + + this.disposables.push( + yamlWatcher, + yamlWatcher.onDidCreate((e) => { + this.yamlFileChangeHandler(e, "CREATE"); + }), + yamlWatcher.onDidChange((e) => { + this.yamlFileChangeHandler(e, "CHANGE"); + }), + yamlWatcher.onDidDelete((e) => { + this.yamlFileChangeHandler(e, "DELETE"); + }) + ); + } + + private async yamlFileChangeHandler( + e: Uri, + type: "CREATE" | "CHANGE" | "DELETE" + ) { + if (!(await this.bundleFileSet.value.isBundleFile(e))) { + return; + } + + await this.bundleFileSet.value.bundleDataCache.invalidate(); + this._onDidChange.fire(); + // to provide additional granularity, we also fire an event when the root bundle file changes + if (this.bundleFileSet.value.isRootBundleFile(e)) { + this._onDidChangeRootFile.fire(); + } + switch (type) { + case "CREATE": + this._onDidCreate.fire(e); + break; + case "DELETE": + this._onDidDelete.fire(e); + break; + } + } + + dispose() { + this.disposables.forEach((i) => i.dispose()); + } +} diff --git a/packages/databricks-vscode/src/file-managers/ProjectConfigFile.test.ts b/packages/databricks-vscode/src/file-managers/ProjectConfigFile.test.ts index 5c72f8606..2bfb37d21 100644 --- a/packages/databricks-vscode/src/file-managers/ProjectConfigFile.test.ts +++ b/packages/databricks-vscode/src/file-managers/ProjectConfigFile.test.ts @@ -75,7 +75,6 @@ describe(__filename, () => { const configFile = ProjectConfigFile.getProjectConfigFilePath(tempDir); await mkdir(path.dirname(configFile), {recursive: true}); - console.log(configFile); const config = { profile: "testProfile", workspacePath: "workspacePath", diff --git a/packages/databricks-vscode/src/locking/CachedValue.test.ts b/packages/databricks-vscode/src/locking/CachedValue.test.ts new file mode 100644 index 000000000..79eed40a7 --- /dev/null +++ 
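
The `BundleWatcher` added above invalidates the cached bundle data and fans out file-system events for bundle YAML files. A hedged sketch of how a consumer might wire it up, using the same imports `extension.ts` uses; the handler bodies are made up.

```typescript
// Illustrative wiring of BundleWatcher, as seen from src/extension-level code.
import {workspace} from "vscode";
import {BundleFileSet} from "./bundle/BundleFileSet";
import {BundleWatcher} from "./file-managers/BundleWatcher";

const folder = workspace.workspaceFolders?.[0];
if (folder !== undefined) {
    const bundleFileSet = new BundleFileSet(folder.uri);
    const watcher = new BundleWatcher(bundleFileSet);
    // In the extension this would be pushed onto context.subscriptions.

    // Fired for any create/change/delete of a *.yaml / *.yml bundle file,
    // after the bundle data cache has been invalidated.
    watcher.onDidChange(() => {
        // e.g. re-read the merged bundle configuration here
    });

    // Extra granularity: only when the root bundle file itself changes.
    watcher.onDidChangeRootFile(() => {
        // e.g. refresh target/host information here
    });
}
```
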
b/packages/databricks-vscode/src/locking/CachedValue.test.ts @@ -0,0 +1,42 @@ +import {expect} from "chai"; +import {CachedValue} from "./CachedValue"; + +describe(__filename, () => { + class GetterSpy { + value: string = "test"; + callCount: number = 0; + get(): Promise { + this.callCount += 1; + return Promise.resolve(this.value); + } + } + + let getterSpy: GetterSpy; + let st: CachedValue; + + beforeEach(() => { + getterSpy = new GetterSpy(); + st = new CachedValue(getterSpy.get.bind(getterSpy)); + }); + + it("should use getter to lazily fetch value initially", async () => { + expect(await st.value).to.equal("test"); + expect(getterSpy.callCount).to.equal(1); + }); + + it("should use cached value if not dirty", async () => { + expect(await st.value).to.equal("test"); + expect(await st.value).to.equal("test"); + expect(getterSpy.callCount).to.equal(1); + }); + + it("should use getter if dirty", async () => { + expect(await st.value).to.equal("test"); + getterSpy.value = "test2"; + expect(getterSpy.callCount).to.equal(1); + + await st.invalidate(); + expect(await st.value).to.equal("test2"); + expect(getterSpy.callCount).to.equal(2); + }); +}); diff --git a/packages/databricks-vscode/src/locking/CachedValue.ts b/packages/databricks-vscode/src/locking/CachedValue.ts new file mode 100644 index 000000000..4a21891af --- /dev/null +++ b/packages/databricks-vscode/src/locking/CachedValue.ts @@ -0,0 +1,32 @@ +import {Mutex} from "."; + +export class CachedValue { + private _value: T | null = null; + private _dirty = true; + private readonly mutex = new Mutex(); + + constructor(private readonly getter: (value: T | null) => Promise) {} + + get value(): Promise { + if (this._dirty || this._value === null) { + return this.mutex + .wait() + .then(async () => { + this._value = await this.getter(this._value); + this._dirty = false; + return this._value; + }) + .finally(() => { + this.mutex.signal(); + }); + } + + return Promise.resolve(this._value); + } + + async invalidate() { + await this.mutex.wait(); + this._dirty = true; + this.mutex.signal(); + } +} diff --git a/packages/databricks-vscode/src/locking/Mutex.ts b/packages/databricks-vscode/src/locking/Mutex.ts index 7a30e0b18..3c64af3e8 100644 --- a/packages/databricks-vscode/src/locking/Mutex.ts +++ b/packages/databricks-vscode/src/locking/Mutex.ts @@ -24,4 +24,32 @@ export class Mutex { get locked() { return this._locked; } + + async synchronise(fn: () => Promise) { + await this.wait(); + try { + await fn(); + } finally { + this.signal(); + } + } + + static synchronise(mutexKey: string) { + return function ( + target: any, + key: string, + descriptor: PropertyDescriptor + ) { + const original = descriptor.value; + descriptor.value = async function (...args: any[]) { + const mutex = (this as any)[mutexKey] as Mutex; + await mutex.wait(); + try { + return await original.apply(this, args); + } finally { + mutex.signal(); + } + }; + }; + } } diff --git a/packages/databricks-vscode/src/locking/index.ts b/packages/databricks-vscode/src/locking/index.ts index ac662fb58..834516389 100644 --- a/packages/databricks-vscode/src/locking/index.ts +++ b/packages/databricks-vscode/src/locking/index.ts @@ -1,2 +1,9 @@ +import {Mutex} from "./Mutex"; + export * from "./Mutex"; export * from "./RWLock"; + +export class WithMutex { + public readonly mutex = new Mutex(); + constructor(public value: T) {} +} diff --git a/packages/databricks-vscode/src/vscode-objs/StateStorage.ts b/packages/databricks-vscode/src/vscode-objs/StateStorage.ts index b0a72c855..b453d7f0f 
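
`CachedValue` and the new `Mutex.synchronise` helpers above are the locking primitives this branch builds on, and the local ESLint rule enforces that the decorator names an existing mutex property. The sketch below shows how they compose; `ConfigCache` and `fetchRemoteConfig` are illustrative names, and it assumes the project's experimental-decorator settings.

```typescript
// Hedged sketch combining CachedValue with the Mutex.synchronise decorator.
import {Mutex} from "./locking";
import {CachedValue} from "./locking/CachedValue";

async function fetchRemoteConfig(): Promise<string> {
    return "config-from-remote"; // stand-in for an expensive read
}

class ConfigCache {
    // The decorator looks up this property by name at call time.
    private readonly mutex = new Mutex();
    private readonly cached = new CachedValue(() => fetchRemoteConfig());

    // Serialises concurrent callers on `this.mutex`; the
    // mutex-synchronised-decorator lint rule checks this usage pattern.
    @Mutex.synchronise("mutex")
    async refresh(): Promise<void> {
        await this.cached.invalidate(); // next read of .value re-runs the getter
    }

    get value(): Promise<string> {
        return this.cached.value; // cached until invalidate() is called
    }
}
```
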
100644 --- a/packages/databricks-vscode/src/vscode-objs/StateStorage.ts +++ b/packages/databricks-vscode/src/vscode-objs/StateStorage.ts @@ -1,5 +1,6 @@ import {randomUUID} from "crypto"; import {ExtensionContext} from "vscode"; +import {OverrideableConfigs} from "../configuration/types"; /* eslint-disable @typescript-eslint/naming-convention */ type KeyInfo = { @@ -16,8 +17,11 @@ function withType() { } const Keys = { - "databricks.clusterId": withType()({ + "databricks.bundle.overrides": withType<{ + [k: string]: OverrideableConfigs; + }>()({ location: "workspace", + defaultValue: {}, }), "databricks.wsfs.skipSwitchToWorkspace": withType()({ @@ -69,6 +73,10 @@ const Keys = { }, }), + "databricks.bundle.target": withType()({ + location: "workspace", + }), + "databricks.lastInstalledExtensionVersion": withType()({ location: "workspace", defaultValue: "0.0.0", diff --git a/yarn.lock b/yarn.lock index 1ef960e18..39c1cf33a 100644 --- a/yarn.lock +++ b/yarn.lock @@ -249,6 +249,18 @@ __metadata: languageName: node linkType: hard +"@bcherny/json-schema-ref-parser@npm:10.0.5-fork": + version: 10.0.5-fork + resolution: "@bcherny/json-schema-ref-parser@npm:10.0.5-fork" + dependencies: + "@jsdevtools/ono": ^7.1.3 + "@types/json-schema": ^7.0.6 + call-me-maybe: ^1.0.1 + js-yaml: ^4.1.0 + checksum: e90eb3655c4e15f54ebc5138baac98471d159e3a253b484416c03c2d43f5c3bc80a4d6fe18acd71f77bf2f95f7fbc36730abb21cbd1f9d80a6af630c554e6d62 + languageName: node + linkType: hard + "@colors/colors@npm:1.5.0": version: 1.5.0 resolution: "@colors/colors@npm:1.5.0" @@ -487,7 +499,7 @@ __metadata: languageName: node linkType: hard -"@eslint-community/eslint-utils@npm:^4.2.0, @eslint-community/eslint-utils@npm:^4.3.0": +"@eslint-community/eslint-utils@npm:^4.2.0, @eslint-community/eslint-utils@npm:^4.3.0, @eslint-community/eslint-utils@npm:^4.4.0": version: 4.4.0 resolution: "@eslint-community/eslint-utils@npm:4.4.0" dependencies: @@ -886,6 +898,13 @@ __metadata: languageName: node linkType: hard +"@jsdevtools/ono@npm:^7.1.3": + version: 7.1.3 + resolution: "@jsdevtools/ono@npm:7.1.3" + checksum: 2297fcd472ba810bffe8519d2249171132844c7174f3a16634f9260761c8c78bc0428a4190b5b6d72d45673c13918ab9844d706c3ed4ef8f62ab11a2627a08ad + languageName: node + linkType: hard + "@lukeed/ms@npm:^2.0.1": version: 2.0.1 resolution: "@lukeed/ms@npm:2.0.1" @@ -1245,6 +1264,23 @@ __metadata: languageName: node linkType: hard +"@types/eslint@npm:^8.44.6": + version: 8.44.6 + resolution: "@types/eslint@npm:8.44.6" + dependencies: + "@types/estree": "*" + "@types/json-schema": "*" + checksum: ed8de582ab3dbd7ec0bf97d41f4f3de28dd8a37fc48bc423e1c406bbb70d1fd8c4175ba17ad6495ef9ef99a43df71421277b7a2a0355097489c4c4cf6bb266ff + languageName: node + linkType: hard + +"@types/estree@npm:*": + version: 1.0.3 + resolution: "@types/estree@npm:1.0.3" + checksum: f21a5448995f8aa61ab2248d10590d275666b11d26c27fe75b3c23420b07b469d5ce820deefcf7399671faa09d56eb7ce012322948e484d94686fda154be5221 + languageName: node + linkType: hard + "@types/fs-extra@npm:^11.0.1": version: 11.0.1 resolution: "@types/fs-extra@npm:11.0.1" @@ -1255,6 +1291,16 @@ __metadata: languageName: node linkType: hard +"@types/glob@npm:^7.1.3": + version: 7.2.0 + resolution: "@types/glob@npm:7.2.0" + dependencies: + "@types/minimatch": "*" + "@types/node": "*" + checksum: 6ae717fedfdfdad25f3d5a568323926c64f52ef35897bcac8aca8e19bc50c0bd84630bbd063e5d52078b2137d8e7d3c26eabebd1a2f03ff350fff8a91e79fc19 + languageName: node + linkType: hard + "@types/http-cache-semantics@npm:^4.0.1": version: 4.0.1 
resolution: "@types/http-cache-semantics@npm:4.0.1" @@ -1294,6 +1340,13 @@ __metadata: languageName: node linkType: hard +"@types/json-schema@npm:*, @types/json-schema@npm:^7.0.12": + version: 7.0.14 + resolution: "@types/json-schema@npm:7.0.14" + checksum: 4b3dd99616c7c808201c56f6c7f6552eb67b5c0c753ab3fa03a6cb549aae950da537e9558e53fa65fba23d1be624a1e4e8d20c15027efbe41e03ca56f2b04fb0 + languageName: node + linkType: hard + "@types/json-schema@npm:^7.0.11": version: 7.0.12 resolution: "@types/json-schema@npm:7.0.12" @@ -1301,6 +1354,13 @@ __metadata: languageName: node linkType: hard +"@types/json-schema@npm:^7.0.6": + version: 7.0.13 + resolution: "@types/json-schema@npm:7.0.13" + checksum: 345df21a678fa72fb389f35f33de77833d09d4a142bb2bcb27c18690efa4cf70fc2876e43843cefb3fbdb9fcb12cd3e970a90936df30f53bbee899865ff605ab + languageName: node + linkType: hard + "@types/json5@npm:^0.0.29": version: 0.0.29 resolution: "@types/json5@npm:0.0.29" @@ -1326,6 +1386,20 @@ __metadata: languageName: node linkType: hard +"@types/lodash@npm:^4.14.182, @types/lodash@npm:^4.14.199": + version: 4.14.199 + resolution: "@types/lodash@npm:4.14.199" + checksum: e68d1fcbbfce953ed87b296a628573f62939227bcda0c934954e862b421e8a34c5e71cad6fea27b9980567909e6a4698f09025692958e36d64ea9ed99ec6fb2e + languageName: node + linkType: hard + +"@types/minimatch@npm:*": + version: 5.1.2 + resolution: "@types/minimatch@npm:5.1.2" + checksum: 0391a282860c7cb6fe262c12b99564732401bdaa5e395bee9ca323c312c1a0f45efbf34dce974682036e857db59a5c9b1da522f3d6055aeead7097264c8705a8 + languageName: node + linkType: hard + "@types/minimist@npm:^1.2.0": version: 1.2.2 resolution: "@types/minimist@npm:1.2.2" @@ -1333,13 +1407,20 @@ __metadata: languageName: node linkType: hard -"@types/mocha@npm:^10.0.0, @types/mocha@npm:^10.0.1": +"@types/mocha@npm:^10.0.0": version: 10.0.1 resolution: "@types/mocha@npm:10.0.1" checksum: 224ea9fce7b1734ccdb9aa99a622d902a538ce1847bca7fd22c5fb38adcf3ed536f50f48f587085db988a4bb3c2eb68f4b98e1cd6a38bc5547bd3bbbedc54495 languageName: node linkType: hard +"@types/mocha@npm:^10.0.2": + version: 10.0.2 + resolution: "@types/mocha@npm:10.0.2" + checksum: a78a02691f102beb02f9ec435458107d21b518fc477c3b2f37c90b8e70b67bff888351715ae173bd31ede25ee5e0d688aefb0faf4284034d08ba63027c8b0c01 + languageName: node + linkType: hard + "@types/mock-require@npm:^2.0.1": version: 2.0.1 resolution: "@types/mock-require@npm:2.0.1" @@ -1393,6 +1474,13 @@ __metadata: languageName: node linkType: hard +"@types/prettier@npm:^2.6.1": + version: 2.7.3 + resolution: "@types/prettier@npm:2.7.3" + checksum: 705384209cea6d1433ff6c187c80dcc0b95d99d5c5ce21a46a9a58060c527973506822e428789d842761e0280d25e3359300f017fbe77b9755bc772ab3dc2f83 + languageName: node + linkType: hard + "@types/responselike@npm:^1.0.0": version: 1.0.0 resolution: "@types/responselike@npm:1.0.0" @@ -1409,7 +1497,7 @@ __metadata: languageName: node linkType: hard -"@types/semver@npm:^7.5.1": +"@types/semver@npm:^7.5.0, @types/semver@npm:^7.5.1": version: 7.5.4 resolution: "@types/semver@npm:7.5.4" checksum: 120c0189f6fec5f2d12d0d71ac8a4cfa952dc17fa3d842e8afddb82bba8828a4052f8799c1653e2b47ae1977435f38e8985658fde971905ce5afb8e23ee97ecf @@ -1556,6 +1644,16 @@ __metadata: languageName: node linkType: hard +"@typescript-eslint/scope-manager@npm:6.9.0": + version: 6.9.0 + resolution: "@typescript-eslint/scope-manager@npm:6.9.0" + dependencies: + "@typescript-eslint/types": 6.9.0 + "@typescript-eslint/visitor-keys": 6.9.0 + checksum: 
b7ddcea11bdb95107659800bdf3b33eae22a4dc5c28dc0f8dc5507aa9affaae0e332b6d8c7d5286a7ec75e7c4abd211eb9fdf9647a9a796689cdcc11f6ab40c6 + languageName: node + linkType: hard + "@typescript-eslint/type-utils@npm:6.0.0": version: 6.0.0 resolution: "@typescript-eslint/type-utils@npm:6.0.0" @@ -1580,6 +1678,13 @@ __metadata: languageName: node linkType: hard +"@typescript-eslint/types@npm:6.9.0": + version: 6.9.0 + resolution: "@typescript-eslint/types@npm:6.9.0" + checksum: e0444afa1f2ebca746c72b3d0bf95982eb1e8b4fb91bcba465c1345c35fa13b36c589bfd91c776b864f223bc50817b2613df5892185c2e34332bf4cc57cc865d + languageName: node + linkType: hard + "@typescript-eslint/typescript-estree@npm:6.0.0": version: 6.0.0 resolution: "@typescript-eslint/typescript-estree@npm:6.0.0" @@ -1598,6 +1703,24 @@ __metadata: languageName: node linkType: hard +"@typescript-eslint/typescript-estree@npm:6.9.0": + version: 6.9.0 + resolution: "@typescript-eslint/typescript-estree@npm:6.9.0" + dependencies: + "@typescript-eslint/types": 6.9.0 + "@typescript-eslint/visitor-keys": 6.9.0 + debug: ^4.3.4 + globby: ^11.1.0 + is-glob: ^4.0.3 + semver: ^7.5.4 + ts-api-utils: ^1.0.1 + peerDependenciesMeta: + typescript: + optional: true + checksum: 51088c23cca608a6e5c195b0a2d8a17ad00ca47199ba4df0c1013912a80194bff9f5bd4d035d6ab2596788491e9a3e04bbf6cad6494a3b1bbd59fea442750268 + languageName: node + linkType: hard + "@typescript-eslint/utils@npm:6.0.0": version: 6.0.0 resolution: "@typescript-eslint/utils@npm:6.0.0" @@ -1616,6 +1739,23 @@ __metadata: languageName: node linkType: hard +"@typescript-eslint/utils@npm:^6.9.0": + version: 6.9.0 + resolution: "@typescript-eslint/utils@npm:6.9.0" + dependencies: + "@eslint-community/eslint-utils": ^4.4.0 + "@types/json-schema": ^7.0.12 + "@types/semver": ^7.5.0 + "@typescript-eslint/scope-manager": 6.9.0 + "@typescript-eslint/types": 6.9.0 + "@typescript-eslint/typescript-estree": 6.9.0 + semver: ^7.5.4 + peerDependencies: + eslint: ^7.0.0 || ^8.0.0 + checksum: 973c24d7858f224934958ee58c21ff21dfe54dbb1d0e0c5f889298fadcd7ee2dbfd49cf86ccafab74d428c31de66cd9beee7c39d2b64f9edcc9e941573bac175 + languageName: node + linkType: hard + "@typescript-eslint/visitor-keys@npm:6.0.0": version: 6.0.0 resolution: "@typescript-eslint/visitor-keys@npm:6.0.0" @@ -1626,6 +1766,16 @@ __metadata: languageName: node linkType: hard +"@typescript-eslint/visitor-keys@npm:6.9.0": + version: 6.9.0 + resolution: "@typescript-eslint/visitor-keys@npm:6.9.0" + dependencies: + "@typescript-eslint/types": 6.9.0 + eslint-visitor-keys: ^3.4.1 + checksum: de8e2e363df41e5ae9774a5ebd1c49d29c771ea8b3869917f65a74cd4d14a67417c79916f456ee81ef5b0d947b7b8975385fc6eea3f1812d53a2eaaea832459e + languageName: node + linkType: hard + "@vscode/debugadapter@npm:^1.61.0": version: 1.61.0 resolution: "@vscode/debugadapter@npm:1.61.0" @@ -2207,6 +2357,13 @@ __metadata: languageName: node linkType: hard +"any-promise@npm:^1.0.0": + version: 1.3.0 + resolution: "any-promise@npm:1.3.0" + checksum: 0ee8a9bdbe882c90464d75d1f55cf027f5458650c4bd1f0467e65aec38ccccda07ca5844969ee77ed46d04e7dded3eaceb027e8d32f385688523fe305fa7e1de + languageName: node + linkType: hard + "anymatch@npm:~3.1.2": version: 3.1.3 resolution: "anymatch@npm:3.1.3" @@ -2680,6 +2837,13 @@ __metadata: languageName: node linkType: hard +"call-me-maybe@npm:^1.0.1": + version: 1.0.2 + resolution: "call-me-maybe@npm:1.0.2" + checksum: 42ff2d0bed5b207e3f0122589162eaaa47ba618f79ad2382fe0ba14d9e49fbf901099a6227440acc5946f86a4953e8aa2d242b330b0a5de4d090bb18f8935cae + languageName: node + linkType: 
hard + "callsites@npm:^3.0.0": version: 3.1.0 resolution: "callsites@npm:3.1.0" @@ -2907,6 +3071,19 @@ __metadata: languageName: node linkType: hard +"cli-color@npm:^2.0.2": + version: 2.0.3 + resolution: "cli-color@npm:2.0.3" + dependencies: + d: ^1.0.1 + es5-ext: ^0.10.61 + es6-iterator: ^2.0.3 + memoizee: ^0.4.15 + timers-ext: ^0.1.7 + checksum: b1c5f3d0ec29cbe22be7a01d90bd0cfa080ffed6f1c321ea20ae3f10c6041f0e411e28ee2b98025945bee3548931deed1ae849b53c21b523ba74efef855cd73d + languageName: node + linkType: hard + "cli-cursor@npm:^3.1.0": version: 3.1.0 resolution: "cli-cursor@npm:3.1.0" @@ -3414,6 +3591,16 @@ __metadata: languageName: node linkType: hard +"d@npm:1, d@npm:^1.0.1": + version: 1.0.1 + resolution: "d@npm:1.0.1" + dependencies: + es5-ext: ^0.10.50 + type: ^1.0.1 + checksum: 49ca0639c7b822db670de93d4fbce44b4aa072cd848c76292c9978a8cd0fff1028763020ff4b0f147bd77bfe29b4c7f82e0f71ade76b2a06100543cdfd948d19 + languageName: node + linkType: hard + "dargs@npm:^7.0.0": version: 7.0.0 resolution: "dargs@npm:7.0.0" @@ -3431,8 +3618,10 @@ __metadata: "@sinonjs/fake-timers": ^11.2.2 "@types/bcryptjs": ^2.4.2 "@types/chai": ^4.3.5 + "@types/eslint": ^8.44.6 "@types/fs-extra": ^11.0.1 - "@types/mocha": ^10.0.1 + "@types/lodash": ^4.14.199 + "@types/mocha": ^10.0.2 "@types/mock-require": ^2.0.1 "@types/node": ^20.4.2 "@types/sinonjs__fake-timers": ^8.1.2 @@ -3442,6 +3631,7 @@ __metadata: "@types/yargs": ^17.0.24 "@typescript-eslint/eslint-plugin": ^6.0.0 "@typescript-eslint/parser": ^6.0.0 + "@typescript-eslint/utils": ^6.9.0 "@vscode/debugadapter": ^1.61.0 "@vscode/extension-telemetry": ^0.9.0 "@vscode/test-electron": ^2.3.3 @@ -3457,8 +3647,10 @@ __metadata: chai: ^4.3.7 esbuild: ^0.19.4 eslint: ^8.51.0 + eslint-plugin-local-rules: ^2.0.0 fs-extra: ^11.1.1 - glob: ^10.3.3 + glob: ^10.3.10 + json-schema-to-typescript: ^13.1.1 mocha: ^10.2.0 mock-require: ^3.0.3 nyc: ^15.1.0 @@ -4087,6 +4279,17 @@ __metadata: languageName: node linkType: hard +"es5-ext@npm:^0.10.35, es5-ext@npm:^0.10.46, es5-ext@npm:^0.10.50, es5-ext@npm:^0.10.53, es5-ext@npm:^0.10.61, es5-ext@npm:~0.10.14, es5-ext@npm:~0.10.2, es5-ext@npm:~0.10.46": + version: 0.10.62 + resolution: "es5-ext@npm:0.10.62" + dependencies: + es6-iterator: ^2.0.3 + es6-symbol: ^3.1.3 + next-tick: ^1.1.0 + checksum: 25f42f6068cfc6e393cf670bc5bba249132c5f5ec2dd0ed6e200e6274aca2fed8e9aec8a31c76031744c78ca283c57f0b41c7e737804c6328c7b8d3fbcba7983 + languageName: node + linkType: hard + "es6-error@npm:^4.0.1": version: 4.1.1 resolution: "es6-error@npm:4.1.1" @@ -4094,6 +4297,39 @@ __metadata: languageName: node linkType: hard +"es6-iterator@npm:^2.0.3": + version: 2.0.3 + resolution: "es6-iterator@npm:2.0.3" + dependencies: + d: 1 + es5-ext: ^0.10.35 + es6-symbol: ^3.1.1 + checksum: 6e48b1c2d962c21dee604b3d9f0bc3889f11ed5a8b33689155a2065d20e3107e2a69cc63a71bd125aeee3a589182f8bbcb5c8a05b6a8f38fa4205671b6d09697 + languageName: node + linkType: hard + +"es6-symbol@npm:^3.1.1, es6-symbol@npm:^3.1.3": + version: 3.1.3 + resolution: "es6-symbol@npm:3.1.3" + dependencies: + d: ^1.0.1 + ext: ^1.1.2 + checksum: cd49722c2a70f011eb02143ef1c8c70658d2660dead6641e160b94619f408b9cf66425515787ffe338affdf0285ad54f4eae30ea5bd510e33f8659ec53bcaa70 + languageName: node + linkType: hard + +"es6-weak-map@npm:^2.0.3": + version: 2.0.3 + resolution: "es6-weak-map@npm:2.0.3" + dependencies: + d: 1 + es5-ext: ^0.10.46 + es6-iterator: ^2.0.3 + es6-symbol: ^3.1.1 + checksum: 
19ca15f46d50948ce78c2da5f21fb5b1ef45addd4fe17b5df952ff1f2a3d6ce4781249bc73b90995257264be2a98b2ec749bb2aba0c14b5776a1154178f9c927 + languageName: node + linkType: hard + "esbuild@npm:^0.19.4": version: 0.19.4 resolution: "esbuild@npm:0.19.4" @@ -4224,6 +4460,13 @@ __metadata: languageName: node linkType: hard +"eslint-plugin-local-rules@npm:^2.0.0": + version: 2.0.0 + resolution: "eslint-plugin-local-rules@npm:2.0.0" + checksum: 597a052a93de1fe9e9bfc7187db43f8ed66ceaf0e12bb759a931fe02607da42a70d1e1029bef2ee6ac221ad205ab63c18827e375ae5e88061bf7f24325479c11 + languageName: node + linkType: hard + "eslint-plugin-no-only-tests@npm:^3.1.0": version: 3.1.0 resolution: "eslint-plugin-no-only-tests@npm:3.1.0" @@ -4390,6 +4633,16 @@ __metadata: languageName: node linkType: hard +"event-emitter@npm:^0.3.5": + version: 0.3.5 + resolution: "event-emitter@npm:0.3.5" + dependencies: + d: 1 + es5-ext: ~0.10.14 + checksum: 27c1399557d9cd7e0aa0b366c37c38a4c17293e3a10258e8b692a847dd5ba9fb90429c3a5a1eeff96f31f6fa03ccbd31d8ad15e00540b22b22f01557be706030 + languageName: node + linkType: hard + "event-target-shim@npm:^5.0.0": version: 5.0.1 resolution: "event-target-shim@npm:5.0.1" @@ -4501,6 +4754,15 @@ __metadata: languageName: node linkType: hard +"ext@npm:^1.1.2": + version: 1.7.0 + resolution: "ext@npm:1.7.0" + dependencies: + type: ^2.7.2 + checksum: ef481f9ef45434d8c867cfd09d0393b60945b7c8a1798bedc4514cb35aac342ccb8d8ecb66a513e6a2b4ec1e294a338e3124c49b29736f8e7c735721af352c31 + languageName: node + linkType: hard + "external-editor@npm:^3.0.3": version: 3.1.0 resolution: "external-editor@npm:3.1.0" @@ -5101,6 +5363,13 @@ __metadata: languageName: node linkType: hard +"get-stdin@npm:^8.0.0": + version: 8.0.0 + resolution: "get-stdin@npm:8.0.0" + checksum: 40128b6cd25781ddbd233344f1a1e4006d4284906191ed0a7d55ec2c1a3e44d650f280b2c9eeab79c03ac3037da80257476c0e4e5af38ddfb902d6ff06282d77 + languageName: node + linkType: hard + "get-stream@npm:3.0.0, get-stream@npm:^3.0.0": version: 3.0.0 resolution: "get-stream@npm:3.0.0" @@ -5212,6 +5481,17 @@ __metadata: languageName: node linkType: hard +"glob-promise@npm:^4.2.2": + version: 4.2.2 + resolution: "glob-promise@npm:4.2.2" + dependencies: + "@types/glob": ^7.1.3 + peerDependencies: + glob: ^7.1.6 + checksum: c1a3d95f7c8393e4151d4899ec4e42bb2e8237160f840ad1eccbe9247407da8b6c13e28f463022e011708bc40862db87b9b77236d35afa3feb8aa86d518f2dfe + languageName: node + linkType: hard + "glob@npm:7.2.0": version: 7.2.0 resolution: "glob@npm:7.2.0" @@ -5241,18 +5521,18 @@ __metadata: languageName: node linkType: hard -"glob@npm:^10.3.3": - version: 10.3.3 - resolution: "glob@npm:10.3.3" +"glob@npm:^10.3.10": + version: 10.3.10 + resolution: "glob@npm:10.3.10" dependencies: foreground-child: ^3.1.0 - jackspeak: ^2.0.3 + jackspeak: ^2.3.5 minimatch: ^9.0.1 minipass: ^5.0.0 || ^6.0.2 || ^7.0.0 path-scurry: ^1.10.1 bin: - glob: dist/cjs/src/bin.js - checksum: 29190d3291f422da0cb40b77a72fc8d2c51a36524e99b8bf412548b7676a6627489528b57250429612b6eec2e6fe7826d328451d3e694a9d15e575389308ec53 + glob: dist/esm/bin.mjs + checksum: 4f2fe2511e157b5a3f525a54092169a5f92405f24d2aed3142f4411df328baca13059f4182f1db1bf933e2c69c0bd89e57ae87edd8950cba8c7ccbe84f721cf3 languageName: node linkType: hard @@ -6056,6 +6336,13 @@ __metadata: languageName: node linkType: hard +"is-promise@npm:^2.2.2": + version: 2.2.2 + resolution: "is-promise@npm:2.2.2" + checksum: 18bf7d1c59953e0ad82a1ed963fb3dc0d135c8f299a14f89a17af312fc918373136e56028e8831700e1933519630cc2fd4179a777030330fde20d34e96f40c78 + 
languageName: node + linkType: hard + "is-regex@npm:^1.1.4": version: 1.1.4 resolution: "is-regex@npm:1.1.4" @@ -6347,6 +6634,19 @@ __metadata: languageName: node linkType: hard +"jackspeak@npm:^2.3.5": + version: 2.3.6 + resolution: "jackspeak@npm:2.3.6" + dependencies: + "@isaacs/cliui": ^8.0.2 + "@pkgjs/parseargs": ^0.11.0 + dependenciesMeta: + "@pkgjs/parseargs": + optional: true + checksum: 57d43ad11eadc98cdfe7496612f6bbb5255ea69fe51ea431162db302c2a11011642f50cfad57288bd0aea78384a0612b16e131944ad8ecd09d619041c8531b54 + languageName: node + linkType: hard + "jake@npm:^10.8.5": version: 10.8.5 resolution: "jake@npm:10.8.5" @@ -6490,6 +6790,30 @@ __metadata: languageName: node linkType: hard +"json-schema-to-typescript@npm:^13.1.1": + version: 13.1.1 + resolution: "json-schema-to-typescript@npm:13.1.1" + dependencies: + "@bcherny/json-schema-ref-parser": 10.0.5-fork + "@types/json-schema": ^7.0.11 + "@types/lodash": ^4.14.182 + "@types/prettier": ^2.6.1 + cli-color: ^2.0.2 + get-stdin: ^8.0.0 + glob: ^7.1.6 + glob-promise: ^4.2.2 + is-glob: ^4.0.3 + lodash: ^4.17.21 + minimist: ^1.2.6 + mkdirp: ^1.0.4 + mz: ^2.7.0 + prettier: ^2.6.2 + bin: + json2ts: dist/src/cli.js + checksum: e6d894741703c27e0fae4a30b860fdd063439b78b5f98175203a6fe0c1152e2f6a5c4be34fbb2bc460ca50e14d8514cb3da3979d4f912bf2c6fada2f97b551b8 + languageName: node + linkType: hard + "json-schema-traverse@npm:^0.4.1": version: 0.4.1 resolution: "json-schema-traverse@npm:0.4.1" @@ -6956,6 +7280,15 @@ __metadata: languageName: node linkType: hard +"lru-queue@npm:^0.1.0": + version: 0.1.0 + resolution: "lru-queue@npm:0.1.0" + dependencies: + es5-ext: ~0.10.2 + checksum: 7f2c53c5e7f2de20efb6ebb3086b7aea88d6cf9ae91ac5618ece974122960c4e8ed04988e81d92c3e63d60b12c556b14d56ef7a9c5a4627b23859b813e39b1a2 + languageName: node + linkType: hard + "make-dir@npm:^1.0.0": version: 1.3.0 resolution: "make-dir@npm:1.3.0" @@ -7058,6 +7391,22 @@ __metadata: languageName: node linkType: hard +"memoizee@npm:^0.4.15": + version: 0.4.15 + resolution: "memoizee@npm:0.4.15" + dependencies: + d: ^1.0.1 + es5-ext: ^0.10.53 + es6-weak-map: ^2.0.3 + event-emitter: ^0.3.5 + is-promise: ^2.2.2 + lru-queue: ^0.1.0 + next-tick: ^1.1.0 + timers-ext: ^0.1.7 + checksum: 4065d94416dbadac56edf5947bf342beca0e9f051f33ad60d7c4baf3f6ca0f3c6fdb770c5caed5a89c0ceaf9121428582f396445d591785281383d60aa883418 + languageName: node + linkType: hard + "meow@npm:^8.1.2": version: 8.1.2 resolution: "meow@npm:8.1.2" @@ -7458,6 +7807,17 @@ __metadata: languageName: node linkType: hard +"mz@npm:^2.7.0": + version: 2.7.0 + resolution: "mz@npm:2.7.0" + dependencies: + any-promise: ^1.0.0 + object-assign: ^4.0.1 + thenify-all: ^1.0.0 + checksum: 8427de0ece99a07e9faed3c0c6778820d7543e3776f9a84d22cf0ec0a8eb65f6e9aee9c9d353ff9a105ff62d33a9463c6ca638974cc652ee8140cd1e35951c87 + languageName: node + linkType: hard + "nanoid@npm:3.3.3": version: 3.3.3 resolution: "nanoid@npm:3.3.3" @@ -7502,6 +7862,13 @@ __metadata: languageName: node linkType: hard +"next-tick@npm:1, next-tick@npm:^1.1.0": + version: 1.1.0 + resolution: "next-tick@npm:1.1.0" + checksum: 83b5cf36027a53ee6d8b7f9c0782f2ba87f4858d977342bfc3c20c21629290a2111f8374d13a81221179603ffc4364f38374b5655d17b6a8f8a8c77bdea4fe8b + languageName: node + linkType: hard + "node-abi@npm:^3.3.0": version: 3.35.0 resolution: "node-abi@npm:3.35.0" @@ -8350,6 +8717,15 @@ __metadata: languageName: node linkType: hard +"prettier@npm:^2.6.2": + version: 2.8.8 + resolution: "prettier@npm:2.8.8" + bin: + prettier: bin-prettier.js + checksum: 
b49e409431bf129dd89238d64299ba80717b57ff5a6d1c1a8b1a28b590d998a34e083fa13573bc732bb8d2305becb4c9a4407f8486c81fa7d55100eb08263cf8 + languageName: node + linkType: hard + "prettier@npm:^3.0.0": version: 3.0.0 resolution: "prettier@npm:3.0.0" @@ -9764,6 +10140,24 @@ __metadata: languageName: node linkType: hard +"thenify-all@npm:^1.0.0": + version: 1.6.0 + resolution: "thenify-all@npm:1.6.0" + dependencies: + thenify: ">= 3.1.0 < 4" + checksum: dba7cc8a23a154cdcb6acb7f51d61511c37a6b077ec5ab5da6e8b874272015937788402fd271fdfc5f187f8cb0948e38d0a42dcc89d554d731652ab458f5343e + languageName: node + linkType: hard + +"thenify@npm:>= 3.1.0 < 4": + version: 3.3.1 + resolution: "thenify@npm:3.3.1" + dependencies: + any-promise: ^1.0.0 + checksum: 84e1b804bfec49f3531215f17b4a6e50fd4397b5f7c1bccc427b9c656e1ecfb13ea79d899930184f78bc2f57285c54d9a50a590c8868f4f0cef5c1d9f898b05e + languageName: node + linkType: hard + "thread-stream@npm:^2.0.0": version: 2.3.0 resolution: "thread-stream@npm:2.3.0" @@ -9797,6 +10191,16 @@ __metadata: languageName: node linkType: hard +"timers-ext@npm:^0.1.7": + version: 0.1.7 + resolution: "timers-ext@npm:0.1.7" + dependencies: + es5-ext: ~0.10.46 + next-tick: 1 + checksum: ef3f27a0702a88d885bcbb0317c3e3ecd094ce644da52e7f7d362394a125d9e3578292a8f8966071a980d8abbc3395725333b1856f3ae93835b46589f700d938 + languageName: node + linkType: hard + "tiny-lru@npm:^11.0.1": version: 11.0.1 resolution: "tiny-lru@npm:11.0.1" @@ -10089,6 +10493,20 @@ __metadata: languageName: node linkType: hard +"type@npm:^1.0.1": + version: 1.2.0 + resolution: "type@npm:1.2.0" + checksum: dae8c64f82c648b985caf321e9dd6e8b7f4f2e2d4f846fc6fd2c8e9dc7769382d8a52369ddbaccd59aeeceb0df7f52fb339c465be5f2e543e81e810e413451ee + languageName: node + linkType: hard + +"type@npm:^2.7.2": + version: 2.7.2 + resolution: "type@npm:2.7.2" + checksum: 0f42379a8adb67fe529add238a3e3d16699d95b42d01adfe7b9a7c5da297f5c1ba93de39265ba30ffeb37dfd0afb3fb66ae09f58d6515da442219c086219f6f4 + languageName: node + linkType: hard + "typed-rest-client@npm:^1.8.4": version: 1.8.9 resolution: "typed-rest-client@npm:1.8.9"