diff --git a/knowledge_base/sbt-example/.gitignore b/knowledge_base/sbt-example/.gitignore
new file mode 100644
index 0000000..27fb80b
--- /dev/null
+++ b/knowledge_base/sbt-example/.gitignore
@@ -0,0 +1,4 @@
+/.bsp/
+target/
+.databricks
+.vscode
diff --git a/knowledge_base/sbt-example/README.md b/knowledge_base/sbt-example/README.md
new file mode 100644
index 0000000..eb5452d
--- /dev/null
+++ b/knowledge_base/sbt-example/README.md
@@ -0,0 +1,33 @@
+# sbt example
+
+This example demonstrates how to build a Scala JAR with [sbt](https://www.scala-sbt.org/) and run it from a Databricks job.
+
+## Prerequisites
+
+* Databricks CLI v0.226.0 or above
+* [sbt](https://www.scala-sbt.org/) v1.10.1 or above
+
+## Usage
+
+Update the `host` field under `workspace` in `databricks.yml` to the URL of the Databricks workspace you wish to deploy to.
+
+Uncomment and update the `artifact_path` field under `workspace` in `databricks.yml` to the Unity Catalog Volume path where the JAR artifact should be uploaded.
+
+Run `databricks bundle deploy` to deploy the job.
+
+Run `databricks bundle run example_job` to run the job.
+
+Example output:
+
+```
+% databricks bundle run example_job
+Run URL: https://...
+
+2024-08-09 15:49:17 "Example running a Scala JAR built with sbt" TERMINATED SUCCESS
++-----+
+| word|
++-----+
+|Hello|
+|World|
++-----+
+```
diff --git a/knowledge_base/sbt-example/build.sbt b/knowledge_base/sbt-example/build.sbt
new file mode 100644
index 0000000..69ca324
--- /dev/null
+++ b/knowledge_base/sbt-example/build.sbt
@@ -0,0 +1,10 @@
+name := "sbt-example"
+
+version := "0.1.0-SNAPSHOT"
+
+scalaVersion := "2.12.19"
+
+libraryDependencies ++= Seq(
+  "org.apache.spark" %% "spark-core" % "3.5.0",
+  "org.apache.spark" %% "spark-sql" % "3.5.0"
+)
diff --git a/knowledge_base/sbt-example/databricks.yml b/knowledge_base/sbt-example/databricks.yml
new file mode 100644
index 0000000..1ee830f
--- /dev/null
+++ b/knowledge_base/sbt-example/databricks.yml
@@ -0,0 +1,28 @@
+bundle:
+  name: sbt_example
+
+include:
+  - ./resources/job.yml
+
+workspace:
+  host: https://myworkspace.cloud.databricks.com
+
+  # JARs must be stored in a Unity Catalog Volume.
+  # Uncomment the line below and replace the path with your Unity Catalog Volume path.
+  #
+  # artifact_path: /Volumes/my_catalog/my_schema/my_volume/some_path
+
+artifacts:
+  sbt_example:
+    type: jar
+    build: sbt package
+    files:
+      - source: ./target/scala-2.12/sbt-example*.jar
+
+permissions:
+  - group_name: users
+    level: CAN_VIEW
+
+targets:
+  dev:
+    default: true
diff --git a/knowledge_base/sbt-example/project/build.properties b/knowledge_base/sbt-example/project/build.properties
new file mode 100644
index 0000000..ee4c672
--- /dev/null
+++ b/knowledge_base/sbt-example/project/build.properties
@@ -0,0 +1 @@
+sbt.version=1.10.1
diff --git a/knowledge_base/sbt-example/resources/job.yml b/knowledge_base/sbt-example/resources/job.yml
new file mode 100644
index 0000000..c59966c
--- /dev/null
+++ b/knowledge_base/sbt-example/resources/job.yml
@@ -0,0 +1,27 @@
+resources:
+  jobs:
+    example_job:
+      name: "Example running a Scala JAR built with sbt"
+
+      tasks:
+        - task_key: task
+
+          spark_jar_task:
+            main_class_name: SparkApp
+
+          libraries:
+            - jar: ../target/scala-2.12/sbt-example*.jar
+
+          new_cluster:
+            node_type_id: i3.xlarge
+            spark_version: 15.4.x-scala2.12
+            num_workers: 0
+            spark_conf:
+              "spark.databricks.cluster.profile": "singleNode"
+              "spark.master": "local[*, 4]"
+            custom_tags:
+              "ResourceClass": "SingleNode"
+
+            # The cluster must run in single user access mode, which is
+            # compatible with Unity Catalog and can access Unity Catalog Volumes.
+            data_security_mode: SINGLE_USER
diff --git a/knowledge_base/sbt-example/src/main/scala/example/SparkApp.scala b/knowledge_base/sbt-example/src/main/scala/example/SparkApp.scala
new file mode 100644
index 0000000..d589888
--- /dev/null
+++ b/knowledge_base/sbt-example/src/main/scala/example/SparkApp.scala
@@ -0,0 +1,12 @@
+import org.apache.spark.sql.SparkSession
+
+object SparkApp {
+  def main(args: Array[String]): Unit = {
+    val spark = SparkSession.builder().getOrCreate()
+
+    import spark.implicits._
+
+    val data = Seq("Hello", "World").toDF("word")
+    data.show()
+  }
+}
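
`SparkApp` calls `SparkSession.builder().getOrCreate()` with no master or app name, so on Databricks it picks up the session the job cluster already provides. For iterating on the DataFrame logic locally before running `databricks bundle deploy`, a variant along the lines below should work; `SparkAppLocalCheck` is a hypothetical name, and the sketch assumes the compile-scoped `spark-core`/`spark-sql` dependencies in `build.sbt`, which let a local Spark master start outside Databricks.

```scala
import org.apache.spark.sql.SparkSession

// Hypothetical local smoke test for the same logic as SparkApp.
// Assumes the compile-scoped spark-core/spark-sql dependencies from
// build.sbt, so a local master can start without a Databricks cluster.
object SparkAppLocalCheck {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .master("local[*]") // local master; on Databricks the job cluster supplies this
      .appName("sbt-example-local-check")
      .getOrCreate()

    import spark.implicits._

    // Same single-column DataFrame as SparkApp, printed to stdout.
    val data = Seq("Hello", "World").toDF("word")
    data.show()

    spark.stop()
  }
}
```

Dropped into `src/main/scala`, something like `sbt "runMain SparkAppLocalCheck"` runs it on a local master, while the deployed job keeps `SparkApp` as the `main_class_name` configured in `resources/job.yml`.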