Add example showing a job running an sbt-built Scala JAR #34

Open · wants to merge 2 commits into main
4 changes: 4 additions & 0 deletions knowledge_base/sbt-example/.gitignore
@@ -0,0 +1,4 @@
/.bsp/
target/
.databricks
.vscode
33 changes: 33 additions & 0 deletions knowledge_base/sbt-example/README.md
@@ -0,0 +1,33 @@
# sbt example

This example demonstrates how to build a Scala JAR with [sbt](https://www.scala-sbt.org/) and use it from a job.

## Prerequisites

* Databricks CLI v0.226.0 or above
* [sbt](https://www.scala-sbt.org/) v1.10.1 or above

## Usage

Update the `host` field under `workspace` in `databricks.yml` to the Databricks workspace you wish to deploy to.

Update the `artifact_path` field under `workspace` in `databricks.yml` to the Unity Catalog Volume path where the built JAR should be uploaded.

Run `databricks bundle deploy` to deploy the job.

Run `databricks bundle run example_job` to run the job.

Example output:

```
% databricks bundle run example_job
Run URL: https://...

2024-08-09 15:49:17 "Example running a Scala JAR built with sbt" TERMINATED SUCCESS
+-----+
| word|
+-----+
|Hello|
|World|
+-----+
```
10 changes: 10 additions & 0 deletions knowledge_base/sbt-example/build.sbt
@@ -0,0 +1,10 @@
name := "sbt-example"

version := "0.1.0-SNAPSHOT"

scalaVersion := "2.12.19"

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % "3.5.0",
  "org.apache.spark" %% "spark-sql" % "3.5.0"
)

Contributor commented on `build.sbt`:

> You might want to specify the `% "provided"` option. The dependencies might already be installed on a Databricks cluster, and this could help reduce the JAR size.
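A minimal sketch of the reviewer's suggestion (not part of this PR's diff), assuming the cluster's Databricks Runtime already supplies these Spark artifacts:

```scala
// Mark the Spark modules as "provided": they stay on the compile
// classpath but are excluded from the packaged JAR, since the cluster's
// own Spark installation supplies them at runtime. This shrinks the JAR
// and avoids shipping classes that would shadow the runtime's versions.
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % "3.5.0" % "provided",
  "org.apache.spark" %% "spark-sql" % "3.5.0" % "provided"
)
```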
28 changes: 28 additions & 0 deletions knowledge_base/sbt-example/databricks.yml
@@ -0,0 +1,28 @@
bundle:
  name: sbt_example

include:
  - ./resources/job.yml

workspace:
  host: https://myworkspace.cloud.databricks.com

  # JARs must be stored in a Unity Catalog Volume.
  # Uncomment the line below and replace the path with the path to your Unity Catalog Volume.
  #
  # artifact_path: /Volumes/my_catalog/my_schema/my_volume/some_path

artifacts:
  sbt_example:
    type: jar
    build: sbt package
    files:
      # `sbt package` writes the JAR to ./target/scala-2.12/ as
      # sbt-example_2.12-0.1.0-SNAPSHOT.jar, which this glob matches.
      - source: ./target/scala-2.12/sbt-example*.jar

permissions:
  - group_name: users
    level: CAN_VIEW

targets:
  dev:
    default: true
1 change: 1 addition & 0 deletions knowledge_base/sbt-example/project/build.properties
@@ -0,0 +1 @@
sbt.version=1.10.1
27 changes: 27 additions & 0 deletions knowledge_base/sbt-example/resources/job.yml
@@ -0,0 +1,27 @@
resources:
  jobs:
    example_job:
      name: "Example running a Scala JAR built with sbt"

      tasks:
        - task_key: task

          spark_jar_task:
            main_class_name: SparkApp

          libraries:
            - jar: ../target/scala-2.12/sbt-example*.jar

          new_cluster:
            node_type_id: i3.xlarge
            spark_version: 15.4.x-scala2.12
            num_workers: 0
            spark_conf:
              "spark.databricks.cluster.profile": "singleNode"
              "spark.master": "local[*, 4]"
            custom_tags:
              "ResourceClass": "SingleNode"

            # The cluster must run in single user isolation mode.
            # This means it is compatible with Unity Catalog and can access Unity Catalog Volumes.
            data_security_mode: SINGLE_USER
12 changes: 12 additions & 0 deletions knowledge_base/sbt-example/src/main/scala/example/SparkApp.scala
@@ -0,0 +1,12 @@
import org.apache.spark.sql.SparkSession

object SparkApp {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().getOrCreate()

    import spark.implicits._

    val data = Seq("Hello", "World").toDF("word")
    data.show()
  }
}
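A `spark_jar_task` can also pass arguments to `main` through its `parameters` field. Below is a minimal sketch of how this app could consume them; the `ParameterizedSparkApp` name and the fallback behavior are hypothetical and not part of this PR:

```scala
import org.apache.spark.sql.SparkSession

// Hypothetical variant: the words to display arrive as job parameters
// (the spark_jar_task `parameters` field) instead of being hardcoded.
object ParameterizedSparkApp {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().getOrCreate()
    import spark.implicits._

    // Fall back to the original demo data when no parameters are given.
    val words = if (args.nonEmpty) args.toSeq else Seq("Hello", "World")
    words.toDF("word").show()
  }
}
```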