diff --git a/default_python/requirements-dev.txt b/default_python/requirements-dev.txt index 40e79bf..35ca1e1 100644 --- a/default_python/requirements-dev.txt +++ b/default_python/requirements-dev.txt @@ -3,6 +3,9 @@ ## For defining dependencies used by jobs in Databricks Workflows, see ## https://docs.databricks.com/dev-tools/bundles/library-dependencies.html +## Add code completion support for DLT +databricks-dlt + ## pytest is the default package used for testing pytest diff --git a/default_python/scratch/exploration.ipynb b/default_python/scratch/exploration.ipynb index 85c9640..516f1ec 100644 --- a/default_python/scratch/exploration.ipynb +++ b/default_python/scratch/exploration.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, { "cell_type": "code", "execution_count": null, @@ -21,7 +31,7 @@ "sys.path.append('../src')\n", "from default_python import main\n", "\n", - "main.get_taxis().show(10)" + "main.get_taxis(spark).show(10)" ] } ], @@ -41,8 +51,16 @@ "name": "python3" }, "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", "name": "python", - "version": "3.11.4" + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" } }, "nbformat": 4, diff --git a/default_python/src/default_python/main.py b/default_python/src/default_python/main.py index 48a80b0..9f662fa 100644 --- a/default_python/src/default_python/main.py +++ b/default_python/src/default_python/main.py @@ -1,11 +1,12 @@ from pyspark.sql import SparkSession -def get_taxis(): - spark = SparkSession.builder.getOrCreate() +def get_taxis(spark: SparkSession): return spark.read.table("samples.nyctaxi.trips") def main(): - get_taxis().show(5) + from databricks.connect import DatabricksSession as SparkSession + spark = SparkSession.builder.getOrCreate() + get_taxis(spark).show(5) if __name__ == '__main__': main() diff --git a/default_python/src/dlt_pipeline.ipynb b/default_python/src/dlt_pipeline.ipynb index 718160e..f277274 100644 --- a/default_python/src/dlt_pipeline.ipynb +++ b/default_python/src/dlt_pipeline.ipynb @@ -19,7 +19,7 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": {}, @@ -33,14 +33,13 @@ "source": [ "# Import DLT and src/default_python\n", "import dlt\n", - "import sys\n", "from pyspark.sql.functions import expr\n", "from default_python import main" ] }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": {}, @@ -54,7 +53,7 @@ "source": [ "@dlt.view\n", "def taxi_raw():\n", - " return main.get_taxis()\n", + " return main.get_taxis(spark)\n", "\n", "@dlt.table\n", "def filtered_taxis():\n", @@ -79,7 +78,7 @@ }, "language_info": { "name": "python", - "version": "3.11.4" + "version": "3.11.6" } }, "nbformat": 4, diff --git a/default_python/src/notebook.ipynb b/default_python/src/notebook.ipynb index b3886a5..cf50ed7 100644 --- a/default_python/src/notebook.ipynb +++ b/default_python/src/notebook.ipynb @@ -19,7 +19,17 @@ }, { "cell_type": "code", - "execution_count": 0, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": { "application/vnd.databricks.v1+cell": { "cellMetadata": { @@ -36,7 +46,7 @@ "source": [ "from default_python import main\n", "\n", - "main.get_taxis().show(10)" + "main.get_taxis(spark).show(10)" ] } ], @@ -56,8 +66,16 @@ "name": "python3" }, "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", "name": "python", - "version": "3.11.4" + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" } }, "nbformat": 4, diff --git a/default_python/tests/main_test.py b/default_python/tests/main_test.py index 9c20d7a..682c40b 100644 --- a/default_python/tests/main_test.py +++ b/default_python/tests/main_test.py @@ -1,14 +1,15 @@ -from databricks.connect import DatabricksSession -from pyspark.sql import SparkSession +from databricks.connect import DatabricksSession as SparkSession +from pytest import fixture from default_python import main +from pytest import fixture -# Create a new Databricks Connect session. If this fails, -# check that you have configured Databricks Connect correctly. -# See https://docs.databricks.com/dev-tools/databricks-connect.html. +@fixture(scope="session") +def spark(): + spark = SparkSession.builder.getOrCreate() + yield spark + spark.stop() -SparkSession.builder = DatabricksSession.builder -SparkSession.builder.getOrCreate() -def test_main(): - taxis = main.get_taxis() +def test_main(spark: SparkSession): + taxis = main.get_taxis(spark) assert taxis.count() > 5