Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Inner dev loop fixes #18

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions default_python/requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
## For defining dependencies used by jobs in Databricks Workflows, see
## https://docs.databricks.com/dev-tools/bundles/library-dependencies.html

## Add code completion support for DLT
databricks-dlt
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we have this, would it be possible/desirable to enable "python.analysis.typeCheckingMode"?


## pytest is the default package used for testing
pytest

Expand Down
22 changes: 20 additions & 2 deletions default_python/scratch/exploration.ipynb
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -21,7 +31,7 @@
"sys.path.append('../src')\n",
"from default_python import main\n",
"\n",
"main.get_taxis().show(10)"
"main.get_taxis(spark).show(10)"
]
}
],
Expand All @@ -41,8 +51,16 @@
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"version": "3.11.4"
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
Expand Down
7 changes: 4 additions & 3 deletions default_python/src/default_python/main.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from pyspark.sql import SparkSession

def get_taxis():
spark = SparkSession.builder.getOrCreate()
def get_taxis(spark: SparkSession):
return spark.read.table("samples.nyctaxi.trips")

def main():
get_taxis().show(5)
from databricks.connect import DatabricksSession as SparkSession
spark = SparkSession.builder.getOrCreate()
get_taxis(spark).show(5)
fjakobs marked this conversation as resolved.
Show resolved Hide resolved

if __name__ == '__main__':
main()
9 changes: 4 additions & 5 deletions default_python/src/dlt_pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
},
{
"cell_type": "code",
"execution_count": 0,
"execution_count": null,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
Expand All @@ -33,14 +33,13 @@
"source": [
"# Import DLT and src/default_python\n",
"import dlt\n",
"import sys\n",
"from pyspark.sql.functions import expr\n",
"from default_python import main"
]
},
{
"cell_type": "code",
"execution_count": 0,
"execution_count": null,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
Expand All @@ -54,7 +53,7 @@
"source": [
"@dlt.view\n",
"def taxi_raw():\n",
" return main.get_taxis()\n",
" return main.get_taxis(spark)\n",
"\n",
"@dlt.table\n",
"def filtered_taxis():\n",
Expand All @@ -79,7 +78,7 @@
},
"language_info": {
"name": "python",
"version": "3.11.4"
"version": "3.11.6"
}
},
"nbformat": 4,
Expand Down
24 changes: 21 additions & 3 deletions default_python/src/notebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,17 @@
},
{
"cell_type": "code",
"execution_count": 0,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
fjakobs marked this conversation as resolved.
Show resolved Hide resolved
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
Expand All @@ -36,7 +46,7 @@
"source": [
"from default_python import main\n",
"\n",
"main.get_taxis().show(10)"
"main.get_taxis(spark).show(10)"
]
}
],
Expand All @@ -56,8 +66,16 @@
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"version": "3.11.4"
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
}
},
"nbformat": 4,
Expand Down
19 changes: 10 additions & 9 deletions default_python/tests/main_test.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from databricks.connect import DatabricksSession
from pyspark.sql import SparkSession
from databricks.connect import DatabricksSession as SparkSession
from pytest import fixture
from default_python import main
from pytest import fixture

# Create a new Databricks Connect session. If this fails,
# check that you have configured Databricks Connect correctly.
# See https://docs.databricks.com/dev-tools/databricks-connect.html.
@fixture(scope="session")
def spark():
spark = SparkSession.builder.getOrCreate()
yield spark
spark.stop()

SparkSession.builder = DatabricksSession.builder
SparkSession.builder.getOrCreate()

def test_main():
taxis = main.get_taxis()
def test_main(spark: SparkSession):
taxis = main.get_taxis(spark)
assert taxis.count() > 5