Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make a best effort attempt to initialise all Databricks globals #562

Merged
merged 28 commits into from
Feb 28, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
3bf489f
Make a best effort attempt to initialise all Databricks globals
kartikgupta-db Feb 22, 2024
1e55619
lint
kartikgupta-db Feb 22, 2024
ebe0fc0
remove stubs file
kartikgupta-db Feb 22, 2024
7296d83
remove uneeded imports
kartikgupta-db Feb 22, 2024
dd5cb29
remove uneeded imports
kartikgupta-db Feb 22, 2024
62cb415
rename
kartikgupta-db Feb 22, 2024
5c3a78c
lint
kartikgupta-db Feb 22, 2024
7ce8fa3
fix test
kartikgupta-db Feb 22, 2024
f90d141
revert make file change
kartikgupta-db Feb 22, 2024
95508c1
Update databricks/sdk/runtime/__init__.py
kartikgupta-db Feb 26, 2024
ebac19d
Update databricks/sdk/runtime/__init__.py
kartikgupta-db Feb 26, 2024
7afdc6a
trigger imports only on function calls
kartikgupta-db Feb 26, 2024
db36f52
trigger imports only on function calls
kartikgupta-db Feb 26, 2024
12d8170
Merge branch 'all-databricks-globals' of github.com:databricks/databr…
kartikgupta-db Feb 26, 2024
4b9d8e5
address feedback
kartikgupta-db Feb 26, 2024
70a609e
Run integration test against multiple dbconnect install
kartikgupta-db Feb 26, 2024
82e60fa
Update setup.py
kartikgupta-db Feb 26, 2024
d3a4842
Run integration test against multiple dbconnect install
kartikgupta-db Feb 26, 2024
6980373
Run integration test against multiple dbconnect install
kartikgupta-db Feb 26, 2024
b3b2154
Run integration test against multiple dbconnect install
kartikgupta-db Feb 26, 2024
09370eb
Run integration test against multiple dbconnect install
kartikgupta-db Feb 26, 2024
0a585a9
fmt
kartikgupta-db Feb 26, 2024
18a022a
fmt
kartikgupta-db Feb 26, 2024
5fea7a9
fmt
kartikgupta-db Feb 26, 2024
ad501a5
fix no error test
kartikgupta-db Feb 26, 2024
87989f1
fix no error test
kartikgupta-db Feb 26, 2024
70dba8c
Address feedback
kartikgupta-db Feb 28, 2024
045d35f
Merge branch 'main' into all-databricks-globals
kartikgupta-db Feb 28, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 74 additions & 10 deletions databricks/sdk/runtime/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,23 +86,87 @@ def inner() -> Dict[str, str]:
_globals[var] = userNamespaceGlobals[var]
is_local_implementation = False
except ImportError:
from typing import cast

# OSS implementation
is_local_implementation = True

try:
# We expect this to fail and only do this for providing types
from pyspark.sql.context import SQLContext
sqlContext: SQLContext = None # type: ignore
sql = sqlContext.sql
table = sqlContext.table
except Exception:
pass
kartikgupta-db marked this conversation as resolved.
Show resolved Hide resolved

# The next few try-except blocks are for initialising globals in a best effort
# manner. We separate them to try to get as many of them working as possible
try:
from pyspark.sql.functions import udf # type: ignore
except ImportError as e:
logging.debug(f"Failed to initialise udf global: {e}")

try:
from databricks.connect import DatabricksSession # type: ignore
spark = DatabricksSession.builder.getOrCreate()
sc = spark.sparkContext
except Exception as e:
# We are ignoring all failures here because user might want to initialize
# spark session themselves and we don't want to interfere with that
logging.debug(f"Failed to initialize globals 'spark' and 'sc', continuing. Cause: {e}")

try:
from IPython import display as IPDisplay

def display(input=None, *args, **kwargs) -> None:  # type: ignore
    """
    Display plots or data.
    Display plot:
    - display() # no-op
    - display(matplotlib.figure.Figure)
    Display dataset:
    - display(spark.DataFrame)
    - display(list) # if list can be converted to DataFrame, e.g., list of named tuples
    - display(pandas.DataFrame)
    - display(koalas.DataFrame)
    - display(pyspark.pandas.DataFrame)
    Display any other value that has a _repr_html_() method
    For Spark 2.0 and 2.1:
    - display(DataFrame, streamName='optional', trigger=optional pyspark.sql.streaming.Trigger,
    checkpointLocation='optional')
    For Spark 2.2+:
    - display(DataFrame, streamName='optional', trigger=optional interval like '1 second',
    checkpointLocation='optional')
    """
    # Local (non-DBR) fallback: delegate straight to IPython's display();
    # rich rendering (_repr_html_ etc.) is handled by IPython itself.
    return IPDisplay.display(input, *args, **kwargs)  # type: ignore

def displayHTML(html) -> None:  # type: ignore
    """
    Display HTML data.
    Parameters
    ----------
    html : URL or HTML string
        If html is a URL, display the resource at that URL, the resource is loaded dynamically by the browser.
        Otherwise html should be the HTML to be displayed.
    See also:
    IPython.display.HTML
    IPython.display.display_html
    """
    # raw=True renders the string as HTML markup rather than as its repr().
    return IPDisplay.display_html(html, raw=True)  # type: ignore

except ImportError as e:
logging.debug(f"Failed to initialise globals 'display' and 'displayHTML', continuing. Cause: {e}")

# We want to propagate the error in initialising dbutils because this is a core
# functionality of the sdk
from typing import cast
kartikgupta-db marked this conversation as resolved.
Show resolved Hide resolved

from databricks.sdk.dbutils import RemoteDbUtils

from . import dbutils_stub

dbutils_type = Union[dbutils_stub.dbutils, RemoteDbUtils]

try:
from .stub import *
except (ImportError, NameError):
# this assumes that all environment variables are set
dbutils = RemoteDbUtils()

dbutils = RemoteDbUtils()
dbutils = cast(dbutils_type, dbutils)
getArgument = dbutils.widgets.getArgument

__all__ = ['dbutils'] if is_local_implementation else dbruntime_objects
__all__ = dbruntime_objects
48 changes: 0 additions & 48 deletions databricks/sdk/runtime/stub.py

This file was deleted.

3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
install_requires=["requests>=2.28.1,<3", "google-auth~=2.0"],
extras_require={"dev": ["pytest", "pytest-cov", "pytest-xdist", "pytest-mock",
"yapf", "pycodestyle", "autoflake", "isort", "wheel",
"ipython", "ipywidgets", "requests-mock", "pyfakefs"],
"ipython", "ipywidgets", "requests-mock", "pyfakefs",
"databricks-connect", "ipython"],
kartikgupta-db marked this conversation as resolved.
Show resolved Hide resolved
"notebook": ["ipython>=8,<9", "ipywidgets>=8,<9"]},
author="Serge Smertin",
author_email="[email protected]",
Expand Down
30 changes: 30 additions & 0 deletions tests/integration/test_local_globals.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from contextlib import contextmanager


@contextmanager
def restorable_env():
    """Snapshot ``os.environ`` on entry and restore it verbatim on exit.

    Restoration happens even if the body raises, so tests can freely
    mutate environment variables without leaking state to each other.
    """
    import os
    snapshot = dict(os.environ)

    try:
        yield
    finally:
        # Drop everything the body changed or added, then reinstate the snapshot.
        os.environ.clear()
        os.environ.update(snapshot)
kartikgupta-db marked this conversation as resolved.
Show resolved Hide resolved


def test_local_global_spark(w, env_or_skip):
    """The `spark` global should auto-connect to the configured cluster."""
    # Skips the test unless a Spark Connect cluster id is provided.
    target_cluster = env_or_skip("SPARK_CONNECT_CLUSTER_ID")
    with restorable_env():
        import os
        os.environ["DATABRICKS_CLUSTER_ID"] = target_cluster
        # Import inside the patched environment so the runtime module
        # initialises `spark` against the target cluster.
        from databricks.sdk.runtime import spark
        rows = spark.sql("SELECT 1").collect()
        assert rows[0][0] == 1


def test_local_global_display(w, env_or_skip):
    """Smoke test: the local fallbacks for display helpers must not raise."""
    from databricks.sdk.runtime import display, displayHTML

    # Call each helper once with a trivial payload; success == no exception.
    for render in (display, displayHTML):
        render("test")
Loading