From a700ba1f7395881eb97d6aaad6458afbd2f3fb8c Mon Sep 17 00:00:00 2001
From: Miles Yucht
Date: Thu, 31 Oct 2024 11:27:51 +0100
Subject: [PATCH 1/2] Handle PySparkRuntimeError when populating globals in
 databricks.sdk.runtime

---
 databricks/sdk/runtime/__init__.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/databricks/sdk/runtime/__init__.py b/databricks/sdk/runtime/__init__.py
index 9230c7a83..08f4be2fa 100644
--- a/databricks/sdk/runtime/__init__.py
+++ b/databricks/sdk/runtime/__init__.py
@@ -2,6 +2,7 @@
 import logging
 from typing import Dict, Optional, Union, cast
+from databricks.sdk.errors import DatabricksError
 
 logger = logging.getLogger('databricks.sdk')
 
 is_local_implementation = True
@@ -73,6 +74,19 @@ def inner() -> Dict[str, str]:
     return None, None
 
 
+def _is_unexpected_exception_loading_user_namespace(e: Exception) -> bool:
+    # The dbruntime module is not present outside of DBR
+    if isinstance(e, ImportError):
+        return False
+    # In notebooks, the UserNamespaceInitializer works, but the notebook context is not propagated to
+    # spawned Python subprocesses, resulting in this class throwing a
+    # pyspark.errors.exceptions.base.PySparkRuntimeError. The SDK does not depend on PySpark, so we
+    # need to check the type and module name directly.
+    if type(e).__name__ == 'PySparkRuntimeError' and e.__module__ == 'pyspark.errors.exceptions.base':
+        return False
+    return True
+
+
 try:
     # Internal implementation
     # Separated from above for backward compatibility
@@ -85,7 +99,9 @@ def inner() -> Dict[str, str]:
             continue
         _globals[var] = userNamespaceGlobals[var]
     is_local_implementation = False
-except ImportError:
+except Exception as e:
+    if _is_unexpected_exception_loading_user_namespace(e):
+        raise DatabricksError(f"Failed to initialize runtime globals") from e
     # OSS implementation
     is_local_implementation = True
 

From f1d87823783a4120bdb09ede43206d9aa4e42368 Mon Sep 17 00:00:00 2001
From: Miles Yucht
Date: Thu, 31 Oct 2024 14:48:42 +0100
Subject: [PATCH 2/2] fmt

---
 databricks/sdk/runtime/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/databricks/sdk/runtime/__init__.py b/databricks/sdk/runtime/__init__.py
index 08f4be2fa..962034ee4 100644
--- a/databricks/sdk/runtime/__init__.py
+++ b/databricks/sdk/runtime/__init__.py
@@ -2,6 +2,7 @@
 import logging
 from typing import Dict, Optional, Union, cast
+
 from databricks.sdk.errors import DatabricksError
 
 logger = logging.getLogger('databricks.sdk')
@@ -82,7 +83,7 @@ def _is_unexpected_exception_loading_user_namespace(e: Exception) -> bool:
     # spawned Python subprocesses, resulting in this class throwing a
     # pyspark.errors.exceptions.base.PySparkRuntimeError. The SDK does not depend on PySpark, so we
     # need to check the type and module name directly.
-    if type(e).__name__ == 'PySparkRuntimeError' and e.__module__ == 'pyspark.errors.exceptions.base':
+    if type(e).__name__ == 'PySparkRuntimeError' and e.__module__ == 'pyspark.errors.exceptions.base':
         return False
     return True
 
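
A note on the technique for readers: the helper in this patch matches the exception by class name and
defining module string rather than importing pyspark, so the SDK keeps PySpark out of its dependency
set. Below is a minimal, self-contained sketch of the same duck-typed check; the helper and the
stand-in exception class are hypothetical illustrations, not part of the patch.

    def _looks_like_pyspark_runtime_error(e: Exception) -> bool:
        # Match by class name and defining module so pyspark never has to be imported.
        return (type(e).__name__ == 'PySparkRuntimeError'
                and type(e).__module__ == 'pyspark.errors.exceptions.base')

    # Stand-in class mimicking PySpark's exception, for trying the check without PySpark installed.
    class PySparkRuntimeError(Exception):
        pass

    PySparkRuntimeError.__module__ = 'pyspark.errors.exceptions.base'

    assert _looks_like_pyspark_runtime_error(PySparkRuntimeError('boom'))
    assert not _looks_like_pyspark_runtime_error(ValueError('boom'))

Note that the patch reads e.__module__ on the instance; attribute lookup falls through to the class,
so this is equivalent to type(e).__module__ as used in the sketch.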