From 5ff9b8699fec7faa83eb99b7136a179b46b416aa Mon Sep 17 00:00:00 2001 From: Miles Yucht Date: Thu, 2 May 2024 17:45:02 +0200 Subject: [PATCH] Better error message for users trying to use `abfss` or `s3` schemes with SDK DBUtils (#634) ## Changes DBUtils in the SDK only supports the `dbfs` and `file` schemes. Within DBFS, only paths in the root filesystem are supported, not mountpoints. Other schemes are not supported. To minimize customer confusion, we can check for this case during all DBUtils FS operations when constructing the path. ## Tests Added a unit test for this case. - [ ] `make test` run locally - [ ] `make fmt` applied - [ ] relevant integration tests applied --- databricks/sdk/mixins/files.py | 21 +++++++++++++-------- tests/test_dbfs_mixins.py | 7 +++++++ 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/databricks/sdk/mixins/files.py b/databricks/sdk/mixins/files.py index e64bc7691..3295aef7a 100644 --- a/databricks/sdk/mixins/files.py +++ b/databricks/sdk/mixins/files.py @@ -11,6 +11,7 @@ from types import TracebackType from typing import (TYPE_CHECKING, AnyStr, BinaryIO, Generator, Iterable, Iterator, Type, Union) +from urllib import parse from .._property import _cached_property from ..errors import NotFound @@ -570,15 +571,19 @@ def exists(self, path: str) -> bool: p = self._path(path) return p.exists() + __ALLOWED_SCHEMES = [None, 'file', 'dbfs'] + def _path(self, src): - src = str(src) - if src.startswith('file:'): - return _LocalPath(src) - if src.startswith('dbfs:'): - src = src[len('dbfs:'):] - if src.startswith('/Volumes'): - return _VolumesPath(self._files_api, src) - return _DbfsPath(self._dbfs_api, src) + src = parse.urlparse(str(src)) + if src.scheme and src.scheme not in self.__ALLOWED_SCHEMES: + raise ValueError( + f'unsupported scheme "{src.scheme}". DBUtils in the SDK only supports local, root DBFS, and ' + 'UC Volumes paths, not external locations or DBFS mount points.') + if src.scheme == 'file': + return _LocalPath(src.geturl()) + if src.path.startswith('/Volumes'): + return _VolumesPath(self._files_api, src.geturl()) + return _DbfsPath(self._dbfs_api, src.geturl()) def copy(self, src: str, dst: str, *, recursive=False, overwrite=False): """Copy files between DBFS and local filesystems""" diff --git a/tests/test_dbfs_mixins.py b/tests/test_dbfs_mixins.py index c00961fcc..427c445fd 100644 --- a/tests/test_dbfs_mixins.py +++ b/tests/test_dbfs_mixins.py @@ -63,3 +63,10 @@ def test_moving_local_dir_to_dbfs(config, tmp_path, mocker): def test_fs_path(config, path, expected_type): dbfs_ext = DbfsExt(config) assert isinstance(dbfs_ext._path(path), expected_type) + + +def test_fs_path_invalid(config): + dbfs_ext = DbfsExt(config) + with pytest.raises(ValueError) as e: + dbfs_ext._path('s3://path/to/file') + assert 'unsupported scheme "s3"' in str(e.value)