Skip to content

Commit

Permalink
Better error message for users trying to use abfss or s3 schemes …
Browse files Browse the repository at this point in the history
…with SDK DBUtils (#634)

## Changes
DBUtils in the SDK only supports the `dbfs` and `file` schemes. Within
DBFS, only paths in the root filesystem are supported, not mountpoints.
Other schemes are not supported. To minimize customer confusion, we can
check for this case during all DBUtils FS operations when constructing
the path.

## Tests
Added a unit test for this case.

- [ ] `make test` run locally
- [ ] `make fmt` applied
- [ ] relevant integration tests applied
  • Loading branch information
mgyucht authored May 2, 2024
1 parent 8d25659 commit 5ff9b86
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 8 deletions.
21 changes: 13 additions & 8 deletions databricks/sdk/mixins/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from types import TracebackType
from typing import (TYPE_CHECKING, AnyStr, BinaryIO, Generator, Iterable,
Iterator, Type, Union)
from urllib import parse

from .._property import _cached_property
from ..errors import NotFound
Expand Down Expand Up @@ -570,15 +571,19 @@ def exists(self, path: str) -> bool:
p = self._path(path)
return p.exists()

__ALLOWED_SCHEMES = [None, 'file', 'dbfs']

def _path(self, src):
src = str(src)
if src.startswith('file:'):
return _LocalPath(src)
if src.startswith('dbfs:'):
src = src[len('dbfs:'):]
if src.startswith('/Volumes'):
return _VolumesPath(self._files_api, src)
return _DbfsPath(self._dbfs_api, src)
src = parse.urlparse(str(src))
if src.scheme and src.scheme not in self.__ALLOWED_SCHEMES:
raise ValueError(
f'unsupported scheme "{src.scheme}". DBUtils in the SDK only supports local, root DBFS, and '
'UC Volumes paths, not external locations or DBFS mount points.')
if src.scheme == 'file':
return _LocalPath(src.geturl())
if src.path.startswith('/Volumes'):
return _VolumesPath(self._files_api, src.geturl())
return _DbfsPath(self._dbfs_api, src.geturl())

def copy(self, src: str, dst: str, *, recursive=False, overwrite=False):
"""Copy files between DBFS and local filesystems"""
Expand Down
7 changes: 7 additions & 0 deletions tests/test_dbfs_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,10 @@ def test_moving_local_dir_to_dbfs(config, tmp_path, mocker):
def test_fs_path(config, path, expected_type):
dbfs_ext = DbfsExt(config)
assert isinstance(dbfs_ext._path(path), expected_type)


def test_fs_path_invalid(config):
dbfs_ext = DbfsExt(config)
with pytest.raises(ValueError) as e:
dbfs_ext._path('s3://path/to/file')
assert 'unsupported scheme "s3"' in str(e.value)

0 comments on commit 5ff9b86

Please sign in to comment.