diff --git a/audb/core/api.py b/audb/core/api.py index c64f27b7..ae942338 100644 --- a/audb/core/api.py +++ b/audb/core/api.py @@ -9,6 +9,7 @@ import audformat from audb.core import define +from audb.core import utils from audb.core.cache import database_cache_root from audb.core.cache import default_cache_root from audb.core.config import config @@ -16,8 +17,6 @@ from audb.core.flavor import Flavor from audb.core.lock import FolderLock from audb.core.repository import Repository -from audb.core.utils import _lookup -from audb.core.utils import lookup_backend def available( @@ -42,32 +41,39 @@ def available( """ # noqa: E501 databases = [] for repository in config.REPOSITORIES: - backend = audbackend.create( - repository.backend, - repository.host, - repository.name, - ) try: - names = backend.ls('') - except FileNotFoundError: - # Handle missing repos + backend = utils.access_backend(repository) + if isinstance(backend, audbackend.Artifactory): + # avoid backend.ls('/') + # which is very slow on Artifactory + # see https://github.com/audeering/audbackend/issues/132 + for p in backend._repo.path: + name = p.name + for version in [str(x).split('/')[-1] for x in p / 'db']: + databases.append( + [ + name, + repository.backend, + repository.host, + repository.name, + version, + ] + ) + else: + for path, version in backend.ls('/'): + if path.endswith(define.HEADER_FILE): + name = path.split('/')[1] + databases.append( + [ + name, + repository.backend, + repository.host, + repository.name, + version, + ] + ) + except audbackend.BackendError: continue - for name in names: - try: - versions = backend.ls(f'{name}/{define.DB}') - for version in versions: - databases.append( - [ - name, - repository.backend, - repository.host, - repository.name, - version, - ] - ) - except FileNotFoundError: - # Handle broken databases - continue df = pd.DataFrame.from_records( databases, @@ -262,9 +268,9 @@ def dependencies( deps.load(deps_path) except (AttributeError, FileNotFoundError, ValueError, EOFError): # If loading pickled cached file fails, load again from backend - backend = lookup_backend(name, version) + backend = utils.lookup_backend(name, version) with tempfile.TemporaryDirectory() as tmp_root: - archive = backend.join(name, define.DB) + archive = backend.join('/', name, define.DB + '.zip') backend.get_archive( archive, tmp_root, @@ -467,12 +473,12 @@ def remove_media( for version in versions(name): - backend = lookup_backend(name, version) + backend = utils.lookup_backend(name, version) with tempfile.TemporaryDirectory() as db_root: # download dependencies - archive = backend.join(name, define.DB) + archive = backend.join('/', name, define.DB + '.zip') deps_path = backend.get_archive( archive, db_root, @@ -495,14 +501,12 @@ def remove_media( # if archive exists in this version, # remove file from it and re-publish remote_archive = backend.join( + '/', name, define.DEPEND_TYPE_NAMES[define.DependType.MEDIA], - archive, + archive + '.zip', ) - if backend.exists( - f'{remote_archive}.zip', - version, - ): + if backend.exists(remote_archive, version): files_in_archive = backend.get_archive( remote_archive, @@ -522,9 +526,9 @@ def remove_media( files_in_archive.remove(file) backend.put_archive( db_root, - files_in_archive, remote_archive, version, + files=files_in_archive, ) # mark file as removed @@ -534,12 +538,12 @@ def remove_media( # upload dependencies if upload: deps.save(deps_path) - remote_archive = backend.join(name, define.DB) + remote_archive = backend.join('/', name, define.DB + '.zip') backend.put_archive( db_root, - define.DEPENDENCIES_FILE, remote_archive, version, + files=define.DEPENDENCIES_FILE, verbose=verbose, ) @@ -563,14 +567,19 @@ def repository( repository that contains the database Raises: - RuntimeError: if database is not found + RuntimeError: if database or version is not found Examples: >>> audb.repository('emodb', '1.4.1') Repository('data-public', 'https://audeering.jfrog.io/artifactory', 'artifactory') """ # noqa: E501 - return _lookup(name, version)[0] + if not versions(name): + raise RuntimeError( + f"Cannot find database " + f"'{name}'." + ) + return utils._lookup(name, version)[0] def versions( @@ -591,11 +600,7 @@ def versions( """ vs = [] for repository in config.REPOSITORIES: - backend = audbackend.create( - repository.backend, - repository.host, - repository.name, - ) - header = backend.join(name, 'db.yaml') - vs.extend(backend.versions(header)) + backend = utils.access_backend(repository) + header = backend.join('/', name, 'db.yaml') + vs.extend(backend.versions(header, suppress_backend_errors=True)) return audeer.sort_versions(vs) diff --git a/audb/core/load.py b/audb/core/load.py index 75f57639..4a2e6b43 100644 --- a/audb/core/load.py +++ b/audb/core/load.py @@ -374,9 +374,10 @@ def job(path: str): archive = deps.archive(path) version = deps.version(path) archive = backend.join( + '/', db.name, define.DEPEND_TYPE_NAMES[define.DependType.ATTACHMENT], - archive, + archive + '.zip', ) backend.get_archive( archive, @@ -441,9 +442,10 @@ def _get_media_from_backend( def job(archive: str, version: str): archive = backend.join( + '/', name, define.DEPEND_TYPE_NAMES[define.DependType.MEDIA], - archive, + archive + '.zip', ) # extract and move all files that are stored in the archive, # even if only a single file from the archive was requested @@ -503,9 +505,10 @@ def _get_tables_from_backend( def job(table: str): archive = backend.join( + '/', db.name, define.DEPEND_TYPE_NAMES[define.DependType.META], - deps.archive(table), + deps.archive(table) + '.zip', ) backend.get_archive( archive, @@ -1350,7 +1353,7 @@ def load_header_to( local_header = os.path.join(db_root, define.HEADER_FILE) if overwrite or not os.path.exists(local_header): backend = lookup_backend(name, version) - remote_header = backend.join(name, define.HEADER_FILE) + remote_header = backend.join('/', name, define.HEADER_FILE) if add_audb_meta: db_root_tmp = database_tmp_root(db_root) local_header = os.path.join(db_root_tmp, define.HEADER_FILE) diff --git a/audb/core/load_to.py b/audb/core/load_to.py index 2b3be662..d218484e 100644 --- a/audb/core/load_to.py +++ b/audb/core/load_to.py @@ -122,9 +122,10 @@ def job(path: str): archive = deps.archive(path) version = deps.version(path) archive = backend.join( + '/', db_name, define.DEPEND_TYPE_NAMES[define.DependType.ATTACHMENT], - archive, + archive + '.zip', ) backend.get_archive( archive, @@ -173,9 +174,10 @@ def _get_media( def job(archive: str, version: str): archive = backend.join( + '/', db_name, define.DEPEND_TYPE_NAMES[define.DependType.MEDIA], - archive, + archive + '.zip', ) files = backend.get_archive( archive, @@ -221,9 +223,10 @@ def job(table: str): if os.path.exists(path_pkl): os.remove(path_pkl) archive = backend.join( + '/', db_name, define.DEPEND_TYPE_NAMES[define.DependType.META], - deps.archive(table), + deps.archive(table) + '.zip', ) backend.get_archive( archive, diff --git a/audb/core/publish.py b/audb/core/publish.py index 588ceb45..dbcf0378 100644 --- a/audb/core/publish.py +++ b/audb/core/publish.py @@ -11,6 +11,7 @@ import audiofile from audb.core import define +from audb.core import utils from audb.core.api import dependencies from audb.core.dependencies import Dependencies from audb.core.repository import Repository @@ -345,12 +346,13 @@ def _put_attachments( ): def job(attachment_id: str): archive_file = backend.join( + '/', db.name, define.DEPEND_TYPE_NAMES[define.DependType.ATTACHMENT], - attachment_id, + attachment_id + '.zip', ) files = db.attachments[attachment_id].files - backend.put_archive(db_root, files, archive_file, version) + backend.put_archive(db_root, archive_file, version, files=files) audeer.run_tasks( job, @@ -389,9 +391,10 @@ def job(archive): update_media.append(file) archive_file = backend.join( + '/', db_name, define.DEPEND_TYPE_NAMES[define.DependType.MEDIA], - archive, + archive + '.zip', ) if previous_version is not None: @@ -423,9 +426,9 @@ def job(archive): backend.put_archive( db_root, - files, archive_file, version, + files=files, ) update_media = [] @@ -451,11 +454,12 @@ def _put_tables( def job(table: str): file = f'db.{table}.csv' archive_file = backend.join( + '/', db_name, define.DEPEND_TYPE_NAMES[define.DependType.META], - table, + table + '.zip', ) - backend.put_archive(db_root, file, archive_file, version) + backend.put_archive(db_root, archive_file, version, files=file) audeer.run_tasks( job, @@ -602,14 +606,10 @@ def publish( verbose=verbose, ) - backend = audbackend.create( - repository.backend, - repository.host, - repository.name, - ) + backend = utils.access_backend(repository) - remote_header = backend.join(db.name, define.HEADER_FILE) - versions = backend.versions(remote_header) + remote_header = backend.join('/', db.name, define.HEADER_FILE) + versions = backend.versions(remote_header, suppress_backend_errors=True) if version in versions: raise RuntimeError( 'A version ' @@ -755,12 +755,16 @@ def publish( # publish dependencies and header deps.save(deps_path) - archive_file = backend.join(db.name, define.DB) - backend.put_archive(db_root, define.DEPENDENCIES_FILE, archive_file, - version) + archive_file = backend.join('/', db.name, define.DB + '.zip') + backend.put_archive( + db_root, + archive_file, + version, + files=define.DEPENDENCIES_FILE, + ) try: local_header = os.path.join(db_root, define.HEADER_FILE) - remote_header = db.name + '/' + define.HEADER_FILE + remote_header = backend.join('/', db.name, define.HEADER_FILE) backend.put_file(local_header, remote_header, version) except Exception: # pragma: no cover # after the header is published diff --git a/audb/core/utils.py b/audb/core/utils.py index 850ce666..88c704d9 100644 --- a/audb/core/utils.py +++ b/audb/core/utils.py @@ -10,6 +10,20 @@ from audb.core.repository import Repository +def access_backend( + repository: Repository, +) -> audbackend.Backend: + r"""Helper function to access backend.""" + backend = audbackend.access( + repository.backend, + repository.host, + repository.name, + ) + if isinstance(backend, audbackend.Artifactory): + backend._use_legacy_file_structure() + return backend + + def lookup_backend( name: str, version: str, @@ -58,20 +72,16 @@ def _lookup( """ for repository in config.REPOSITORIES: - backend = audbackend.create( - repository.backend, - repository.host, - repository.name, - ) - header = backend.join(name, 'db.yaml') + backend = access_backend(repository) + header = backend.join('/', name, 'db.yaml') if backend.exists(header, version): return repository, backend raise RuntimeError( - 'Cannot find version ' - f'{version} ' - f'for database ' + f"Cannot find version " + f"'{version}' " + f"for database " f"'{name}'." ) diff --git a/docs/authentication.rst b/docs/authentication.rst index 923f7af8..6b86ee7e 100644 --- a/docs/authentication.rst +++ b/docs/authentication.rst @@ -1,26 +1,7 @@ Authentication ============== -If you want to use an Artifactory backend, -users need to authenticate. -You could use `anonymous access`_, -but we would only recommend it for downloading public data. - -To authenticate -users have to store their credentials in :file:`~/.artifactory_python.cfg`. - -.. code-block:: cfg - - [your-organization.jfrog.io/artifactory] - username = MY_USERNAME - password = MY_API_KEY - -Alternatively, they can export them as environment variables. - -.. code-block:: bash - - export ARTIFACTORY_USERNAME="MY_USERNAME" - export ARTIFACTORY_API_KEY="MY_API_KEY" - - -.. _anonymous access: https://jfrog.com/help/r/how-to-grant-an-anonymous-user-access-to-specific-repositories/ +Using Artifactory as backend +requires authentication. +For more information, +see :class:`audbackend.Artifactory`. diff --git a/docs/publish.rst b/docs/publish.rst index 1f5b08fc..a4907bb2 100644 --- a/docs/publish.rst +++ b/docs/publish.rst @@ -22,6 +22,10 @@ if os.path.exists(folder): shutil.rmtree(folder) + # create repository + os.mkdir('./data') + os.mkdir('./data/data-local') + .. _publish: diff --git a/pyproject.toml b/pyproject.toml index c8ce80aa..7e0621d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,7 +33,7 @@ classifiers = [ ] requires-python = '>=3.8' dependencies = [ - 'audbackend >=0.3.17, <1.0.0', + 'audbackend[artifactory] >=1.0.0', 'audeer >=1.20.0', 'audformat >=0.16.1', 'audiofile >=1.0.0', diff --git a/tests/conftest.py b/tests/conftest.py index 77e1696f..b36bded3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,6 +3,8 @@ import pytest +import audeer + import audb @@ -122,11 +124,13 @@ def repository(tmpdir_factory): """ host = tmpdir_factory.mktemp('host') + name = 'data-unittests-local' repository = audb.Repository( - name='data-unittests-local', + name=name, host=host, backend='file-system', ) + audeer.mkdir(audeer.path(host, name)) current_repositories = audb.config.REPOSITORIES audb.config.REPOSITORIES = [repository] @@ -156,11 +160,13 @@ def persistent_repository(tmpdir_factory): """ host = tmpdir_factory.mktemp('host') + name = 'data-unittests-local' repository = audb.Repository( - name='data-unittests-local', + name=name, host=host, backend='file-system', ) + audeer.mkdir(audeer.path(host, name)) current_repositories = audb.config.REPOSITORIES audb.config.REPOSITORIES = [repository] diff --git a/tests/test_cache.py b/tests/test_cache.py index ae35a4fb..da836a8b 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -67,25 +67,25 @@ def test_cached_name(cache): df = audb.cached(name=DB_NAMES[0]) assert len(df) == 0 # Load first database - audb.load(DB_NAMES[0]) + audb.load(DB_NAMES[0], verbose=False) df = audb.cached() assert len(df) == 1 - assert set(df['name']) == set([DB_NAMES[0]]) + assert set(df['name']) == {DB_NAMES[0]} df = audb.cached(name=DB_NAMES[0]) assert len(df) == 1 - assert set(df['name']) == set([DB_NAMES[0]]) + assert set(df['name']) == {DB_NAMES[0]} df = audb.cached(name=DB_NAMES[1]) assert len(df) == 0 # Load second database - audb.load(DB_NAMES[1]) + audb.load(DB_NAMES[1], verbose=False) df = audb.cached() assert len(df) == 2 assert set(df['name']) == set(DB_NAMES) df = audb.cached(name=DB_NAMES[0]) assert len(df) == 1 - assert set(df['name']) == set([DB_NAMES[0]]) + assert set(df['name']) == {DB_NAMES[0]} df = audb.cached(name=DB_NAMES[1]) assert len(df) == 1 - assert set(df['name']) == set([DB_NAMES[1]]) + assert set(df['name']) == {DB_NAMES[1]} df = audb.cached(name='non-existent') assert len(df) == 0 diff --git a/tests/test_load.py b/tests/test_load.py index a009695b..931f01b1 100644 --- a/tests/test_load.py +++ b/tests/test_load.py @@ -817,20 +817,27 @@ def test_load_to_update(tmpdir, dbs, only_metadata): @pytest.mark.parametrize( - 'name, version', + 'name, version, error, error_msg', [ - (DB_NAME, None), - (DB_NAME, '1.0.0'), + (DB_NAME, '1.0.0', None, None), pytest.param( # database does not exist - 'does-not-exist', None, - marks=pytest.mark.xfail(raises=RuntimeError), + 'does-not-exist', + '1.0.0', + RuntimeError, + "Cannot find database 'does-not-exist'.", ), pytest.param( # version does not exist - DB_NAME, 'does-not-exist', - marks=pytest.mark.xfail(raises=RuntimeError), + DB_NAME, + '999.9.9', + RuntimeError, + f"Cannot find version '999.9.9' for database '{DB_NAME}'.", ) ] ) -def test_repository(persistent_repository, name, version): - repository = audb.repository(name, version) - assert repository == persistent_repository +def test_repository(persistent_repository, name, version, error, error_msg): + if error is not None: + with pytest.raises(error, match=error_msg): + repository = audb.repository(name, version) + else: + repository = audb.repository(name, version) + assert repository == persistent_repository diff --git a/tests/test_lock_db.py b/tests/test_lock_db.py index 0185c3de..37703840 100644 --- a/tests/test_lock_db.py +++ b/tests/test_lock_db.py @@ -331,7 +331,7 @@ def test_lock_load( ) def test_lock_load_crash(set_repositories): - with pytest.raises(RuntimeError): + with pytest.raises(audbackend.BackendError): load_db(-1) @@ -384,7 +384,7 @@ def lock_v1(): thread.start() # -> loading missing table from cache fails - with pytest.raises(RuntimeError): + with pytest.raises(audbackend.BackendError): audb.load( DB_NAME, version='2.0.0', @@ -412,7 +412,7 @@ def lock_v1(): thread.start() # -> loading missing media from cache fails - with pytest.raises(RuntimeError): + with pytest.raises(audbackend.BackendError): audb.load( DB_NAME, version='2.0.0', diff --git a/tests/test_publish.py b/tests/test_publish.py index a497ab23..b163c021 100644 --- a/tests/test_publish.py +++ b/tests/test_publish.py @@ -7,6 +7,7 @@ import pandas as pd import pytest +import audbackend import audeer import audformat.testing import audiofile @@ -463,7 +464,7 @@ def test_publish(dbs, persistent_repository, version): for file in db.files: name = archives[file] if file in archives else file - file_path = backend.join(db.name, 'media', name) + file_path = backend.join('/', db.name, 'media', name) backend.exists(file_path, version) path = os.path.join(dbs[version], file) assert deps.checksum(file) == audeer.md5(path) @@ -950,6 +951,17 @@ def test_publish_error_changed_deps_file_type(tmpdir, repository): audeer.rmdir(db_path) +def test_publish_error_repository_does_not_exist(tmpdir, repository): + + db = audformat.Database('test') + db.save(tmpdir) + + repository.name = 'does-not-exist' + with pytest.raises(audbackend.BackendError) as ex: + audb.publish(tmpdir, '1.0.0', repository) + assert 'No such file or directory' in str(ex.value.exception) + + @pytest.mark.parametrize( 'file', [