Skip to content

Commit

Permalink
Switch to new backend interface (#306)
Browse files (browse the repository at this point in the history)
* Support audbackend>=1.0.0

* Require audbackend>=1.0.0

* Fix ruff errors

* Use audbackend[artifactory] in dependencies

---------

Co-authored-by: Hagen Wierstorf <[email protected]>
  • Loading branch information
frankenjoe and hagenw authored Oct 16, 2023
1 parent ca1a21c commit 900e3be
Show file tree
Hide file tree
Showing 13 changed files with 161 additions and 126 deletions.
99 changes: 52 additions & 47 deletions audb/core/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,14 @@
import audformat

from audb.core import define
from audb.core import utils
from audb.core.cache import database_cache_root
from audb.core.cache import default_cache_root
from audb.core.config import config
from audb.core.dependencies import Dependencies
from audb.core.flavor import Flavor
from audb.core.lock import FolderLock
from audb.core.repository import Repository
from audb.core.utils import _lookup
from audb.core.utils import lookup_backend


def available(
Expand All @@ -42,32 +41,39 @@ def available(
""" # noqa: E501
databases = []
for repository in config.REPOSITORIES:
backend = audbackend.create(
repository.backend,
repository.host,
repository.name,
)
try:
names = backend.ls('')
except FileNotFoundError:
# Handle missing repos
backend = utils.access_backend(repository)
if isinstance(backend, audbackend.Artifactory):
# avoid backend.ls('/')
# which is very slow on Artifactory
# see https://github.com/audeering/audbackend/issues/132
for p in backend._repo.path:
name = p.name
for version in [str(x).split('/')[-1] for x in p / 'db']:
databases.append(
[
name,
repository.backend,
repository.host,
repository.name,
version,
]
)
else:
for path, version in backend.ls('/'):
if path.endswith(define.HEADER_FILE):
name = path.split('/')[1]
databases.append(
[
name,
repository.backend,
repository.host,
repository.name,
version,
]
)
except audbackend.BackendError:
continue
for name in names:
try:
versions = backend.ls(f'{name}/{define.DB}')
for version in versions:
databases.append(
[
name,
repository.backend,
repository.host,
repository.name,
version,
]
)
except FileNotFoundError:
# Handle broken databases
continue

df = pd.DataFrame.from_records(
databases,
Expand Down Expand Up @@ -262,9 +268,9 @@ def dependencies(
deps.load(deps_path)
except (AttributeError, FileNotFoundError, ValueError, EOFError):
# If loading pickled cached file fails, load again from backend
backend = lookup_backend(name, version)
backend = utils.lookup_backend(name, version)
with tempfile.TemporaryDirectory() as tmp_root:
archive = backend.join(name, define.DB)
archive = backend.join('/', name, define.DB + '.zip')
backend.get_archive(
archive,
tmp_root,
Expand Down Expand Up @@ -467,12 +473,12 @@ def remove_media(

for version in versions(name):

backend = lookup_backend(name, version)
backend = utils.lookup_backend(name, version)

with tempfile.TemporaryDirectory() as db_root:

# download dependencies
archive = backend.join(name, define.DB)
archive = backend.join('/', name, define.DB + '.zip')
deps_path = backend.get_archive(
archive,
db_root,
Expand All @@ -495,14 +501,12 @@ def remove_media(
# if archive exists in this version,
# remove file from it and re-publish
remote_archive = backend.join(
'/',
name,
define.DEPEND_TYPE_NAMES[define.DependType.MEDIA],
archive,
archive + '.zip',
)
if backend.exists(
f'{remote_archive}.zip',
version,
):
if backend.exists(remote_archive, version):

files_in_archive = backend.get_archive(
remote_archive,
Expand All @@ -522,9 +526,9 @@ def remove_media(
files_in_archive.remove(file)
backend.put_archive(
db_root,
files_in_archive,
remote_archive,
version,
files=files_in_archive,
)

# mark file as removed
Expand All @@ -534,12 +538,12 @@ def remove_media(
# upload dependencies
if upload:
deps.save(deps_path)
remote_archive = backend.join(name, define.DB)
remote_archive = backend.join('/', name, define.DB + '.zip')
backend.put_archive(
db_root,
define.DEPENDENCIES_FILE,
remote_archive,
version,
files=define.DEPENDENCIES_FILE,
verbose=verbose,
)

Expand All @@ -563,14 +567,19 @@ def repository(
repository that contains the database
Raises:
RuntimeError: if database is not found
RuntimeError: if database or version is not found
Examples:
>>> audb.repository('emodb', '1.4.1')
Repository('data-public', 'https://audeering.jfrog.io/artifactory', 'artifactory')
""" # noqa: E501
return _lookup(name, version)[0]
if not versions(name):
raise RuntimeError(
f"Cannot find database "
f"'{name}'."
)
return utils._lookup(name, version)[0]


def versions(
Expand All @@ -591,11 +600,7 @@ def versions(
"""
vs = []
for repository in config.REPOSITORIES:
backend = audbackend.create(
repository.backend,
repository.host,
repository.name,
)
header = backend.join(name, 'db.yaml')
vs.extend(backend.versions(header))
backend = utils.access_backend(repository)
header = backend.join('/', name, 'db.yaml')
vs.extend(backend.versions(header, suppress_backend_errors=True))
return audeer.sort_versions(vs)
11 changes: 7 additions & 4 deletions audb/core/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,9 +374,10 @@ def job(path: str):
archive = deps.archive(path)
version = deps.version(path)
archive = backend.join(
'/',
db.name,
define.DEPEND_TYPE_NAMES[define.DependType.ATTACHMENT],
archive,
archive + '.zip',
)
backend.get_archive(
archive,
Expand Down Expand Up @@ -441,9 +442,10 @@ def _get_media_from_backend(

def job(archive: str, version: str):
archive = backend.join(
'/',
name,
define.DEPEND_TYPE_NAMES[define.DependType.MEDIA],
archive,
archive + '.zip',
)
# extract and move all files that are stored in the archive,
# even if only a single file from the archive was requested
Expand Down Expand Up @@ -503,9 +505,10 @@ def _get_tables_from_backend(

def job(table: str):
archive = backend.join(
'/',
db.name,
define.DEPEND_TYPE_NAMES[define.DependType.META],
deps.archive(table),
deps.archive(table) + '.zip',
)
backend.get_archive(
archive,
Expand Down Expand Up @@ -1350,7 +1353,7 @@ def load_header_to(
local_header = os.path.join(db_root, define.HEADER_FILE)
if overwrite or not os.path.exists(local_header):
backend = lookup_backend(name, version)
remote_header = backend.join(name, define.HEADER_FILE)
remote_header = backend.join('/', name, define.HEADER_FILE)
if add_audb_meta:
db_root_tmp = database_tmp_root(db_root)
local_header = os.path.join(db_root_tmp, define.HEADER_FILE)
Expand Down
9 changes: 6 additions & 3 deletions audb/core/load_to.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,10 @@ def job(path: str):
archive = deps.archive(path)
version = deps.version(path)
archive = backend.join(
'/',
db_name,
define.DEPEND_TYPE_NAMES[define.DependType.ATTACHMENT],
archive,
archive + '.zip',
)
backend.get_archive(
archive,
Expand Down Expand Up @@ -173,9 +174,10 @@ def _get_media(

def job(archive: str, version: str):
archive = backend.join(
'/',
db_name,
define.DEPEND_TYPE_NAMES[define.DependType.MEDIA],
archive,
archive + '.zip',
)
files = backend.get_archive(
archive,
Expand Down Expand Up @@ -221,9 +223,10 @@ def job(table: str):
if os.path.exists(path_pkl):
os.remove(path_pkl)
archive = backend.join(
'/',
db_name,
define.DEPEND_TYPE_NAMES[define.DependType.META],
deps.archive(table),
deps.archive(table) + '.zip',
)
backend.get_archive(
archive,
Expand Down
38 changes: 21 additions & 17 deletions audb/core/publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import audiofile

from audb.core import define
from audb.core import utils
from audb.core.api import dependencies
from audb.core.dependencies import Dependencies
from audb.core.repository import Repository
Expand Down Expand Up @@ -345,12 +346,13 @@ def _put_attachments(
):
def job(attachment_id: str):
archive_file = backend.join(
'/',
db.name,
define.DEPEND_TYPE_NAMES[define.DependType.ATTACHMENT],
attachment_id,
attachment_id + '.zip',
)
files = db.attachments[attachment_id].files
backend.put_archive(db_root, files, archive_file, version)
backend.put_archive(db_root, archive_file, version, files=files)

audeer.run_tasks(
job,
Expand Down Expand Up @@ -389,9 +391,10 @@ def job(archive):
update_media.append(file)

archive_file = backend.join(
'/',
db_name,
define.DEPEND_TYPE_NAMES[define.DependType.MEDIA],
archive,
archive + '.zip',
)

if previous_version is not None:
Expand Down Expand Up @@ -423,9 +426,9 @@ def job(archive):

backend.put_archive(
db_root,
files,
archive_file,
version,
files=files,
)

update_media = []
Expand All @@ -451,11 +454,12 @@ def _put_tables(
def job(table: str):
file = f'db.{table}.csv'
archive_file = backend.join(
'/',
db_name,
define.DEPEND_TYPE_NAMES[define.DependType.META],
table,
table + '.zip',
)
backend.put_archive(db_root, file, archive_file, version)
backend.put_archive(db_root, archive_file, version, files=file)

audeer.run_tasks(
job,
Expand Down Expand Up @@ -602,14 +606,10 @@ def publish(
verbose=verbose,
)

backend = audbackend.create(
repository.backend,
repository.host,
repository.name,
)
backend = utils.access_backend(repository)

remote_header = backend.join(db.name, define.HEADER_FILE)
versions = backend.versions(remote_header)
remote_header = backend.join('/', db.name, define.HEADER_FILE)
versions = backend.versions(remote_header, suppress_backend_errors=True)
if version in versions:
raise RuntimeError(
'A version '
Expand Down Expand Up @@ -755,12 +755,16 @@ def publish(

# publish dependencies and header
deps.save(deps_path)
archive_file = backend.join(db.name, define.DB)
backend.put_archive(db_root, define.DEPENDENCIES_FILE, archive_file,
version)
archive_file = backend.join('/', db.name, define.DB + '.zip')
backend.put_archive(
db_root,
archive_file,
version,
files=define.DEPENDENCIES_FILE,
)
try:
local_header = os.path.join(db_root, define.HEADER_FILE)
remote_header = db.name + '/' + define.HEADER_FILE
remote_header = backend.join('/', db.name, define.HEADER_FILE)
backend.put_file(local_header, remote_header, version)
except Exception: # pragma: no cover
# after the header is published
Expand Down
Loading

0 comments on commit 900e3be

Please sign in to comment.