Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Convert zarr dependencies to utils, update numpy chunk encoding #259

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,18 +49,17 @@ def test_invalid_encoding_chunks_with_dask_raise():
data = dask.array.zeros((10, 20, 30), chunks=expected)
ds = xr.Dataset({'foo': (['x', 'y', 'z'], data)})
ds['foo'].encoding['chunks'] = [8, 5, 1]
with pytest.raises(NotImplementedError) as excinfo:
with pytest.raises(ValueError) as excinfo:
_ = create_zmetadata(ds)
excinfo.match(r'Specified zarr chunks .*')


def test_invalid_encoding_chunks_with_numpy_raise():
def test_ignore_encoding_chunks_with_numpy():
data = np.zeros((10, 20, 30))
ds = xr.Dataset({'foo': (['x', 'y', 'z'], data)})
ds['foo'].encoding['chunks'] = [8, 5, 1]
with pytest.raises(ValueError) as excinfo:
_ = create_zmetadata(ds)
excinfo.match(r'Encoding chunks do not match inferred.*')
zmetadata = create_zmetadata(ds)
assert zmetadata['metadata']['foo/.zarray']['chunks'] == [10, 20, 30]


def test_get_data_chunk_numpy():
Expand Down
74 changes: 8 additions & 66 deletions xpublish/dependencies.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,18 @@
"""Helper functions to use a FastAPI dependencies."""
from typing import (
TYPE_CHECKING,
Dict,
List,
)
"""Helper functions to use a FastAPI dependencies.
"""
from typing import TYPE_CHECKING, Dict, List

import cachey
import pluggy
import xarray as xr
from fastapi import Depends

from .utils.api import DATASET_ID_ATTR_KEY
from .utils.zarr import ZARR_METADATA_KEY, create_zmetadata, create_zvariables

if TYPE_CHECKING:
from .plugins import Plugin # pragma: no cover


def get_dataset_ids() -> List[str]:
"""FastAPI dependency for getting the list of ids (string keys) of the collection of datasets being served.
"""FastAPI dependency for getting the list of ids (string keys)
of the collection of datasets being served.

Use this callable as dependency in any FastAPI path operation
function where you need access to those ids.
Expand All @@ -28,6 +22,7 @@ def get_dataset_ids() -> List[str]:

Returns:
A list of unique keys for datasets

"""
return [] # pragma: no cover

Expand Down Expand Up @@ -66,60 +61,8 @@ def get_cache() -> cachey.Cache:
return cachey.Cache(available_bytes=1e6) # pragma: no cover


def get_zvariables(
    dataset: xr.Dataset = Depends(get_dataset),
    cache: cachey.Cache = Depends(get_cache),
) -> dict:
    """FastAPI dependency that returns a dictionary of zarr encoded variables.

    Args:
        dataset: The dataset to get the zvariables from.
        cache: The cache to use for storing the zvariables.

    Returns:
        A dictionary of zarr encoded variables.
    """
    key = f"{dataset.attrs.get(DATASET_ID_ATTR_KEY, '')}/zvariables"

    cached = cache.get(key)
    if cached is not None:
        return cached

    zvariables = create_zvariables(dataset)
    # Effectively permanent caching: a very high cost keeps this entry resident.
    cache.put(key, zvariables, 99999)
    return zvariables


def get_zmetadata(
    dataset: xr.Dataset = Depends(get_dataset),
    cache: cachey.Cache = Depends(get_cache),
    zvariables: dict = Depends(get_zvariables),
) -> dict:
    """FastAPI dependency that returns a consolidated zmetadata dictionary.

    Args:
        dataset: The dataset to get the zmetadata from.
        cache: The cache to use for storing the zmetadata.
        zvariables: The zvariables to use for creating the zmetadata.

    Returns:
        A consolidated zmetadata dictionary.
    """
    key = f"{dataset.attrs.get(DATASET_ID_ATTR_KEY, '')}/{ZARR_METADATA_KEY}"

    cached = cache.get(key)
    if cached is not None:
        return cached

    zmeta = create_zmetadata(dataset)
    # Effectively permanent caching: a very high cost keeps this entry resident.
    cache.put(key, zmeta, 99999)
    return zmeta


def get_plugins() -> Dict[str, 'Plugin']:
"""FastAPI dependency that returns the a dictionary of loaded plugins.
"""FastAPI dependency that returns the a dictionary of loaded plugins

Returns:
Dictionary of names to initialized plugins.
Expand All @@ -128,5 +71,4 @@ def get_plugins() -> Dict[str, 'Plugin']:


def get_plugin_manager() -> pluggy.PluginManager:
"""Return the active plugin manager."""
...
"""Return the active plugin manager"""
11 changes: 5 additions & 6 deletions xpublish/plugins/included/dataset_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@

from xpublish.utils.api import JSONResponse

from ...dependencies import get_zmetadata, get_zvariables
from ...utils.zarr import get_zmetadata, get_zvariables
from .. import Dependencies, Plugin, hookimpl


class DatasetInfoPlugin(Plugin):
"""Dataset metadata and schema routes."""
"""Dataset metadata"""

name: str = 'dataset_info'

Expand All @@ -21,7 +21,6 @@ class DatasetInfoPlugin(Plugin):

@hookimpl
def dataset_router(self, deps: Dependencies) -> APIRouter:
"""Returns a router with dataset metadata and schema routes."""
router = APIRouter(
prefix=self.dataset_router_prefix,
tags=list(self.dataset_router_tags),
Expand All @@ -39,22 +38,22 @@ def html_representation(
def list_keys(
dataset=Depends(deps.dataset),
) -> list[str]:
"""Returns a of the keys in a dataset."""
"""List of the keys in a dataset"""
return JSONResponse(list(dataset.variables))

@router.get('/dict')
def to_dict(
dataset=Depends(deps.dataset),
) -> dict:
"""Returns the full dataset as a dictionary."""
"""The full dataset as a dictionary"""
return JSONResponse(dataset.to_dict(data=False))

@router.get('/info')
def info(
dataset=Depends(deps.dataset),
cache=Depends(deps.cache),
) -> dict:
"""Returns the dataset schema (close to the NCO-JSON schema)."""
"""Dataset schema (close to the NCO-JSON schema)."""
zvariables = get_zvariables(dataset, cache)
zmetadata = get_zmetadata(dataset, cache, zvariables)

Expand Down
12 changes: 6 additions & 6 deletions xpublish/plugins/included/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@

from xpublish.utils.api import JSONResponse

from ...dependencies import get_zmetadata, get_zvariables
from ...utils.api import DATASET_ID_ATTR_KEY
from ...utils.cache import CostTimer
from ...utils.zarr import (
ZARR_METADATA_KEY,
encode_chunk,
get_data_chunk,
get_zmetadata,
get_zvariables,
jsonify_zmetadata,
)
from .. import Dependencies, Plugin, hookimpl
Expand All @@ -24,7 +25,7 @@


class ZarrPlugin(Plugin):
"""Adds Zarr-like accessing endpoints for datasets."""
"""Adds Zarr-like accessing endpoints for datasets"""

name: str = 'zarr'

Expand All @@ -33,7 +34,6 @@ class ZarrPlugin(Plugin):

@hookimpl
def dataset_router(self, deps: Dependencies) -> APIRouter:
"""Returns a router with Zarr-like accessing endpoints for datasets."""
router = APIRouter(
prefix=self.dataset_router_prefix,
tags=list(self.dataset_router_tags),
Expand All @@ -44,7 +44,7 @@ def get_zarr_metadata(
dataset=Depends(deps.dataset),
cache=Depends(deps.cache),
) -> dict:
"""Returns consolidated Zarr metadata."""
"""Consolidated Zarr metadata"""
zvariables = get_zvariables(dataset, cache)
zmetadata = get_zmetadata(dataset, cache, zvariables)

Expand All @@ -57,7 +57,7 @@ def get_zarr_group(
dataset=Depends(deps.dataset),
cache=Depends(deps.cache),
) -> dict:
"""Returns Zarr group data."""
"""Zarr group data"""
zvariables = get_zvariables(dataset, cache)
zmetadata = get_zmetadata(dataset, cache, zvariables)

Expand All @@ -68,7 +68,7 @@ def get_zarr_attrs(
dataset=Depends(deps.dataset),
cache=Depends(deps.cache),
) -> dict:
"""Returns Zarr attributes."""
"""Zarr attributes"""
zvariables = get_zvariables(dataset, cache)
zmetadata = get_zmetadata(dataset, cache, zvariables)

Expand Down
Loading
Loading