diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 9e973f7..387fed2 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -13,7 +13,7 @@ jobs: python-version: ['3.12'] steps: - - name: Check out smap-l2-gridder code + - name: Check out harmony-SMAP-L2-gridding-service code uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} @@ -25,7 +25,7 @@ jobs: run: | python -m pip install --upgrade pip pip install mypy - pip install -r pip_requirements.txt -r pip_dev_requirements.txt + pip install -r pip_requirements.txt -r tests/pip_test_requirements.txt - name: Run mypy run: mypy . diff --git a/.gitignore b/.gitignore index eeb8a6e..3909a9e 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ **/__pycache__ +/.coverage +/reports/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0d5379f..4b694fa 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,13 +10,10 @@ repos: - id: check-yaml - id: check-added-large-files - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.1 + rev: v0.8.0 hooks: - id: ruff - args: ["--fix", "--show-fixes", "--extend-select", "I"] - - repo: https://github.com/psf/black-pre-commit-mirror - rev: 24.10.0 - hooks: - - id: black-jupyter - args: ["--skip-string-normalization"] - language_version: python3.12 + args: ["--fix", "--show-fixes"] + types_or: [python, jupyter] + - id: ruff-format + types_or: [python, jupyter] diff --git a/CHANGELOG.md b/CHANGELOG.md index 472976c..b9ba90f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,3 +10,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Initial codebase that transforms SPL2SMP_E granules into NetCDF4-CF grids. 
+- Code and configuration to wrap gridding logic into a Harmony Service [#3](https://github.com/nasa/harmony-SMAP-L2-gridding-service/pull/3)
It reads NASA L2 Gridded trajectory data and writes output NetCDF-CF files with the trajectory style data correctly populated into EASE2 grids.
+ +## Directory structure + +``` +📁 +├── CHANGELOG.md +├── CONTRIBUTING.md +├── LICENSE +├── README.md +├── 📁 bin +├── 📁 docker +├── 📁 harmony_service +├── pip_requirements.txt +├── pyproject.toml +├── 📁 smap_l2_gridder +└── 📁 tests +``` + +* `CHANGELOG.md` - Contains a record of changes applied to each new release of the SMAP-L2-Gridding-Service. +* `CONTRIBUTING.md` - Instructions on how to contribute to the repository. +* `LICENSE` - Required for distribution under NASA open-source approval. Details conditions for use, reproduction and distribution. +* `README.md` - This file, containing guidance on developing the library and service. +* `bin` - A directory containing utility scripts to build the service and test images. A script to extract the release notes for the most recent version, as contained in `CHANGELOG.md` is also in this directory. +* `docker` - A directory containing the Dockerfiles for the service and test images. It also contains `service_version.txt`, which contains the semantic version number of the library and service image. Update this file with a new version to trigger a release. +* `harmony_service` - A directory containing the Harmony Service specific python code. `adapter.py` contains the `SMAPL2GridderAdapter` class that is invoked by calls to the Harmony service. +* `pip_requirements.txt` - Contains a list of python packages needed to run the service. +* `pyproject.toml` - Configuration file used by packaging tools, and other tools such as linters, type checkers, etc. +* `smap_l2_gridder` - Python package containing the logic for reformatting L2G data. +* `tests` - Contains the pytest test suite. + + +## Local development + +Local testing of service functionality can be achieved via a local instance of +[Harmony](https://github.com/nasa/harmony) aka Harmony-In-A-Box. Please see instructions there +regarding creation of a local Harmony instance. 
+tests have been updated, they can be run in Docker via:
+ +## CI/CD: + +The CI/CD for SMAP-L2-Gridding-Service is run on github actions with the workflows in the +`.github/workflows` directory: + +* [TODO: complete this section when the above statement is true] + +## Releasing + +* [TODO: complete when implemented] diff --git a/bin/build-image b/bin/build-image new file mode 100755 index 0000000..bd54891 --- /dev/null +++ b/bin/build-image @@ -0,0 +1,27 @@ +#!/bin/bash +############################################################################### +# +# Build a Docker image of the Harmony-SMAP-L2-Gridder service. +# +############################################################################### + +image="ghcr.io/nasa/harmony-smap-l2-gridder" + +# Retrieve the tag from the script arguments, or default to "latest". Bamboo +# will pass the contents of `docker/service_version.txt` as this argument, +# which contains a semantic version number. +# +tag=${1:-latest} + + +# Remove old versions of: ghcr.io/nasa/harmony-smap-l2-gridder and +# ghcr.io/nasa/harmony-smap-l2-gridder-test images +./bin/clean-images + +# Build the image +# This command tags the image with two tags: +# - The one supplied as a script argument. Bamboo will provide the semantic +# version number from `docker/service_version.txt`. +# - "latest", so the test Dockerfile can use the service image as a base image. +# +docker build -t ${image}:${tag} -t ${image}:latest -f docker/service.Dockerfile . diff --git a/bin/build-test b/bin/build-test new file mode 100755 index 0000000..dc063b2 --- /dev/null +++ b/bin/build-test @@ -0,0 +1,23 @@ +#!/bin/bash +############################################################################### +# +# Build a Docker container to run the test suite for the SMAP-L2-Gridding-Service +# +# To build the ghcr.io/nasa/harmony-smap-l2-gridder-test image, the +# ghcr.io/nasa/harmony-smap-l2-gridder image must also be present +# locally, as it is used as the base image in `docker/tests.Dockerfile`. 
+# Remove all images whose reference contains the string
+# "ghcr.io/nasa/harmony-smap-l2-gridder".
+# This is used for clean-up after development.
| grep -E "(__pycache__|\.pyc|\.pyo$)" | xargs rm -rf + +# Make the directory into which XML format test reports will be saved +mkdir -p reports/test-reports + +# Make the directory into which coverage reports will be saved +mkdir -p reports/coverage + +# Run the tests in a Docker container with mounted volumes for XML report +# output and test coverage reporting +docker run --rm \ + -v $(pwd)/reports/test-reports:/home/reports/test-reports \ + -v $(pwd)/reports/coverage:/home/reports/coverage \ + ghcr.io/nasa/harmony-smap-l2-gridder-test "$@" diff --git a/docker/service.Dockerfile b/docker/service.Dockerfile new file mode 100644 index 0000000..30a520a --- /dev/null +++ b/docker/service.Dockerfile @@ -0,0 +1,29 @@ +############################################################################### +# +# Service image for ghcr.io/nasa/harmony-smap-l2-gridder + +# Harmony-SMAP-L2-Gridder backend service that transforms L2G (gridded +# trajectory) data into actual gridded data. +# +# This image installs dependencies via Pip. The service code is then copied +# into the Docker image. +# +############################################################################### +FROM python:3.12-slim-bookworm + +WORKDIR "/home" + +RUN apt-get update + +# Install Pip dependencies +COPY pip_requirements.txt /home/ + +RUN pip install --no-input --no-cache-dir \ + -r pip_requirements.txt + +# Copy service code. +COPY ./harmony_service harmony_service +COPY ./smap_l2_gridder smap_l2_gridder + +# Configure a container to be executable via the `docker run` command. 
+ENTRYPOINT ["python", "-m", "harmony_service"] diff --git a/docker/service_version.txt b/docker/service_version.txt new file mode 100644 index 0000000..77d6f4c --- /dev/null +++ b/docker/service_version.txt @@ -0,0 +1 @@ +0.0.0 diff --git a/docker/tests.Dockerfile b/docker/tests.Dockerfile new file mode 100644 index 0000000..d5b572c --- /dev/null +++ b/docker/tests.Dockerfile @@ -0,0 +1,20 @@ +############################################################################### +# +# Test image for the SMAP-L2-Gridding-Service. This test image uses the main +# service image as a base layer for the tests. This ensures that the contents +# of the service image are tested, preventing discrepancies between the service +# and test environments. +############################################################################### +FROM ghcr.io/nasa/harmony-smap-l2-gridder + +# Install additional Pip requirements (for testing) +COPY tests/pip_test_requirements.txt . + +RUN pip install --no-input --no-cache-dir \ + -r pip_test_requirements.txt + +# Copy test directory containing Python unittest suite, test data and utilities +COPY ./tests tests + +# Configure a container to be executable via the `docker run` command. 
+accept an input L2G granule and transform it into a fully gridded
+NetCDF-CF output file.
+ +""" + +from pathlib import Path +from tempfile import TemporaryDirectory + +from harmony_service_lib import BaseHarmonyAdapter +from harmony_service_lib.message import Source as HarmonySource +from harmony_service_lib.util import download, generate_output_filename, stage +from pystac import Asset, Item + +from smap_l2_gridder.grid import transform_l2g_input + + +class SMAPL2GridderAdapter(BaseHarmonyAdapter): + """Custom adapter for Harmony-SMAP-L2-Gridder Service.""" + + def process_item(self, item: Item, source: HarmonySource) -> Item: + """Process single input STAC item.""" + with TemporaryDirectory() as working_directory: + try: + results = item.clone() + results.assets = {} + + asset = next( + item_asset + for item_asset in item.assets.values() + if 'data' in (item_asset.roles or []) + ) + + # Download the input: + input_filepath = download( + asset.href, + working_directory, + logger=self.logger, + cfg=self.config, + access_token=self.message.accessToken, + ) + + working_filename = Path(input_filepath).parent / 'working_gridded.nc' + + final_target_filename = generate_output_filename( + asset.href, is_regridded=True, ext='.nc' + ) + + transform_l2g_input( + input_filepath, working_filename, logger=self.logger + ) + + # Stage the transformed output: + staged_url = stage( + working_filename, + final_target_filename, + 'application/x-netcdf4', + location=self.message.stagingLocation, + logger=self.logger, + cfg=self.config, + ) + + # Add the asset to the results Item + results.assets['data'] = Asset( + staged_url, + title=Path(staged_url).name, + media_type='application/x-netcdf4', + roles=['data'], + ) + return results + + except Exception as exception: + self.logger.exception(exception) + raise exception diff --git a/harmony_service/exceptions.py b/harmony_service/exceptions.py new file mode 100644 index 0000000..9270688 --- /dev/null +++ b/harmony_service/exceptions.py @@ -0,0 +1,13 @@ +"""Define harmony service errors raised by 
+        """All service errors are associated with SERVICE_NAME."""
{args.input} into {args.output}') return 0 if __name__ == '__main__': - exit(main()) + main() diff --git a/smap_l2_gridder/crs.py b/smap_l2_gridder/crs.py index 6b7ff76..b9de5c4 100644 --- a/smap_l2_gridder/crs.py +++ b/smap_l2_gridder/crs.py @@ -45,14 +45,14 @@ def col_row_to_xy(self, col: int, row: int) -> tuple[np.float64, np.float64]: # files CRS metadata may not match the authoritative value because of the # different varieties of WKT. That said, the CRS created by pyproj is the same. # i.e. -# pyproj.crs.CRS.from_wkt(epsg_6933_wkt).to_wkt() != epsg_6933_wkt +# pyproj.crs.CRS.from_wkt(EPSG_6933_WKT).to_wkt() != EPSG_6933_WKT # but -# pyproj.crs.CRS.from_wkt(pyproj.crs.CRS.from_wkt(epsg_6933_wkt).to_wkt()) -# == pyproj.crs.CRS.from_wkt(epsg_6933_wkt) +# pyproj.crs.CRS.from_wkt(pyproj.crs.CRS.from_wkt(EPSG_6933_WKT).to_wkt()) +# == pyproj.crs.CRS.from_wkt(EPSG_6933_WKT) # NSIDC EASE-Grid 2.0 Global CRS definition # from: https://epsg.org/crs/wkt/id/6933 -epsg_6933_wkt = ( +EPSG_6933_WKT = ( 'PROJCRS["WGS 84 / NSIDC EASE-Grid 2.0 Global",' 'BASEGEOGCRS["WGS 84",ENSEMBLE["World Geodetic System 1984 ensemble", ' 'MEMBER["World Geodetic System 1984 (Transit)", ID["EPSG",1166]], ' @@ -82,7 +82,7 @@ def col_row_to_xy(self, col: int, row: int) -> tuple[np.float64, np.float64]: # NSIDC EASE-Grid 2.0 North CRS definition # from: https://epsg.org/crs/wkt/id/6931 -epsg_6931_wkt = ( +EPSG_6931_WKT = ( 'PROJCRS["WGS 84 / NSIDC EASE-Grid 2.0 North",' 'BASEGEOGCRS["WGS 84",ENSEMBLE["World Geodetic System 1984 ensemble", ' 'MEMBER["World Geodetic System 1984 (Transit)", ID["EPSG",1166]], ' @@ -113,8 +113,8 @@ def col_row_to_xy(self, col: int, row: int) -> tuple[np.float64, np.float64]: ) GPD_TO_WKT = { - "EASE2_N09km.gpd": epsg_6931_wkt, - "EASE2_M09km.gpd": epsg_6933_wkt, + 'EASE2_N09km.gpd': EPSG_6931_WKT, + 'EASE2_M09km.gpd': EPSG_6933_WKT, } @@ -156,8 +156,7 @@ def convert_value(value: str) -> str | np.float64 | np.long: try: if '.' 
in value: return np.float64(value) - else: - return np.long(value) + return np.long(value) except ValueError: return value @@ -190,7 +189,6 @@ def parse_gpd_file(gpd_name: str) -> dict: with open(filename, encoding='utf-8') as f: for line in f: - line = line.strip() if not line or line.startswith(';'): continue @@ -223,8 +221,8 @@ def compute_dims(target_info: dict) -> tuple[DataArray, DataArray]: target grid information dictionary. """ - n_cols = target_info["Grid Width"] - n_rows = target_info["Grid Height"] + n_cols = target_info['Grid Width'] + n_rows = target_info['Grid Height'] geotransform = geotransform_from_target_info(target_info) # compute the x,y locations along a column and row diff --git a/smap_l2_gridder/exceptions.py b/smap_l2_gridder/exceptions.py index dd5b7db..539e78c 100644 --- a/smap_l2_gridder/exceptions.py +++ b/smap_l2_gridder/exceptions.py @@ -5,7 +5,7 @@ class SMAPL2GridderError(Exception): """Base error class for exceptions raised by smap_l2_gridder library.""" def __init__(self, message=None): - """All smap-l2-gridder errors have a message field.""" + """All Harmony-SMAP-L2-Gridder errors have a message field.""" self.message = message diff --git a/smap_l2_gridder/grid.py b/smap_l2_gridder/grid.py index 4540a3d..2c57013 100644 --- a/smap_l2_gridder/grid.py +++ b/smap_l2_gridder/grid.py @@ -4,15 +4,28 @@ routines to translate the 1D intput arrays into the EASE grid output format """ -from typing import Iterable +from collections.abc import Iterable +from logging import Logger +from pathlib import Path import numpy as np -from xarray import DataArray, DataTree +from xarray import DataArray, DataTree, open_datatree -from .crs import compute_dims, create_crs, epsg_6931_wkt, epsg_6933_wkt, parse_gpd_file +from .crs import EPSG_6931_WKT, EPSG_6933_WKT, compute_dims, create_crs, parse_gpd_file -def process_input(in_data: DataTree, output_file: str): +def transform_l2g_input( + input_filename: Path, output_filename: Path, logger: Logger +) -> 
None: + """Entrypoint for L2G-Gridding-Service. + + Opens input and processes the data to a new output file. + """ + with open_datatree(input_filename, decode_times=False) as in_data: + process_input(in_data, output_filename, logger=logger) + + +def process_input(in_data: DataTree, output_file: Path, logger: None | Logger = None): """Process input file to generate gridded output file.""" out_data = DataTree() @@ -22,7 +35,6 @@ def process_input(in_data: DataTree, output_file: str): data_node_names = set(in_data['/'].children) - set(get_metadata_children(in_data)) for node_name in data_node_names: - grid_info = get_grid_information(in_data, node_name) vars_to_grid = get_target_variables(in_data, node_name) @@ -45,7 +57,7 @@ def process_input(in_data: DataTree, output_file: str): def prepare_variable(var: DataTree | DataArray, grid_info: dict) -> DataArray: """Grid and annotate intput variable.""" grid_data = grid_variable(var, grid_info) - grid_data.attrs = {**var.attrs, 'grid_mapping': "crs"} + grid_data.attrs = {**var.attrs, 'grid_mapping': 'crs'} encoding = { '_FillValue': variable_fill_value(var), 'coordinates': var.encoding.get('coordinates', None), @@ -62,13 +74,13 @@ def grid_variable(var: DataTree | DataArray, grid_info: dict) -> DataArray: grid = np.full( (grid_info['target']['Grid Height'], grid_info['target']['Grid Width']), fill_val, - dtype=var.encoding['dtype'], + dtype=(var.encoding.get('dtype', var.dtype)), ) try: valid_mask = ~np.isnan(var.data) except TypeError: # tb_time_utc is type string - valid_mask = var.data != "" + valid_mask = var.data != '' valid_rows = grid_info['src']['rows'].data[valid_mask] valid_cols = grid_info['src']['cols'].data[valid_mask] valid_values = var.data[valid_mask] @@ -93,7 +105,7 @@ def variable_fill_value(var: DataTree | DataArray) -> np.integer | np.floating | if fill_value is None: fill_value = var.attrs.get('missing_value') if fill_value is None: - fill_value = default_fill_value(var.encoding.get('dtype')) + 
+    """Return the target grid information."""
+                'long_name': "Diffuse reflecting power of the Earth's...",
+# A script invoked by the test Dockerfile to run the Python `pytest` suite
The script first runs the test suite, +# then it checks for linting errors. +############################################################################### + +# Exit status used to report back to caller +STATUS=0 + +# Run the standard set of unit tests, producing JUnit compatible output +pytest --cov=smap_l2_gridder --cov=harmony_service \ + --cov-report=html:reports/coverage \ + --cov-report term \ + --junitxml=reports/test-reports/test-results-"$(date +'%Y%m%d%H%M%S')".xml || STATUS=1 + +# Run pylint +pylint smap_l2_gridder harmony_service --disable=W1203 +RESULT=$((3 & $?)) + +if [ "$RESULT" -ne "0" ]; then + STATUS=1 + echo "ERROR: pylint generated errors" +fi + +exit $STATUS diff --git a/tests/test_service/__init__.py b/tests/test_service/__init__.py new file mode 100644 index 0000000..02ebdf8 --- /dev/null +++ b/tests/test_service/__init__.py @@ -0,0 +1 @@ +"""Initialize service tests package.""" diff --git a/tests/test_service/test_adapter.py b/tests/test_service/test_adapter.py new file mode 100644 index 0000000..8a3dc81 --- /dev/null +++ b/tests/test_service/test_adapter.py @@ -0,0 +1,153 @@ +"""End-to-end tests of the SMAP-L2-Gridding-Service.""" + +import pytest +from harmony_service_lib.message import Message as HarmonyMessage +from harmony_service_lib.util import config +from xarray import open_datatree + +from harmony_service.adapter import SMAPL2GridderAdapter +from smap_l2_gridder.exceptions import InvalidGPDError + + +def test_process_sample_file(tmp_path, sample_datatree_file, sample_stac, mocker): + """Run a sample file through the adapter.""" + # override the adapter's working dir + temp_dir_mock = mocker.patch('harmony_service.adapter.TemporaryDirectory') + temp_dir_mock.return_value.__enter__.return_value = tmp_path + + # use a datatree fixture as the downloaded file + download_mock = mocker.patch('harmony_service.adapter.download') + download_mock.return_value = sample_datatree_file + + # set the output filename + filename_mock = 
mocker.patch('harmony_service.adapter.generate_output_filename') + output_filename = tmp_path / 'test-gridded.nc' + filename_mock.return_value = output_filename + + stage_mock = mocker.patch('harmony_service.adapter.stage') + stage_mock.return_value = 's3://bucketname/staged-location' + staging_dir = tmp_path / 'staging' + + asset_mock = mocker.patch('harmony_service.adapter.Asset') + + message = HarmonyMessage( + { + 'accessToken': 'fake_token', + 'callback': 'https://example.com/', + 'sources': [{'collection': 'C1234-EEDTEST', 'shortName': 'smap_test'}], + 'stagingLocation': staging_dir, + 'user': 'fakeUsername', + 'format': {'mime': 'application/x-netcdf4'}, + } + ) + + # Set up Adapter class + the_config = config(validate=False) + smap_l2_gridding_service = SMAPL2GridderAdapter( + message, config=the_config, catalog=sample_stac + ) + + # Invoke the adapter. + smap_l2_gridding_service.invoke() + + asset_href = sample_stac.get_item('input granule').assets['input data'].href + + download_mock.assert_called_once_with( + asset_href, + tmp_path, + logger=mocker.ANY, + cfg=the_config, + access_token=message.accessToken, + ) + + stage_mock.assert_called_once_with( + tmp_path / 'working_gridded.nc', + output_filename, + 'application/x-netcdf4', + location=message.stagingLocation, + logger=mocker.ANY, + cfg=mocker.ANY, + ) + + asset_mock.assert_called_once_with( + 's3://bucketname/staged-location', + title=mocker.ANY, + media_type='application/x-netcdf4', + roles=['data'], + ) + + # Validate the gridded output data + results = open_datatree(tmp_path / 'working_gridded.nc') + assert set(results.groups) == set( + ( + '/', + '/Metadata', + '/Soil_Moisture_Retrieval_Data', + '/Soil_Moisture_Retrieval_Data_Polar', + '/Metadata/Lineage', + '/Metadata/Lineage/DEMSLP', + ) + ) + assert results['Soil_Moisture_Retrieval_Data'].coords['x-dim'].name == 'x-dim' + assert len(results['Soil_Moisture_Retrieval_Data'].coords['x-dim']) == 3856 + assert ( + 
results['Soil_Moisture_Retrieval_Data'].crs.attrs['projected_crs_name'] + == 'WGS 84 / NSIDC EASE-Grid 2.0 Global' + ) + assert ( + results['Soil_Moisture_Retrieval_Data_Polar'].crs.attrs['projected_crs_name'] + == 'WGS 84 / NSIDC EASE-Grid 2.0 North' + ) + assert set(results['Soil_Moisture_Retrieval_Data_Polar'].variables) == set( + [ + 'EASE_column_index', + 'EASE_row_index', + 'albedo', + 'crs', + #'tb_time_utc', + 'x-dim', + 'y-dim', + ] + ) + assert set(results['Soil_Moisture_Retrieval_Data'].variables) == set( + [ + 'EASE_column_index', + 'EASE_row_index', + 'albedo', + 'crs', + #'tb_time_utc', + 'x-dim', + 'y-dim', + ] + ) + + +def test_process_sample_file_failure( + tmp_path, sample_stac, sample_datatree_file, mocker +): + """Test failure.""" + # use a datatree fixture as the downloaded file + download_mock = mocker.patch('harmony_service.adapter.download') + download_mock.return_value = sample_datatree_file + mocker.patch('harmony_service.adapter.generate_output_filename') + mocker.patch('harmony_service.adapter.stage') + + get_grid_info_mock = mocker.patch('smap_l2_gridder.grid.get_grid_information') + get_grid_info_mock.side_effect = InvalidGPDError('invalid gpd') + + message = HarmonyMessage( + { + 'sources': [{'collection': 'C1234-EEDTEST'}], + } + ) + + # Set up Adapter class + smap_l2_gridding_service = SMAPL2GridderAdapter( + message, config=config(validate=False), catalog=sample_stac + ) + + # Invoke the adapter. 
+ with pytest.raises(InvalidGPDError) as error_info: + smap_l2_gridding_service.invoke() + + assert 'invalid gpd' in str(error_info.value) diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 0000000..8624935 --- /dev/null +++ b/tests/unit/__init__.py @@ -0,0 +1 @@ +"""Initialize unit tests package.""" diff --git a/tests/unit/test_crs.py b/tests/unit/test_crs.py index f3b1a73..0b42cce 100644 --- a/tests/unit/test_crs.py +++ b/tests/unit/test_crs.py @@ -8,11 +8,11 @@ from xarray import DataArray from smap_l2_gridder.crs import ( + EPSG_6933_WKT, Geotransform, compute_dims, convert_value, create_crs, - epsg_6933_wkt, geotransform_from_target_info, parse_gpd_file, validate_gpd_style, @@ -32,7 +32,7 @@ 'Grid Height': 10, 'Grid Map Origin Column': -0.5, 'Grid Map Origin Row': -0.5, - 'wkt': epsg_6933_wkt, + 'wkt': EPSG_6933_WKT, } @@ -93,7 +93,7 @@ def test_parse_gpd_file(tmp_path): """ ).strip() - gpd_file = tmp_path / "test.gpd" + gpd_file = tmp_path / 'test.gpd' gpd_file.write_text(gpd_content) result = parse_gpd_file(str(gpd_file)) diff --git a/tests/unit/test_grid.py b/tests/unit/test_grid.py index 98c30bb..2392ec0 100644 --- a/tests/unit/test_grid.py +++ b/tests/unit/test_grid.py @@ -19,85 +19,6 @@ ) -# Fixtures -@pytest.fixture -def sample_datatree(tmp_path): - """Create a sample DataTree for testing. - - It is round tripped to a temporary disk location for easy of setting the - correct NetCDF attributes. - - This represents the expected shape of an SPL2SMP_E granule. - The data is repeated for both global and polar nodes. 
- - """ - dt = DataTree() - dt["Metadata/Lineage/DEMSLP"] = DataTree() - dt["Metadata/Lineage/DEMSLP"].attrs[ - "Description" - ] = "Representative surface slope data for each of the 9 km cells" - - nodes = ["Soil_Moisture_Retrieval_Data", "Soil_Moisture_Retrieval_Data_Polar"] - for node in nodes: - dt[f"{node}"] = DataTree() - dt[f"{node}/EASE_column_index"] = DataArray( - data=np.array([1175, 1175, 1175, 1175, 1175], dtype=np.uint16), - dims=["phony_dim_0"], - name="EASE_column_index", - attrs={ - "long_name": "The column index of the 9 km EASE grid cell...", - "valid_min": 0, - "valid_max": 3855, - "_FillValue": np.uint16(65534), - }, - ) - - dt[f"{node}/EASE_row_index"] = DataArray( - data=np.array([1603, 1604, 1605, 1606, 1607], dtype=np.uint16), - dims=["phony_dim_0"], - attrs={ - "long_name": "The row index of the 9 km EASE grid cell...", - "valid_min": 0, - "valid_max": 1623, - "_FillValue": np.uint16(65534), - }, - ) - - dt[f'{node}/albedo'] = DataArray( - data=np.array( - [0.0009434, 0.00136986, 0.0025, 0.0, -9999.0], dtype=np.float32 - ), - dims=['phony_dim_0'], - attrs={ - 'long_name': 'Diffuse reflecting power of the Earth's...', - 'valid_min': 0.0, - 'valid_max': 1.0, - '_FillValue': np.float32(-9999.0), - }, - ) - - dt[f'{node}/tb_time_utc'] = DataArray( - data=np.array( - [ - '2024-11-06T03:59:27.313Z', - '2024-11-06T03:59:25.754Z', - '2024-11-06T03:59:24.374Z', - '2024-11-06T03:59:22.735Z', - '2024-11-06T03:59:21.191Z', - ], - dtype='