From b4eb1fbc69f06e69772be92d3d842952de547237 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Wed, 20 Nov 2024 11:23:59 -0700 Subject: [PATCH 01/26] DAS-2254: Add __init__.py files to get tests working with pytest --- tests/__init__.py | 0 tests/unit/__init__.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/__init__.py create mode 100644 tests/unit/__init__.py diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 0000000..e69de29 From 2b31be6f9c5a1b5c6b7689f769360defc405fd16 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Fri, 22 Nov 2024 09:29:01 -0700 Subject: [PATCH 02/26] DAS-2254: Adds HarmonyAdapter and harmony_service. --- harmony_service/__init__.py | 1 + harmony_service/__main__.py | 26 ++++++++++++ harmony_service/adapter.py | 78 +++++++++++++++++++++++++++++++++++ harmony_service/exceptions.py | 13 ++++++ pip_requirements.txt | 4 +- smap_l2_gridder/__init__.py | 1 + smap_l2_gridder/__main__.py | 3 +- smap_l2_gridder/grid.py | 26 ++++++++---- tests/unit/test_grid.py | 42 +++++++++++-------- 9 files changed, 167 insertions(+), 27 deletions(-) create mode 100644 harmony_service/__init__.py create mode 100644 harmony_service/__main__.py create mode 100644 harmony_service/adapter.py create mode 100644 harmony_service/exceptions.py diff --git a/harmony_service/__init__.py b/harmony_service/__init__.py new file mode 100644 index 0000000..c1da372 --- /dev/null +++ b/harmony_service/__init__.py @@ -0,0 +1 @@ +"""Initialize the harmony_service package.""" diff --git a/harmony_service/__main__.py b/harmony_service/__main__.py new file mode 100644 index 0000000..0fdc1fa --- /dev/null +++ b/harmony_service/__main__.py @@ -0,0 +1,26 @@ +"""Run the SMAP-L2-Gridder via the Harmony CLI.""" + +from argparse import ArgumentParser +from sys import argv + +from harmony_service_lib import is_harmony_cli, run_cli, setup_cli + +from harmony_service.adapter import SMAPL2GridderAdapter +from harmony_service.exceptions import SERVICE_NAME + + +def main(arguments: list[str]): + """Parse command line arguments and invoke the appropriate method.""" + parser = ArgumentParser(prog=SERVICE_NAME, description='Run SMAP L2 Gridder.') + + setup_cli(parser) + harmony_arguments, _ = parser.parse_known_args(arguments[1:]) + + if is_harmony_cli(harmony_arguments): + run_cli(parser, harmony_arguments, SMAPL2GridderAdapter) + else: + parser.error('Only --harmony CLIs are supported') + + +if __name__ == '__main__': + main(argv) diff --git a/harmony_service/adapter.py b/harmony_service/adapter.py new file mode 100644 index 0000000..35754c4 --- /dev/null +++ b/harmony_service/adapter.py @@ -0,0 +1,78 @@ +"""`HarmonyAdapter` for SMAP-L2-Gridding-Service. + +The class in this file is the top level of abstraction for a service that will +accept an input L2G input granule and transform it into a fully gridded +NetCDF-CF output file + +Currently, the service works with SPL2SMP_E granules. + +""" + +from pathlib import Path +from tempfile import TemporaryDirectory + +from harmony_service_lib import BaseHarmonyAdapter +from harmony_service_lib.message import Source as HarmonySource +from harmony_service_lib.util import download, generate_output_filename, stage +from pystac import Asset, Item + +from smap_l2_gridder.grid import transform_l2g_input + + +class SMAPL2GridderAdapter(BaseHarmonyAdapter): + """Custom adapter for SMAP-L2-Gridder Service.""" + + def process_item(self, item: Item, source: HarmonySource) -> Item: + """Process single input STAC item.""" + with TemporaryDirectory() as working_directory: + try: + results = item.clone() + results.assets = {} + + asset = next( + item_asset + for item_asset in item.assets.values() + if 'data' in (item_asset.roles or []) + ) + + # Download the input: + input_filepath = download( + asset.href, + working_directory, + logger=self.logger, + cfg=self.config, + access_token=self.message.accessToken, + ) + + working_filename = Path(input_filepath).parent / 'working_gridded.nc' + + final_target_gridfile = generate_output_filename( + asset.href, is_regridded=True + ) + + transform_l2g_input( + input_filepath, working_filename, logger=self.logger + ) + + # Stage the transformed output: + staged_url = stage( + working_filename, + final_target_gridfile, + 'application/x-netcdf4', + location=self.message.stagingLocation, + logger=self.logger, + cfg=self.config, + ) + + # Add the asset to the results Item + results.assets['data'] = Asset( + staged_url, + title=Path(staged_url).name, + media_type='application/x-netcdf4', + roles=['data'], + ) + return results + + except Exception as exception: + self.logger.exception(exception) + raise exception diff --git a/harmony_service/exceptions.py b/harmony_service/exceptions.py new file mode 100644 index 0000000..3bba894 --- /dev/null +++ b/harmony_service/exceptions.py @@ -0,0 +1,13 @@ +"""Define harmony service errors raised by SMAP-L2-Gridding-Service.""" + +from harmony_service_lib.util import HarmonyException + +SERVICE_NAME = 'SMAP-L2-Gridder' + + +class SMAPL2GridderServiceError(HarmonyException): + """Base service exception.""" + + def __init__(self, message=None): + """All service errors are assocated with SERVICE_NAME.""" + super().__init__(message=message, category=SERVICE_NAME) diff --git a/pip_requirements.txt b/pip_requirements.txt index 41f3ccc..4b2c51a 100644 --- a/pip_requirements.txt +++ b/pip_requirements.txt @@ -1,3 +1,5 @@ +harmony-service-lib==2.3.0 netcdf4==1.7.2 -xarray==2024.10.0 pyproj==3.7.0 +pystac~=1.11.0 +xarray==2024.10.0 diff --git a/smap_l2_gridder/__init__.py b/smap_l2_gridder/__init__.py index e69de29..8cacfd4 100644 --- a/smap_l2_gridder/__init__.py +++ b/smap_l2_gridder/__init__.py @@ -0,0 +1 @@ +"""Initialize the SMAP-L2-Gridder package.""" diff --git a/smap_l2_gridder/__main__.py b/smap_l2_gridder/__main__.py index d838a58..a195435 100644 --- a/smap_l2_gridder/__main__.py +++ b/smap_l2_gridder/__main__.py @@ -1,6 +1,7 @@ """Module to allow running from a commandline.""" import argparse +from pathlib import Path from xarray import open_datatree @@ -20,7 +21,7 @@ def main(): try: args = parse_args() with open_datatree(args.input, decode_times=False) as in_data: - process_input(in_data, args.output) + process_input(in_data, Path(args.output)) except Exception as e: print(f"Error occurred: {e}") raise e diff --git a/smap_l2_gridder/grid.py b/smap_l2_gridder/grid.py index 4540a3d..f4832da 100644 --- a/smap_l2_gridder/grid.py +++ b/smap_l2_gridder/grid.py @@ -4,15 +4,28 @@ routines to translate the 1D intput arrays into the EASE grid output format """ +from logging import Logger +from pathlib import Path from typing import Iterable import numpy as np -from xarray import DataArray, DataTree +from xarray import DataArray, DataTree, open_datatree from .crs import compute_dims, create_crs, epsg_6931_wkt, epsg_6933_wkt, parse_gpd_file -def process_input(in_data: DataTree, output_file: str): +def transform_l2g_input( + input_filename: Path, output_filename: Path, logger: Logger +) -> None: + """Entrypoint for L2G-Gridding-Service. + + Opens input and processes the data to a new output_file. + """ + with open_datatree(input_filename, decode_times=False) as in_data: + process_input(in_data, output_filename, logger=logger) + + +def process_input(in_data: DataTree, output_file: Path, logger: None | Logger = None): """Process input file to generate gridded output file.""" out_data = DataTree() @@ -62,7 +75,7 @@ def grid_variable(var: DataTree | DataArray, grid_info: dict) -> DataArray: grid = np.full( (grid_info['target']['Grid Height'], grid_info['target']['Grid Width']), fill_val, - dtype=var.encoding['dtype'], + dtype=(var.encoding.get('dtype', var.dtype)), ) try: valid_mask = ~np.isnan(var.data) @@ -93,7 +106,7 @@ def variable_fill_value(var: DataTree | DataArray) -> np.integer | np.floating | if fill_value is None: fill_value = var.attrs.get('missing_value') if fill_value is None: - fill_value = default_fill_value(var.encoding.get('dtype')) + fill_value = default_fill_value(var.encoding.get('dtype', var.dtype)) return fill_value @@ -150,10 +163,7 @@ def get_grid_information(in_data: DataTree, node: str) -> dict: def get_target_grid_information(node: str) -> dict: - """Return the target grid informaton. - - TODO [MHS, 11/13/2024] This might be in the wrong file. - """ + """Return the target grid informaton.""" if is_polar_node(node): gpd_name = 'EASE2_N09km.gpd' wkt = epsg_6931_wkt diff --git a/tests/unit/test_grid.py b/tests/unit/test_grid.py index 98c30bb..1ef3542 100644 --- a/tests/unit/test_grid.py +++ b/tests/unit/test_grid.py @@ -76,25 +76,32 @@ def sample_datatree(tmp_path): }, ) - dt[f'{node}/tb_time_utc'] = DataArray( - data=np.array( - [ - '2024-11-06T03:59:27.313Z', - '2024-11-06T03:59:25.754Z', - '2024-11-06T03:59:24.374Z', - '2024-11-06T03:59:22.735Z', - '2024-11-06T03:59:21.191Z', - ], - dtype=' Date: Fri, 22 Nov 2024 11:54:40 -0700 Subject: [PATCH 03/26] DAS-2254: Add Dockerization around the service. --- .gitignore | 2 ++ bin/build-image | 27 ++++++++++++++++ bin/build-test | 23 +++++++++++++ bin/clean-images | 24 ++++++++++++++ bin/run-test | 24 ++++++++++++++ docker/service.Dockerfile | 29 +++++++++++++++++ docker/service_version.txt | 1 + docker/tests.Dockerfile | 20 ++++++++++++ .../pip_test_requirements.txt | 1 + tests/run_tests.sh | 32 +++++++++++++++++++ 10 files changed, 183 insertions(+) create mode 100755 bin/build-image create mode 100755 bin/build-test create mode 100755 bin/clean-images create mode 100755 bin/run-test create mode 100644 docker/service.Dockerfile create mode 100644 docker/service_version.txt create mode 100644 docker/tests.Dockerfile rename pip_dev_requirements.txt => tests/pip_test_requirements.txt (77%) create mode 100755 tests/run_tests.sh diff --git a/.gitignore b/.gitignore index eeb8a6e..46398e3 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ **/__pycache__ +/tests/reports/ +/.coverage diff --git a/bin/build-image b/bin/build-image new file mode 100755 index 0000000..d0998b8 --- /dev/null +++ b/bin/build-image @@ -0,0 +1,27 @@ +#!/bin/bash +############################################################################### +# +# Build a Docker image of the SMAP-L2-Gridder service. +# +############################################################################### + +image="ghcr.io/nasa/smap-l2-gridder" + +# Retrieve the tag from the script arguments, or default to "latest". Bamboo +# will pass the contents of `docker/service_version.txt` as this argument, +# which contains a semantic version number. +# +tag=${1:-latest} + + +# Remove old versions of: ghcr.io/nasa/smap-l2-gridder and +# ghcr.io/nasa/smap-l2-gridder-test images +./bin/clean-images + +# Build the image +# This command tags the image with two tags: +# - The one supplied as a script argument. Bamboo will provide the semantic +# version number from `docker/service_version.txt`. +# - "latest", so the test Dockerfile can use the service image as a base image. +# +docker build -t ${image}:${tag} -t ${image}:latest -f docker/service.Dockerfile . diff --git a/bin/build-test b/bin/build-test new file mode 100755 index 0000000..f8539da --- /dev/null +++ b/bin/build-test @@ -0,0 +1,23 @@ +#!/bin/bash +############################################################################### +# +# Build a Docker container to run the test suite for the SMAP-L2-Gridding-Service +# +# To build the ghcr.io/nasa/smap-l2-gridder-test image, the +# ghcr.io/nasa/smap-l2-gridder image must also be present +# locally, as it is used as the base image in `docker/tests.Dockerfile`. +# +############################################################################### + +image="ghcr.io/nasa/smap-l2-gridder-test" +tag=${1:-latest} + + +# Look for old version of image and remove +old=$(docker images | grep "$image" | grep "$tag" | awk '{print $3}') +if [ ! -z "$old" ] && [ "$2" != "--no-delete" ]; then + docker rmi "$old" +fi + +# Build the image +docker build -t ${image}:${tag} -f docker/tests.Dockerfile . diff --git a/bin/clean-images b/bin/clean-images new file mode 100755 index 0000000..2b2a121 --- /dev/null +++ b/bin/clean-images @@ -0,0 +1,24 @@ +#!/bin/bash +############################################################################### +# +# remove all images containing the string +# "ghcr.io/nasa/smap-l2-gridder" and remove them. +# This is used for clean-up after development. +# +############################################################################### +remove_image_by_name() { + echo "Removing images for: $1" + matching_images=$(docker images --filter reference="$1" --format="{{.ID}}") + + if [ ! -z "${matching_images}" ]; then + docker rmi "${matching_images}" + fi +} + +image_base_name="ghcr.io/nasa/smap-l2-gridder" + +# First remove test images: +remove_image_by_name "${image_base_name}-test" + +# Next remove service images: +remove_image_by_name "${image_base_name}" diff --git a/bin/run-test b/bin/run-test new file mode 100755 index 0000000..3ff4759 --- /dev/null +++ b/bin/run-test @@ -0,0 +1,24 @@ +#!/bin/bash +############################################################################### +# +# Execute the ghcr.io/nasa/smap-l2-gridder-test Docker image +# +############################################################################### + +set -ex + +# Remove cached bytecode Python files, to ensure latest code is used +find . | grep -E "(__pycache__|\.pyc|\.pyo$)" | xargs rm -rf + +# Make the directory into which XML format test reports will be saved +mkdir -p test-reports + +# Make the directory into which coverage reports will be saved +mkdir -p coverage + +# Run the tests in a Docker container with mounted volumes for XML report +# output and test coverage reporting +docker run --rm \ + -v $(pwd)/test-reports:/home/tests/reports \ + -v $(pwd)/coverage:/home/tests/coverage \ + ghcr.io/nasa/smap-l2-gridder-test "$@" diff --git a/docker/service.Dockerfile b/docker/service.Dockerfile new file mode 100644 index 0000000..4210364 --- /dev/null +++ b/docker/service.Dockerfile @@ -0,0 +1,29 @@ +############################################################################### +# +# Service image for ghcr.io/nasa/harmony-smap-l2-gridder + +# SMAP-L2-Gridder backend service that transforms L2G (gridded trajectory) data +# into actual gridded data. +# +# This image installs dependencies via Pip. The service code is then copied +# into the Docker image. +# +############################################################################### +FROM python:3.12-slim-bookworm + +WORKDIR "/home" + +RUN apt-get update + +# Install Pip dependencies +COPY pip_requirements.txt /home/ + +RUN pip install --no-input --no-cache-dir \ + -r pip_requirements.txt + +# Copy service code. +COPY ./harmony_service harmony_service +COPY ./smap_l2_gridder smap_l2_gridder + +# Configure a container to be executable via the `docker run` command. +ENTRYPOINT ["python", "-m", "harmony_service"] diff --git a/docker/service_version.txt b/docker/service_version.txt new file mode 100644 index 0000000..77d6f4c --- /dev/null +++ b/docker/service_version.txt @@ -0,0 +1 @@ +0.0.0 diff --git a/docker/tests.Dockerfile b/docker/tests.Dockerfile new file mode 100644 index 0000000..b3025ee --- /dev/null +++ b/docker/tests.Dockerfile @@ -0,0 +1,20 @@ +############################################################################### +# +# Test image for the SMAP-L2-Gridding-Service. This test image uses the main +# service image as a base layer for the tests. This ensures that the contents +# of the service image are tested, preventing discrepancies between the service +# and test environments. +############################################################################### +FROM ghcr.io/nasa/smap-l2-gridder + +# Install additional Pip requirements (for testing) +COPY tests/pip_test_requirements.txt . + +RUN pip install --no-input --no-cache-dir \ + -r pip_test_requirements.txt + +# Copy test directory containing Python unittest suite, test data and utilities +COPY ./tests tests + +# Configure a container to be executable via the `docker run` command. +ENTRYPOINT ["/home/tests/run_tests.sh"] diff --git a/pip_dev_requirements.txt b/tests/pip_test_requirements.txt similarity index 77% rename from pip_dev_requirements.txt rename to tests/pip_test_requirements.txt index 8b1a6ab..51e4a0d 100644 --- a/pip_dev_requirements.txt +++ b/tests/pip_test_requirements.txt @@ -1,3 +1,4 @@ pytest==8.3.3 pytest-mock==3.14.0 mypy==1.13.0 +pylint==3.3.1 diff --git a/tests/run_tests.sh b/tests/run_tests.sh new file mode 100755 index 0000000..748f7f0 --- /dev/null +++ b/tests/run_tests.sh @@ -0,0 +1,32 @@ +#!/bin/sh +############################################################################### +# +# A script invoked by the test Dockerfile to run the Python `unittest` suite +# for the SMAP-L2-Gridding-Service. The script first runs the test suite, +# then it checks for linting errors. +############################################################################### + +# Exit status used to report back to caller +STATUS=0 + +# Run the standard set of unit tests, producing JUnit compatible output +pytest --cov=smap_l2_gridder --cov=harmony_service \ + --cov-report=html:coverage/reports \ + --junitxml=tests/reports/test-results-"$(date +'%Y%m%d%H%M%S')".xml || STATUS=1 + + +# Run pylint +# Ignored errors/warnings: +# W1203 - use of f-strings in log statements. This warning is leftover from +# using ''.format() vs % notation. For more information, see: +# https://github.com/PyCQA/pylint/issues/2354#issuecomment-414526879 +pylint smap_l2_gridder harmony_service --disable=W1203 +RESULT=$? +RESULT=$((3 & $RESULT)) + +if [ "$RESULT" -ne "0" ]; then + STATUS=1 + echo "ERROR: pylint generated errors" +fi + +exit $STATUS From 07ac89eb1a7ebbb2b58189836f54b52d1aa5bdac Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Fri, 22 Nov 2024 16:28:31 -0700 Subject: [PATCH 04/26] DAS-2254: pylint fixes --- smap_l2_gridder/__main__.py | 2 +- smap_l2_gridder/crs.py | 17 ++++++++--------- smap_l2_gridder/grid.py | 18 ++++++++++-------- tests/run_tests.sh | 8 +------- tests/unit/test_crs.py | 4 ++-- 5 files changed, 22 insertions(+), 27 deletions(-) diff --git a/smap_l2_gridder/__main__.py b/smap_l2_gridder/__main__.py index a195435..737210a 100644 --- a/smap_l2_gridder/__main__.py +++ b/smap_l2_gridder/__main__.py @@ -30,4 +30,4 @@ def main(): if __name__ == '__main__': - exit(main()) + main() diff --git a/smap_l2_gridder/crs.py b/smap_l2_gridder/crs.py index 6b7ff76..5d9a498 100644 --- a/smap_l2_gridder/crs.py +++ b/smap_l2_gridder/crs.py @@ -45,14 +45,14 @@ def col_row_to_xy(self, col: int, row: int) -> tuple[np.float64, np.float64]: # files CRS metadata may not match the authoritative value because of the # different varieties of WKT. That said, the CRS created by pyproj is the same. # i.e. -# pyproj.crs.CRS.from_wkt(epsg_6933_wkt).to_wkt() != epsg_6933_wkt +# pyproj.crs.CRS.from_wkt(EPSG_6933_WKT).to_wkt() != EPSG_6933_WKT # but -# pyproj.crs.CRS.from_wkt(pyproj.crs.CRS.from_wkt(epsg_6933_wkt).to_wkt()) -# == pyproj.crs.CRS.from_wkt(epsg_6933_wkt) +# pyproj.crs.CRS.from_wkt(pyproj.crs.CRS.from_wkt(EPSG_6933_WKT).to_wkt()) +# == pyproj.crs.CRS.from_wkt(EPSG_6933_WKT) # NSIDC EASE-Grid 2.0 Global CRS definition # from: https://epsg.org/crs/wkt/id/6933 -epsg_6933_wkt = ( +EPSG_6933_WKT = ( 'PROJCRS["WGS 84 / NSIDC EASE-Grid 2.0 Global",' 'BASEGEOGCRS["WGS 84",ENSEMBLE["World Geodetic System 1984 ensemble", ' 'MEMBER["World Geodetic System 1984 (Transit)", ID["EPSG",1166]], ' @@ -82,7 +82,7 @@ def col_row_to_xy(self, col: int, row: int) -> tuple[np.float64, np.float64]: # NSIDC EASE-Grid 2.0 North CRS definition # from: https://epsg.org/crs/wkt/id/6931 -epsg_6931_wkt = ( +EPSG_6931_WKT = ( 'PROJCRS["WGS 84 / NSIDC EASE-Grid 2.0 North",' 'BASEGEOGCRS["WGS 84",ENSEMBLE["World Geodetic System 1984 ensemble", ' 'MEMBER["World Geodetic System 1984 (Transit)", ID["EPSG",1166]], ' @@ -113,8 +113,8 @@ def col_row_to_xy(self, col: int, row: int) -> tuple[np.float64, np.float64]: ) GPD_TO_WKT = { - "EASE2_N09km.gpd": epsg_6931_wkt, - "EASE2_M09km.gpd": epsg_6933_wkt, + "EASE2_N09km.gpd": EPSG_6931_WKT, + "EASE2_M09km.gpd": EPSG_6933_WKT, } @@ -156,8 +156,7 @@ def convert_value(value: str) -> str | np.float64 | np.long: try: if '.' in value: return np.float64(value) - else: - return np.long(value) + return np.long(value) except ValueError: return value diff --git a/smap_l2_gridder/grid.py b/smap_l2_gridder/grid.py index f4832da..100e630 100644 --- a/smap_l2_gridder/grid.py +++ b/smap_l2_gridder/grid.py @@ -11,7 +11,7 @@ import numpy as np from xarray import DataArray, DataTree, open_datatree -from .crs import compute_dims, create_crs, epsg_6931_wkt, epsg_6933_wkt, parse_gpd_file +from .crs import EPSG_6931_WKT, EPSG_6933_WKT, compute_dims, create_crs, parse_gpd_file def transform_l2g_input( @@ -19,8 +19,9 @@ def transform_l2g_input( ) -> None: """Entrypoint for L2G-Gridding-Service. - Opens input and processes the data to a new output_file. + Opens input and processes the data to a new output file. """ + logger.debug(f'Is this right {output_filename}?') with open_datatree(input_filename, decode_times=False) as in_data: process_input(in_data, output_filename, logger=logger) @@ -123,11 +124,12 @@ def default_fill_value(data_type: np.dtype | None) -> np.integer | np.floating | """ if not np.issubdtype(data_type, np.number): return None - elif np.issubdtype(data_type, np.floating): + + if np.issubdtype(data_type, np.floating): return np.dtype(data_type).type(-9999.0) - else: - # np.issubdtype(data_type, np.integer): - return np.dtype(data_type).type(np.iinfo(data_type).max) + + # np.issubdtype(data_type, np.integer): + return np.dtype(data_type).type(np.iinfo(data_type).max) def get_target_variables(in_data: DataTree, node: str) -> Iterable[str]: @@ -166,10 +168,10 @@ def get_target_grid_information(node: str) -> dict: """Return the target grid informaton.""" if is_polar_node(node): gpd_name = 'EASE2_N09km.gpd' - wkt = epsg_6931_wkt + wkt = EPSG_6931_WKT else: gpd_name = 'EASE2_M09km.gpd' - wkt = epsg_6933_wkt + wkt = EPSG_6933_WKT target_grid_info = parse_gpd_file(gpd_name) target_grid_info['wkt'] = wkt diff --git a/tests/run_tests.sh b/tests/run_tests.sh index 748f7f0..cedfb35 100755 --- a/tests/run_tests.sh +++ b/tests/run_tests.sh @@ -14,15 +14,9 @@ pytest --cov=smap_l2_gridder --cov=harmony_service \ --cov-report=html:coverage/reports \ --junitxml=tests/reports/test-results-"$(date +'%Y%m%d%H%M%S')".xml || STATUS=1 - # Run pylint -# Ignored errors/warnings: -# W1203 - use of f-strings in log statements. This warning is leftover from -# using ''.format() vs % notation. For more information, see: -# https://github.com/PyCQA/pylint/issues/2354#issuecomment-414526879 pylint smap_l2_gridder harmony_service --disable=W1203 -RESULT=$? -RESULT=$((3 & $RESULT)) +RESULT=$((3 & $?)) if [ "$RESULT" -ne "0" ]; then STATUS=1 diff --git a/tests/unit/test_crs.py b/tests/unit/test_crs.py index f3b1a73..27190ff 100644 --- a/tests/unit/test_crs.py +++ b/tests/unit/test_crs.py @@ -8,11 +8,11 @@ from xarray import DataArray from smap_l2_gridder.crs import ( + EPSG_6933_WKT, Geotransform, compute_dims, convert_value, create_crs, - epsg_6933_wkt, geotransform_from_target_info, parse_gpd_file, validate_gpd_style, @@ -32,7 +32,7 @@ 'Grid Height': 10, 'Grid Map Origin Column': -0.5, 'Grid Map Origin Row': -0.5, - 'wkt': epsg_6933_wkt, + 'wkt': EPSG_6933_WKT, } From 2c465cafb76112eb65dd54797d186d57099dffd1 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Mon, 25 Nov 2024 11:28:20 -0700 Subject: [PATCH 05/26] DAS-2254: Add pytest-cov to test requirements --- tests/pip_test_requirements.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/pip_test_requirements.txt b/tests/pip_test_requirements.txt index 51e4a0d..40971db 100644 --- a/tests/pip_test_requirements.txt +++ b/tests/pip_test_requirements.txt @@ -1,4 +1,5 @@ -pytest==8.3.3 -pytest-mock==3.14.0 mypy==1.13.0 pylint==3.3.1 +pytest-cov==6.0.0 +pytest-mock==3.14.0 +pytest==8.3.3 From 0e5aa8afa00f9651d395ebe5a2fc166209219544 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Mon, 25 Nov 2024 12:45:22 -0700 Subject: [PATCH 06/26] DAS-2254: Output the coverage and test-report into reports This also lets a user run this from the commandline with or without docker and the results go to the same place. --- bin/run-test | 8 ++++---- tests/run_tests.sh | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bin/run-test b/bin/run-test index 3ff4759..aaf067d 100755 --- a/bin/run-test +++ b/bin/run-test @@ -11,14 +11,14 @@ set -ex find . | grep -E "(__pycache__|\.pyc|\.pyo$)" | xargs rm -rf # Make the directory into which XML format test reports will be saved -mkdir -p test-reports +mkdir -p reports/test-reports # Make the directory into which coverage reports will be saved -mkdir -p coverage +mkdir -p reports/coverage # Run the tests in a Docker container with mounted volumes for XML report # output and test coverage reporting docker run --rm \ - -v $(pwd)/test-reports:/home/tests/reports \ - -v $(pwd)/coverage:/home/tests/coverage \ + -v $(pwd)/reports/test-reports:/home/reports/test-reports \ + -v $(pwd)/reports/coverage:/home/reports/coverage \ ghcr.io/nasa/smap-l2-gridder-test "$@" diff --git a/tests/run_tests.sh b/tests/run_tests.sh index cedfb35..6510934 100755 --- a/tests/run_tests.sh +++ b/tests/run_tests.sh @@ -11,8 +11,8 @@ STATUS=0 # Run the standard set of unit tests, producing JUnit compatible output pytest --cov=smap_l2_gridder --cov=harmony_service \ - --cov-report=html:coverage/reports \ - --junitxml=tests/reports/test-results-"$(date +'%Y%m%d%H%M%S')".xml || STATUS=1 + --cov-report=html:reports/coverage \ + --junitxml=reports/test-reports/test-results-"$(date +'%Y%m%d%H%M%S')".xml || STATUS=1 # Run pylint pylint smap_l2_gridder harmony_service --disable=W1203 From fd48c6fc4fe9550a7cc60c80048f940ce68c7904 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Tue, 26 Nov 2024 11:55:32 -0700 Subject: [PATCH 07/26] DAS-2254: Add adapter tests. --- harmony_service/adapter.py | 7 +- smap_l2_gridder/grid.py | 1 - tests/conftest.py | 136 +++++++++++++++++++++++++++++ tests/test_service/__init__.py | 1 + tests/test_service/test_adapter.py | 126 ++++++++++++++++++++++++++ tests/unit/test_grid.py | 86 ------------------ 6 files changed, 267 insertions(+), 90 deletions(-) create mode 100644 tests/conftest.py create mode 100644 tests/test_service/__init__.py create mode 100644 tests/test_service/test_adapter.py diff --git a/harmony_service/adapter.py b/harmony_service/adapter.py index 35754c4..f8870c3 100644 --- a/harmony_service/adapter.py +++ b/harmony_service/adapter.py @@ -43,12 +43,13 @@ def process_item(self, item: Item, source: HarmonySource) -> Item: cfg=self.config, access_token=self.message.accessToken, ) - + print(f'input_filepath: {input_filepath}') working_filename = Path(input_filepath).parent / 'working_gridded.nc' - final_target_gridfile = generate_output_filename( + final_target_filename = generate_output_filename( asset.href, is_regridded=True ) + print(f'final_target_filename:{final_target_filename}') transform_l2g_input( input_filepath, working_filename, logger=self.logger @@ -57,7 +58,7 @@ def process_item(self, item: Item, source: HarmonySource) -> Item: # Stage the transformed output: staged_url = stage( working_filename, - final_target_gridfile, + final_target_filename, 'application/x-netcdf4', location=self.message.stagingLocation, logger=self.logger, diff --git a/smap_l2_gridder/grid.py b/smap_l2_gridder/grid.py index 100e630..da6bff6 100644 --- a/smap_l2_gridder/grid.py +++ b/smap_l2_gridder/grid.py @@ -21,7 +21,6 @@ def transform_l2g_input( Opens input and processes the data to a new output file. """ - logger.debug(f'Is this right {output_filename}?') with open_datatree(input_filename, decode_times=False) as in_data: process_input(in_data, output_filename, logger=logger) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..31b793d --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,136 @@ +"""Set up common pytest fixtures.""" + +from datetime import datetime + +import numpy as np +import pytest +from harmony_service_lib.util import bbox_to_geometry +from pystac import Asset, Catalog, Item +from xarray import DataArray, DataTree, open_datatree + + +# Fixtures +@pytest.fixture +def sample_datatree_file(tmp_path) -> str: + """Create a sample DataTree file for testing. + + The test data is repeated for both global and polar nodes. + + A sample DataTree is created and written to disk. + + The filename is returned. + """ + dt = DataTree() + dt["Metadata/Lineage/DEMSLP"] = DataTree() + dt["Metadata/Lineage/DEMSLP"].attrs[ + "Description" + ] = "Representative surface slope data for each of the 9 km cells" + + nodes = ["Soil_Moisture_Retrieval_Data", "Soil_Moisture_Retrieval_Data_Polar"] + for node in nodes: + dt[f"{node}"] = DataTree() + dt[f"{node}/EASE_column_index"] = DataArray( + data=np.array([1175, 1175, 1175, 1175, 1175], dtype=np.uint16), + dims=["phony_dim_0"], + name="EASE_column_index", + attrs={ + "long_name": "The column index of the 9 km EASE grid cell...", + "valid_min": 0, + "valid_max": 3855, + "_FillValue": np.uint16(65534), + }, + ) + + dt[f"{node}/EASE_row_index"] = DataArray( + data=np.array([1603, 1604, 1605, 1606, 1607], dtype=np.uint16), + dims=["phony_dim_0"], + attrs={ + "long_name": "The row index of the 9 km EASE grid cell...", + "valid_min": 0, + "valid_max": 1623, + "_FillValue": np.uint16(65534), + }, + ) + + dt[f'{node}/albedo'] = DataArray( + data=np.array( + [0.0009434, 0.00136986, 0.0025, 0.0, -9999.0], dtype=np.float32 + ), + dims=['phony_dim_0'], + attrs={ + 'long_name': 'Diffuse reflecting power of the Earth's...', + 'valid_min': 0.0, + 'valid_max': 1.0, + '_FillValue': np.float32(-9999.0), + }, + ) + + # This part of the fixture REALLY slow. Including it adds 7 seconds(!!) + # to the run time of the pytests and there is only a single test that + # needs this. I tested this function outside of pytest and it's not + # acting like a bottle neck so I'm not sure what to do. But for now + # I'll remove this and skip the test. + + # dt[f'{node}/tb_time_utc'] = DataArray( + # data=np.array( + # [ + # '2024-11-06T03:59:27.313Z', + # '2024-11-06T03:59:25.754Z', + # '2024-11-06T03:59:24.374Z', + # '2024-11-06T03:59:22.735Z', + # '2024-11-06T03:59:21.191Z', + # ], + # dtype=' DataTree: + """A sample datatree is gennerated and returned after being read from disk. + + This represents the expected shape of an SPL2SMP_E granule. + """ + dt2 = open_datatree(sample_datatree_file, decode_times=False) + return dt2 + + +@pytest.fixture +def stack_catalog() -> Catalog: + """Creates a generic SpatioTemporal Asset Catalog (STAC). + + Used as a valid input for SMAPL2GridderAdapter initialization + + For simplicity the geometric and temporal properties of each item are + set to default values. + + """ + catalog = Catalog(id='input catalog', description='test input') + + item = Item( + id='input granule', + bbox=[-180, -90, 180, 90], + geometry=bbox_to_geometry([-180, -90, 180, 90]), + datetime=datetime(2020, 1, 1), + properties=None, + ) + + item.add_asset( + 'input data', + Asset( + 'https://www.example.com/input.h5', + media_type='application/x-hdf', + roles=['data'], + ), + ) + catalog.add_item(item) + + return catalog diff --git a/tests/test_service/__init__.py b/tests/test_service/__init__.py new file mode 100644 index 0000000..0508cf4 --- /dev/null +++ b/tests/test_service/__init__.py @@ -0,0 +1 @@ +"""Initialize service tests module.""" diff --git a/tests/test_service/test_adapter.py b/tests/test_service/test_adapter.py new file mode 100644 index 0000000..096c09a --- /dev/null +++ b/tests/test_service/test_adapter.py @@ -0,0 +1,126 @@ +"""End-to-end tests of the SMAP-L2-Gridding-Service.""" + +import pytest +from harmony_service_lib.message import Message as HarmonyMessage +from harmony_service_lib.util import config +from xarray import open_datatree + +from harmony_service.adapter import SMAPL2GridderAdapter +from smap_l2_gridder.exceptions import InvalidGPDError + + +def test_process_sample_file(tmp_path, sample_datatree_file, stack_catalog, mocker): + """Run a sample file through the adapter.""" + # override the adapter's working dir + temp_dir_mock = mocker.patch('harmony_service.adapter.TemporaryDirectory') + temp_dir_mock.return_value.__enter__.return_value = tmp_path + + # use a datatree fixture as the downloaded file + download_mock = mocker.patch('harmony_service.adapter.download') + download_mock.return_value = sample_datatree_file + + # set the output filename + filename_mock = mocker.patch('harmony_service.adapter.generate_output_filename') + output_filename = tmp_path / 'test-gridded.nc' + filename_mock.return_value = output_filename + + stage_mock = mocker.patch('harmony_service.adapter.stage') + staging_dir = tmp_path / 'staging' + + message = HarmonyMessage( + { + 'accessToken': 'fake_token', + 'callback': 'https://example.com/', + 'sources': [{'collection': 'C1234-EEDTEST', 'shortName': 'smap_test'}], + 'stagingLocation': staging_dir, + 'user': 'fakeUsername', + 'format': {'mime': 'application/x-netcdf4'}, + } + ) + + # Set up Adapter class + smap_l2_gridding_service = SMAPL2GridderAdapter( + message, config=config(validate=False), catalog=stack_catalog + ) + + # Invoke the adapter. + _, _ = smap_l2_gridding_service.invoke() + + asset_href = stack_catalog.get_item('input granule').assets['input data'].href + + download_mock.assert_called_once_with( + asset_href, + tmp_path, + logger=mocker.ANY, + cfg=mocker.ANY, + access_token=message.accessToken, + ) + + stage_mock.assert_called_once_with( + tmp_path / 'working_gridded.nc', + output_filename, + 'application/x-netcdf4', + location=message.stagingLocation, + logger=mocker.ANY, + cfg=mocker.ANY, + ) + + # Validate the gridded output data + results = open_datatree(tmp_path / 'working_gridded.nc') + assert set(results.groups) == set( + ( + '/', + '/Metadata', + '/Soil_Moisture_Retrieval_Data', + '/Soil_Moisture_Retrieval_Data_Polar', + '/Metadata/Lineage', + '/Metadata/Lineage/DEMSLP', + ) + ) + assert results['Soil_Moisture_Retrieval_Data'].coords['x-dim'].name == 'x-dim' + assert len(results['Soil_Moisture_Retrieval_Data'].coords['x-dim']) == 3856 + assert ( + results['Soil_Moisture_Retrieval_Data'].crs.attrs['projected_crs_name'] + == 'WGS 84 / NSIDC EASE-Grid 2.0 Global' + ) + assert ( + results['Soil_Moisture_Retrieval_Data_Polar'].crs.attrs['projected_crs_name'] + == 'WGS 84 / NSIDC EASE-Grid 2.0 North' + ) + assert set(results['Soil_Moisture_Retrieval_Data_Polar'].variables) == set( + ['EASE_column_index', 'EASE_row_index', 'albedo', 'crs', 'x-dim', 'y-dim'] + ) + assert set(results['Soil_Moisture_Retrieval_Data'].variables) == set( + ['EASE_column_index', 'EASE_row_index', 'albedo', 'crs', 'x-dim', 'y-dim'] + ) + + +def test_process_sample_file_failure( + tmp_path, stack_catalog, sample_datatree_file, mocker +): + """Test failure.""" + # use a datatree fixture as the downloaded file + download_mock = mocker.patch('harmony_service.adapter.download') + download_mock.return_value = sample_datatree_file + mocker.patch('harmony_service.adapter.generate_output_filename') + mocker.patch('harmony_service.adapter.stage') + + get_grid_info_mock = mocker.patch('smap_l2_gridder.grid.get_grid_information') + get_grid_info_mock.side_effect = InvalidGPDError('invalid gpd') + + message = HarmonyMessage( + { + 'sources': [{'collection': 'C1234-EEDTEST'}], + } + ) + + # Set up Adapter class + smap_l2_gridding_service = SMAPL2GridderAdapter( + message, config=config(validate=False), catalog=stack_catalog + ) + + # Invoke the adapter. + with pytest.raises(InvalidGPDError) as error_info: + smap_l2_gridding_service.invoke() + + assert 'invalid gpd' in str(error_info.value) diff --git a/tests/unit/test_grid.py b/tests/unit/test_grid.py index 1ef3542..215931c 100644 --- a/tests/unit/test_grid.py +++ b/tests/unit/test_grid.py @@ -19,92 +19,6 @@ ) -# Fixtures -@pytest.fixture -def sample_datatree(tmp_path): - """Create a sample DataTree for testing. - - It is round tripped to a temporary disk location for easy of setting the - correct NetCDF attributes. - - This represents the expected shape of an SPL2SMP_E granule. - The data is repeated for both global and polar nodes. - - """ - dt = DataTree() - dt["Metadata/Lineage/DEMSLP"] = DataTree() - dt["Metadata/Lineage/DEMSLP"].attrs[ - "Description" - ] = "Representative surface slope data for each of the 9 km cells" - - nodes = ["Soil_Moisture_Retrieval_Data", "Soil_Moisture_Retrieval_Data_Polar"] - for node in nodes: - dt[f"{node}"] = DataTree() - dt[f"{node}/EASE_column_index"] = DataArray( - data=np.array([1175, 1175, 1175, 1175, 1175], dtype=np.uint16), - dims=["phony_dim_0"], - name="EASE_column_index", - attrs={ - "long_name": "The column index of the 9 km EASE grid cell...", - "valid_min": 0, - "valid_max": 3855, - "_FillValue": np.uint16(65534), - }, - ) - - dt[f"{node}/EASE_row_index"] = DataArray( - data=np.array([1603, 1604, 1605, 1606, 1607], dtype=np.uint16), - dims=["phony_dim_0"], - attrs={ - "long_name": "The row index of the 9 km EASE grid cell...", - "valid_min": 0, - "valid_max": 1623, - "_FillValue": np.uint16(65534), - }, - ) - - dt[f'{node}/albedo'] = DataArray( - data=np.array( - [0.0009434, 0.00136986, 0.0025, 0.0, -9999.0], dtype=np.float32 - ), - dims=['phony_dim_0'], - attrs={ - 'long_name': 'Diffuse reflecting power of the Earth's...', - 'valid_min': 0.0, - 'valid_max': 1.0, - '_FillValue': np.float32(-9999.0), - }, - ) - - # This part of the fixture REALLY slow. Including it adds 7 seconds!! - # to the run time of the pytests and there is only a single test that - # needs this. I tested this function outside of pytest and it's not - # acting like a bottle neck so I'm not sure what to do. But for now - # I'll remove this and skip the test. - - # dt[f'{node}/tb_time_utc'] = DataArray( - # data=np.array( - # [ - # '2024-11-06T03:59:27.313Z', - # '2024-11-06T03:59:25.754Z', - # '2024-11-06T03:59:24.374Z', - # '2024-11-06T03:59:22.735Z', - # '2024-11-06T03:59:21.191Z', - # ], - # dtype=' Date: Tue, 26 Nov 2024 13:01:11 -0700 Subject: [PATCH 08/26] DAS-2254: change ' to " and remove print statements. --- .gitignore | 2 +- harmony_service/adapter.py | 3 +-- pip_requirements.txt | 2 +- smap_l2_gridder/crs.py | 4 +-- smap_l2_gridder/grid.py | 4 +-- tests/__init__.py | 1 + tests/conftest.py | 48 +++++++++++++++++----------------- tests/test_service/__init__.py | 2 +- tests/unit/__init__.py | 1 + 9 files changed, 34 insertions(+), 33 deletions(-) diff --git a/.gitignore b/.gitignore index 46398e3..3909a9e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ **/__pycache__ -/tests/reports/ /.coverage +/reports/ diff --git a/harmony_service/adapter.py b/harmony_service/adapter.py index f8870c3..3f3d688 100644 --- a/harmony_service/adapter.py +++ b/harmony_service/adapter.py @@ -43,13 +43,12 @@ def process_item(self, item: Item, source: HarmonySource) -> Item: cfg=self.config, access_token=self.message.accessToken, ) - print(f'input_filepath: {input_filepath}') + working_filename = Path(input_filepath).parent / 'working_gridded.nc' final_target_filename = generate_output_filename( asset.href, is_regridded=True ) - print(f'final_target_filename:{final_target_filename}') transform_l2g_input( input_filepath, working_filename, logger=self.logger diff --git a/pip_requirements.txt b/pip_requirements.txt index 4b2c51a..44a4028 100644 --- a/pip_requirements.txt +++ b/pip_requirements.txt @@ -1,5 +1,5 @@ harmony-service-lib==2.3.0 netcdf4==1.7.2 pyproj==3.7.0 -pystac~=1.11.0 +pystac==1.11.0 xarray==2024.10.0 diff --git a/smap_l2_gridder/crs.py b/smap_l2_gridder/crs.py index 5d9a498..bd9bd83 100644 --- a/smap_l2_gridder/crs.py +++ b/smap_l2_gridder/crs.py @@ -222,8 +222,8 @@ def compute_dims(target_info: dict) -> tuple[DataArray, DataArray]: target grid information dictionary. """ - n_cols = target_info["Grid Width"] - n_rows = target_info["Grid Height"] + n_cols = target_info['Grid Width'] + n_rows = target_info['Grid Height'] geotransform = geotransform_from_target_info(target_info) # compute the x,y locations along a column and row diff --git a/smap_l2_gridder/grid.py b/smap_l2_gridder/grid.py index da6bff6..8db6e31 100644 --- a/smap_l2_gridder/grid.py +++ b/smap_l2_gridder/grid.py @@ -58,7 +58,7 @@ def process_input(in_data: DataTree, output_file: Path, logger: None | Logger = def prepare_variable(var: DataTree | DataArray, grid_info: dict) -> DataArray: """Grid and annotate intput variable.""" grid_data = grid_variable(var, grid_info) - grid_data.attrs = {**var.attrs, 'grid_mapping': "crs"} + grid_data.attrs = {**var.attrs, 'grid_mapping': 'crs'} encoding = { '_FillValue': variable_fill_value(var), 'coordinates': var.encoding.get('coordinates', None), @@ -81,7 +81,7 @@ def grid_variable(var: DataTree | DataArray, grid_info: dict) -> DataArray: valid_mask = ~np.isnan(var.data) except TypeError: # tb_time_utc is type string - valid_mask = var.data != "" + valid_mask = var.data != '' valid_rows = grid_info['src']['rows'].data[valid_mask] valid_cols = grid_info['src']['cols'].data[valid_mask] valid_values = var.data[valid_mask] diff --git a/tests/__init__.py b/tests/__init__.py index e69de29..fc4c97e 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Initialize tests package.""" diff --git a/tests/conftest.py b/tests/conftest.py index 31b793d..4d1e16b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -14,41 +14,41 @@ def sample_datatree_file(tmp_path) -> str: """Create a sample DataTree file for testing. - The test data is repeated for both global and polar nodes. + A sample DataTree is created - A sample DataTree is created and written to disk. + The test data is repeated for both global and polar nodes. - The filename is returned. + The tree is written to disk and the filename is returned. """ dt = DataTree() - dt["Metadata/Lineage/DEMSLP"] = DataTree() - dt["Metadata/Lineage/DEMSLP"].attrs[ - "Description" - ] = "Representative surface slope data for each of the 9 km cells" + dt['Metadata/Lineage/DEMSLP'] = DataTree() + dt['Metadata/Lineage/DEMSLP'].attrs[ + 'Description' + ] = 'Representative surface slope data for each of the 9 km cells' - nodes = ["Soil_Moisture_Retrieval_Data", "Soil_Moisture_Retrieval_Data_Polar"] + nodes = ['Soil_Moisture_Retrieval_Data', 'Soil_Moisture_Retrieval_Data_Polar'] for node in nodes: - dt[f"{node}"] = DataTree() - dt[f"{node}/EASE_column_index"] = DataArray( + dt[f'{node}'] = DataTree() + dt[f'{node}/EASE_column_index'] = DataArray( data=np.array([1175, 1175, 1175, 1175, 1175], dtype=np.uint16), - dims=["phony_dim_0"], - name="EASE_column_index", + dims=['phony_dim_0'], + name='EASE_column_index', attrs={ - "long_name": "The column index of the 9 km EASE grid cell...", - "valid_min": 0, - "valid_max": 3855, - "_FillValue": np.uint16(65534), + 'long_name': 'The column index of the 9 km EASE grid cell...', + 'valid_min': 0, + 'valid_max': 3855, + '_FillValue': np.uint16(65534), }, ) - dt[f"{node}/EASE_row_index"] = DataArray( + dt[f'{node}/EASE_row_index'] = DataArray( data=np.array([1603, 1604, 1605, 1606, 1607], dtype=np.uint16), - dims=["phony_dim_0"], + dims=['phony_dim_0'], attrs={ - "long_name": "The row index of the 9 km EASE grid cell...", - "valid_min": 0, - "valid_max": 1623, - "_FillValue": np.uint16(65534), + 'long_name': 'The row index of the 9 km EASE grid cell...', + 'valid_min': 0, + 'valid_max': 1623, + '_FillValue': np.uint16(65534), }, ) @@ -95,9 +95,9 @@ def sample_datatree_file(tmp_path) -> str: @pytest.fixture def sample_datatree(sample_datatree_file) -> DataTree: - """A sample datatree is gennerated and returned after being read from disk. + """A sample datatree fixture is generated, opened and returned. - This represents the expected shape of an SPL2SMP_E granule. + This approximates the expected shape of an SPL2SMP_E granule. """ dt2 = open_datatree(sample_datatree_file, decode_times=False) return dt2 diff --git a/tests/test_service/__init__.py b/tests/test_service/__init__.py index 0508cf4..02ebdf8 100644 --- a/tests/test_service/__init__.py +++ b/tests/test_service/__init__.py @@ -1 +1 @@ -"""Initialize service tests module.""" +"""Initialize service tests package.""" diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py index e69de29..8624935 100644 --- a/tests/unit/__init__.py +++ b/tests/unit/__init__.py @@ -0,0 +1 @@ +"""Initialize unit tests package.""" From e0672ca7fef3b81d15b50a45de2284bdc626697f Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Tue, 26 Nov 2024 13:48:47 -0700 Subject: [PATCH 09/26] DAS-2254: Resolve slow fixture. That really didn't make sense, but the test time went from 7+ to 1.1 sec with this change. --- tests/conftest.py | 34 ++++++++++++------------------ tests/test_service/test_adapter.py | 20 ++++++++++++++++-- 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 4d1e16b..6b0fbf4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,6 +3,7 @@ from datetime import datetime import numpy as np +import pandas as pd import pytest from harmony_service_lib.util import bbox_to_geometry from pystac import Asset, Catalog, Item @@ -65,26 +66,19 @@ def sample_datatree_file(tmp_path) -> str: }, ) - # This part of the fixture REALLY slow. Including it adds 7 seconds(!!) - # to the run time of the pytests and there is only a single test that - # needs this. I tested this function outside of pytest and it's not - # acting like a bottle neck so I'm not sure what to do. But for now - # I'll remove this and skip the test. - - # dt[f'{node}/tb_time_utc'] = DataArray( - # data=np.array( - # [ - # '2024-11-06T03:59:27.313Z', - # '2024-11-06T03:59:25.754Z', - # '2024-11-06T03:59:24.374Z', - # '2024-11-06T03:59:22.735Z', - # '2024-11-06T03:59:21.191Z', - # ], - # dtype=' Date: Tue, 26 Nov 2024 14:01:35 -0700 Subject: [PATCH 10/26] DAS-2254: That didn't actually fix the problem of slow fixture. --- tests/conftest.py | 34 ++++++++++++++++++------------ tests/test_service/test_adapter.py | 4 ++-- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 6b0fbf4..92be48a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,7 +3,6 @@ from datetime import datetime import numpy as np -import pandas as pd import pytest from harmony_service_lib.util import bbox_to_geometry from pystac import Asset, Catalog, Item @@ -66,19 +65,26 @@ def sample_datatree_file(tmp_path) -> str: }, ) - dt[f'{node}/tb_time_utc'] = DataArray( - data=pd.to_datetime( - [ - '2024-11-06T03:59:27.313Z', - '2024-11-06T03:59:25.754Z', - '2024-11-06T03:59:24.374Z', - '2024-11-06T03:59:22.735Z', - '2024-11-06T03:59:21.191Z', - ] - ).values, - dims=['phony_dim_0'], - attrs={'long_name': 'Arithmetic average of the acquisition time...'}, - ) + # This part of the fixture REALLY slow when running tests. Using it + # adds 7 seconds(!!) to the run time. Really there is only one useful + # test that needs this. I tested this function outside of pytest and + # it's not acting like a bottle neck so I'm not sure what to do. But + # for now I'll remove this and use mark to skip the test. + + # dt[f'{node}/tb_time_utc'] = DataArray( + # data=np.array( + # [ + # '2024-11-06T03:59:27.313Z', + # '2024-11-06T03:59:25.754Z', + # '2024-11-06T03:59:24.374Z', + # '2024-11-06T03:59:22.735Z', + # '2024-11-06T03:59:21.191Z', + # ], + # dtype=' Date: Tue, 26 Nov 2024 14:22:00 -0700 Subject: [PATCH 11/26] DAS-2254: Fix output filename. --- harmony_service/adapter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/harmony_service/adapter.py b/harmony_service/adapter.py index 3f3d688..04d40b3 100644 --- a/harmony_service/adapter.py +++ b/harmony_service/adapter.py @@ -47,7 +47,7 @@ def process_item(self, item: Item, source: HarmonySource) -> Item: working_filename = Path(input_filepath).parent / 'working_gridded.nc' final_target_filename = generate_output_filename( - asset.href, is_regridded=True + asset.href, is_regridded=True, ext='.nc' ) transform_l2g_input( From dc8e66f82116d74409ab338a0795c4bb4272690c Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Tue, 26 Nov 2024 15:31:13 -0700 Subject: [PATCH 12/26] DAS-2254: Fix mypy tests. --- .github/workflows/mypy.yml | 2 +- CHANGELOG.md | 1 + README.md | 13 +++++++++---- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 9e973f7..31195b2 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -25,7 +25,7 @@ jobs: run: | python -m pip install --upgrade pip pip install mypy - pip install -r pip_requirements.txt -r pip_dev_requirements.txt + pip install -r pip_requirements.txt -r tests/pip_test_requirements.txt - name: Run mypy run: mypy . diff --git a/CHANGELOG.md b/CHANGELOG.md index 472976c..097b6b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,3 +10,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Initial codebase that transforms SPL2SMP_E granules into NetCDF4-CF grids. [#1](https://github.com/nasa/harmony-SMAP-L2-gridding-service/pull/1) +- Code and configuration to wrap gridding code into a Harmony Service [#3](https://github.com/nasa/harmony-SMAP-L2-gridding-service/pull/3 ) diff --git a/README.md b/README.md index 4fb3d73..3c9d3f6 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,16 @@ -# smap-l2-gridder +# SMAP L2 Gridding Service + +This repository contains the code for the SMAP-L2-Gridding-Service, which is a python service that transforms NASA level 2 gridded trajectory data into gridded NetCDF4-CF output files. + +This code currently works on `SPL2SMP_E` data and will be adapted for other gridded trajectory data. -This is a python service to transform NASA level 2 Grid trajectory data into gridded NetCDF4-CF output files. -## Transform Data -To run the regridder you can create an isolated python 3.12 environment installing packages from the `pip_requirements.txt` file. + + +## Transform Data +To run the regridder, first create an isolated python 3.12 environment using packages from the `pip_requirements.txt` file. From the commandline run: From defb7504286b2abf3caaba4e4dc4641f1f058e4c Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Tue, 26 Nov 2024 16:23:15 -0700 Subject: [PATCH 13/26] DAS-2254: Fix mypy errors. --- pyproject.toml | 4 ++++ tests/conftest.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..197e9af --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,4 @@ + +[[tool.mypy.overrides]] +module = "harmony_service_lib.*" +ignore_missing_imports = true diff --git a/tests/conftest.py b/tests/conftest.py index 92be48a..cb04f16 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -120,7 +120,7 @@ def stack_catalog() -> Catalog: bbox=[-180, -90, 180, 90], geometry=bbox_to_geometry([-180, -90, 180, 90]), datetime=datetime(2020, 1, 1), - properties=None, + properties={'props': 'None'}, ) item.add_asset( From a3dbd8a0f8e400ab9a293c27aa9deebe595de259 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Wed, 27 Nov 2024 08:25:51 -0700 Subject: [PATCH 14/26] DAS-2254: Updates README. Adds CONTRIBUTING. - Adds ruff config to pyproject.toml --- CONTRIBUTING.md | 47 +++++++++++++++++++++++++++++++++++++++++++++++ README.md | 43 ++++++++++++++++++++++++++++++++++++------- pyproject.toml | 3 +++ 3 files changed, 86 insertions(+), 7 deletions(-) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..a49d08d --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,47 @@ +# Contributing to SMAP-L2-Gridding-Service + +Thanks for contributing! + +## Making Changes + +To allow us to incorporate your changes, please use the +[Fork-and-Pull](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/getting-started/about-collaborative-development-models#fork-and-pull-model) +development model: + +1. Fork this repository to your personal account. +2. Create a branch and make your changes. +3. Test the changes locally/in your personal fork. +4. Submit a pull request to open a discussion about your proposed changes. +5. The maintainers will talk with you about it and decide to merge or request + additional changes. + +For larger items, consider contacting the maintainers first to coordinate +development efforts. + +## Commits + +Our ticketing and CI/CD tools are configured to sync statuses amongst each +other. Commits play an important role in this process. Please start all commits +with the Jira ticket number associated with your feature, task, or bug. All +commit messages should follow the format +"[Jira Project]-XXXX - [Your commit message here]" + +## General coding practices: + +This repository adheres to Python coding style recommendations from +[PEP8](https://peps.python.org/pep-0008/). Additionally, type hints are +required in all function signatures. + +When adding or updating functionality, please ensure unit tests are added to +an appropriate module in the `tests` sub directories, which cover each branch +of the code. + +## Disclaimer + +SMAP-L2-Gridding-Service maintainers will review all pull requests submitted. Only requests that +meet the standard of quality set forth by existing code, following the patterns +set forth by existing code, and adhering to existing design patterns will be +considered and/or accepted. + +For general tips on open source contributions, see +[Contributing to Open Source on GitHub](https://guides.github.com/activities/contributing-to-open-source/). diff --git a/README.md b/README.md index 3c9d3f6..9d55216 100644 --- a/README.md +++ b/README.md @@ -2,24 +2,53 @@ This repository contains the code for the SMAP-L2-Gridding-Service, which is a python service that transforms NASA level 2 gridded trajectory data into gridded NetCDF4-CF output files. -This code currently works on `SPL2SMP_E` data and will be adapted for other gridded trajectory data. +This code currently works on `SPL2SMP_E` data and will be adapted for other SMAP collections of gridded trajectory data. +## Transforming Data +The logic of transforming data is contained in the `smap_l2_gridder` directory. It reads NASA L2 Gridded trajectory data and writes output NetCDF-CF files with the trajecotry style data correctly populated into EASE2 grids. +### Commandline invocation +To run the regridder on an input file. Create an isolated python 3.12 environment using packages from the `pip_requirements.txt` file and then from the commandline run: +```python +python -m smap_l2_gridder --input path/to/granule.h5 --output path/to/output_granule.nc +``` -## Transform Data -To run the regridder, first create an isolated python 3.12 environment using packages from the `pip_requirements.txt` file. +`smap_l2_gridder/__main__.py` is the entrypoint to the science logic module and can be used for testing and development. -From the commandline run: +## Directory structure -```python -python -m smap_l2_gridder --input path/to/granule.h5 --output path/to/output_granule.nc ``` +📁 +├── CHANGELOG.md +├── CONTRIBUTING.md +├── LICENSE +├── README.md +├── 📁 bin +├── 📁 docker +├── 📁 harmony_service +├── pip_requirements.txt +├── pyproject.toml +├── 📁 smap_l2_gridder +└── 📁 tests +``` + +* `CHANGELOG.md` - Contains a record of changes applied to each new release of the SMAP-L2-Gridding-Service. +* `CONTRIBUTING.md` - Instructions on how to contribute to the repository. +* `LICENSE` - Required for distribution under NASA open-source approval. Details conditions for use, reproduction and distribution. +* `README.md` - This file, containing guidance on developing the library and service. +* `bin` - A directory containing utility scripts to build the service and test images. A script to extract the release notes for the most recent version, as contained in `CHANGELOG.md` is also in this directory. +* `docker` - A directory containing the Dockerfiles for the service and test images. It also contains `service_version.txt`, which contains the semantic version number of the library and service image. Update this file with a new version to trigger a release. +* `harmony_service` - A directory containing the Harmony Service specific python code. `adapter.py` contains the `SMAPL2GridderAdapter` class that is invoked by calls to the Harmony service. +* `pip_requirements.txt` - Contains a list of python packages needed to run the service. +* `pyproject.toml` - Configuration file used by packaging tools, and other tools such as linters, type checkers, etc. +* `smap_l2_gridder` - Python package containing the logic for reformatting L2G data. +* `tests` - Contains the pytest test suite. -## pre-commit hooks: +## `pre-commit` hooks This repository uses [pre-commit](https://pre-commit.com/) to enable pre-commit checks that enforce coding standard best practices. These include: diff --git a/pyproject.toml b/pyproject.toml index 197e9af..f94992a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,6 @@ +[tool.ruff.format] +quote-style = "single" + [[tool.mypy.overrides]] module = "harmony_service_lib.*" From 9f700beb85c744c3554ed6318498fdfd31c594f8 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Wed, 27 Nov 2024 09:02:15 -0700 Subject: [PATCH 15/26] DAS-2254: Update README. Just adding instructions. --- README.md | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/README.md b/README.md index 9d55216..faa89c6 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,40 @@ python -m smap_l2_gridder --input path/to/granule.h5 --output path/to/output_gra * `tests` - Contains the pytest test suite. +## Local development + +Local testing of service functionality can be achieved via a local instance of +[Harmony](https://github.com/nasa/harmony) aka Harmony-In-A-Box. Please see instructions there +regarding creation of a local Harmony instance. + +For local development and testing of library modifications or small functions independent of the main Harmony application: + +1. Create a Python virtual environment +1. Install the dependencies in `pip_requirements.txt`, and `tests/pip_test_requirements.txt` +1. Install the pre-commit hooks ([described below](#pre-commit-hooks)). + + +## Tests + +This service utilises the Python `pytest` package to perform unit tests on +classes and functions in the service. After local development is complete, and +test have been updated, they can be run in Docker via: + +```bash +$ ./bin/build-image +$ ./bin/build-test +$ ./bin/run-test +``` + +It is also possible to run the test scripts directly (without docker) by just running the `run_tests.sh` script with a proper python environment. Do note that the `reports` directory will appear in the directory you call the script from. + +The `tests/run_tests.sh` script will also generate a coverage report, rendered +in HTML, and scan the code with `pylint`. + +Currently, the `pytest` suite is run automatically within a GitHub workflow +as part of a CI/CD pipeline. These tests are run for all changes made in a PR +against the `main` branch. The tests must pass in order to merge the PR. + ## `pre-commit` hooks This repository uses [pre-commit](https://pre-commit.com/) to enable pre-commit @@ -69,3 +103,23 @@ pip install pre-commit # Install the git hook scripts: pre-commit install ``` + +## Versioning: + +Docker service images for the `smap_l2_gridder` adhere to [semantic +version](https://semver.org/) numbers: major.minor.patch. + +* Major increments: These are non-backwards compatible API changes. +* Minor increments: These are backwards compatible API changes. +* Patch increments: These updates do not affect the API to the service. + +## CI/CD: + +The CI/CD for SMAP-L2-Gridding-Service is run on github actions with the workflows in the +`.github/workflows` directory: + +* [TODO: complete this section when the above statement is true] + +## Releasing + +* [TODO: complete when implemented] From 3fb011223d2b32ba07c16aeae38a06e27bec6454 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Wed, 27 Nov 2024 09:03:39 -0700 Subject: [PATCH 16/26] DAS-2254: Rewording on last reading --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 097b6b2..b9ba90f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,4 +10,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Initial codebase that transforms SPL2SMP_E granules into NetCDF4-CF grids. [#1](https://github.com/nasa/harmony-SMAP-L2-gridding-service/pull/1) -- Code and configuration to wrap gridding code into a Harmony Service [#3](https://github.com/nasa/harmony-SMAP-L2-gridding-service/pull/3 ) +- Code and configuration to wrap gridding logic into a Harmony Service [#3](https://github.com/nasa/harmony-SMAP-L2-gridding-service/pull/3 ) From e2fa5518252e76e77e060598968f1346a1cca467 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Wed, 27 Nov 2024 09:15:29 -0700 Subject: [PATCH 17/26] DAS-2254: Ruff fix double quotes. Just ran `ruff format .` with the single-quote config on pyproject.toml. --- .pre-commit-config.yaml | 2 +- smap_l2_gridder/__main__.py | 2 +- smap_l2_gridder/crs.py | 5 ++--- smap_l2_gridder/grid.py | 1 - tests/unit/test_crs.py | 2 +- tests/unit/test_grid.py | 12 ++++++------ 6 files changed, 11 insertions(+), 13 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0d5379f..86fa3a7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,7 +10,7 @@ repos: - id: check-yaml - id: check-added-large-files - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.7.1 + rev: v0.8.0 hooks: - id: ruff args: ["--fix", "--show-fixes", "--extend-select", "I"] diff --git a/smap_l2_gridder/__main__.py b/smap_l2_gridder/__main__.py index 737210a..ac172de 100644 --- a/smap_l2_gridder/__main__.py +++ b/smap_l2_gridder/__main__.py @@ -23,7 +23,7 @@ def main(): with open_datatree(args.input, decode_times=False) as in_data: process_input(in_data, Path(args.output)) except Exception as e: - print(f"Error occurred: {e}") + print(f'Error occurred: {e}') raise e print(f'successfully processed {args.input} into {args.output}') return 0 diff --git a/smap_l2_gridder/crs.py b/smap_l2_gridder/crs.py index bd9bd83..b9de5c4 100644 --- a/smap_l2_gridder/crs.py +++ b/smap_l2_gridder/crs.py @@ -113,8 +113,8 @@ def col_row_to_xy(self, col: int, row: int) -> tuple[np.float64, np.float64]: ) GPD_TO_WKT = { - "EASE2_N09km.gpd": EPSG_6931_WKT, - "EASE2_M09km.gpd": EPSG_6933_WKT, + 'EASE2_N09km.gpd': EPSG_6931_WKT, + 'EASE2_M09km.gpd': EPSG_6933_WKT, } @@ -189,7 +189,6 @@ def parse_gpd_file(gpd_name: str) -> dict: with open(filename, encoding='utf-8') as f: for line in f: - line = line.strip() if not line or line.startswith(';'): continue diff --git a/smap_l2_gridder/grid.py b/smap_l2_gridder/grid.py index 8db6e31..56746b6 100644 --- a/smap_l2_gridder/grid.py +++ b/smap_l2_gridder/grid.py @@ -35,7 +35,6 @@ def process_input(in_data: DataTree, output_file: Path, logger: None | Logger = data_node_names = set(in_data['/'].children) - set(get_metadata_children(in_data)) for node_name in data_node_names: - grid_info = get_grid_information(in_data, node_name) vars_to_grid = get_target_variables(in_data, node_name) diff --git a/tests/unit/test_crs.py b/tests/unit/test_crs.py index 27190ff..0b42cce 100644 --- a/tests/unit/test_crs.py +++ b/tests/unit/test_crs.py @@ -93,7 +93,7 @@ def test_parse_gpd_file(tmp_path): """ ).strip() - gpd_file = tmp_path / "test.gpd" + gpd_file = tmp_path / 'test.gpd' gpd_file.write_text(gpd_content) result = parse_gpd_file(str(gpd_file)) diff --git a/tests/unit/test_grid.py b/tests/unit/test_grid.py index 215931c..2392ec0 100644 --- a/tests/unit/test_grid.py +++ b/tests/unit/test_grid.py @@ -40,7 +40,7 @@ def test_process_input(sample_datatree, tmp_path): as well as some of the metadata was generated properly. """ - out_file = tmp_path / "output.nc" + out_file = tmp_path / 'output.nc' process_input(sample_datatree, out_file) assert Path(out_file).exists() out_dt = xr.open_datatree(out_file) @@ -65,14 +65,14 @@ def test_transfer_metadata(sample_datatree): test_metadata = {'size': 3.5, 'age': 23, 'processing': 'complete'} additional_metadata = DataTree() additional_metadata.attrs = test_metadata - sample_datatree["Metadata/testing"] = additional_metadata + sample_datatree['Metadata/testing'] = additional_metadata out_dt = transfer_metadata(sample_datatree, out_dt) assert ( out_dt['Metadata/Lineage/DEMSLP'].attrs['Description'] == 'Representative surface slope data for each of the 9 km cells' ) - assert out_dt["Metadata/testing"].attrs == test_metadata + assert out_dt['Metadata/testing'].attrs == test_metadata def test_prepare_variable_albedo(sample_datatree, sample_grid_info): @@ -115,7 +115,7 @@ def test_prepare_variable_encoding_of_utc_time(sample_datatree, sample_grid_info def test_get_grid_information(sample_datatree, mocker): """Verify correct information is returned as grid_info.""" target_grid_info = mocker.patch('smap_l2_gridder.grid.get_target_grid_information') - node = "Soil_Moisture_Retrieval_Data_Polar" + node = 'Soil_Moisture_Retrieval_Data_Polar' actual_grid_info = get_grid_information(sample_datatree, node) np.testing.assert_array_almost_equal( @@ -131,10 +131,10 @@ def test_get_target_grid_information(mocker): """Test that the node name correctly identifies the gpd file to parse.""" parse_gpd_file_mock = mocker.patch('smap_l2_gridder.grid.parse_gpd_file') - get_target_grid_information("any-node-name") + get_target_grid_information('any-node-name') parse_gpd_file_mock.assert_called_with('EASE2_M09km.gpd') - get_target_grid_information("any-node-name_Polar") + get_target_grid_information('any-node-name_Polar') parse_gpd_file_mock.assert_called_with('EASE2_N09km.gpd') From dbce4bd6ae7744ad97bf499602b384dc32353df9 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Wed, 27 Nov 2024 10:02:00 -0700 Subject: [PATCH 18/26] DAS-2254: Add in ruff preferences and pydocstyle --- pyproject.toml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index f94992a..5ef65be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,17 @@ +[tool.ruff] +lint.select = [ + "E", # pycodestyle + "F", # pyflakes + "UP", # pyupgrade + "I", # organize imports + "D", # docstyle +] + [tool.ruff.format] quote-style = "single" +[tool.ruff.lint.pydocstyle] +convention = "google" [[tool.mypy.overrides]] module = "harmony_service_lib.*" From 1ffafb1e5ed70bc4f6bbff43a909f1177348f780 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Wed, 27 Nov 2024 10:06:57 -0700 Subject: [PATCH 19/26] DAS-2254: lint fix. --- smap_l2_gridder/grid.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smap_l2_gridder/grid.py b/smap_l2_gridder/grid.py index 56746b6..2c57013 100644 --- a/smap_l2_gridder/grid.py +++ b/smap_l2_gridder/grid.py @@ -4,9 +4,9 @@ routines to translate the 1D intput arrays into the EASE grid output format """ +from collections.abc import Iterable from logging import Logger from pathlib import Path -from typing import Iterable import numpy as np from xarray import DataArray, DataTree, open_datatree From b43e21b909f1937597b08740b554cba64318261a Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Wed, 27 Nov 2024 10:26:02 -0700 Subject: [PATCH 20/26] DAS-2254: Remove pre-commit dependency on jupyter-black and use only ruff. --- .pre-commit-config.yaml | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 86fa3a7..4b694fa 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,10 +13,7 @@ repos: rev: v0.8.0 hooks: - id: ruff - args: ["--fix", "--show-fixes", "--extend-select", "I"] - - repo: https://github.com/psf/black-pre-commit-mirror - rev: 24.10.0 - hooks: - - id: black-jupyter - args: ["--skip-string-normalization"] - language_version: python3.12 + args: ["--fix", "--show-fixes"] + types_or: [python, jupyter] + - id: ruff-format + types_or: [python, jupyter] From d3d0877db0d0cedf755ca98d495912c316007603 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Wed, 27 Nov 2024 11:09:08 -0700 Subject: [PATCH 21/26] DAS-2254: ruff pre-commit.ci fix --- tests/conftest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index cb04f16..bd70a52 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,9 +22,9 @@ def sample_datatree_file(tmp_path) -> str: """ dt = DataTree() dt['Metadata/Lineage/DEMSLP'] = DataTree() - dt['Metadata/Lineage/DEMSLP'].attrs[ - 'Description' - ] = 'Representative surface slope data for each of the 9 km cells' + dt['Metadata/Lineage/DEMSLP'].attrs['Description'] = ( + 'Representative surface slope data for each of the 9 km cells' + ) nodes = ['Soil_Moisture_Retrieval_Data', 'Soil_Moisture_Retrieval_Data_Polar'] for node in nodes: From f20de37ce7c33fa3d6ac523fb00c2365d9cbcb38 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Wed, 27 Nov 2024 14:35:10 -0700 Subject: [PATCH 22/26] DAS-2254: smap-l2-gridder -> harmony-SMAP-L2-gridder prepend `harmony-` to the service name to be consistent with other services. --- .github/workflows/mypy.yml | 2 +- bin/build-image | 8 ++++---- bin/build-test | 6 +++--- bin/clean-images | 4 ++-- bin/run-test | 4 ++-- docker/service.Dockerfile | 4 ++-- docker/tests.Dockerfile | 2 +- harmony_service/__main__.py | 2 +- harmony_service/adapter.py | 2 +- harmony_service/exceptions.py | 2 +- smap_l2_gridder/__init__.py | 2 +- smap_l2_gridder/exceptions.py | 2 +- 12 files changed, 20 insertions(+), 20 deletions(-) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 31195b2..387fed2 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -13,7 +13,7 @@ jobs: python-version: ['3.12'] steps: - - name: Check out smap-l2-gridder code + - name: Check out harmony-SMAP-L2-gridding-service code uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/bin/build-image b/bin/build-image index d0998b8..bd54891 100755 --- a/bin/build-image +++ b/bin/build-image @@ -1,11 +1,11 @@ #!/bin/bash ############################################################################### # -# Build a Docker image of the SMAP-L2-Gridder service. +# Build a Docker image of the Harmony-SMAP-L2-Gridder service. # ############################################################################### -image="ghcr.io/nasa/smap-l2-gridder" +image="ghcr.io/nasa/harmony-smap-l2-gridder" # Retrieve the tag from the script arguments, or default to "latest". Bamboo # will pass the contents of `docker/service_version.txt` as this argument, @@ -14,8 +14,8 @@ image="ghcr.io/nasa/smap-l2-gridder" tag=${1:-latest} -# Remove old versions of: ghcr.io/nasa/smap-l2-gridder and -# ghcr.io/nasa/smap-l2-gridder-test images +# Remove old versions of: ghcr.io/nasa/harmony-smap-l2-gridder and +# ghcr.io/nasa/harmony-smap-l2-gridder-test images ./bin/clean-images # Build the image diff --git a/bin/build-test b/bin/build-test index f8539da..dc063b2 100755 --- a/bin/build-test +++ b/bin/build-test @@ -3,13 +3,13 @@ # # Build a Docker container to run the test suite for the SMAP-L2-Gridding-Service # -# To build the ghcr.io/nasa/smap-l2-gridder-test image, the -# ghcr.io/nasa/smap-l2-gridder image must also be present +# To build the ghcr.io/nasa/harmony-smap-l2-gridder-test image, the +# ghcr.io/nasa/harmony-smap-l2-gridder image must also be present # locally, as it is used as the base image in `docker/tests.Dockerfile`. # ############################################################################### -image="ghcr.io/nasa/smap-l2-gridder-test" +image="ghcr.io/nasa/harmony-smap-l2-gridder-test" tag=${1:-latest} diff --git a/bin/clean-images b/bin/clean-images index 2b2a121..c30f649 100755 --- a/bin/clean-images +++ b/bin/clean-images @@ -2,7 +2,7 @@ ############################################################################### # # remove all images containing the string -# "ghcr.io/nasa/smap-l2-gridder" and remove them. +# "ghcr.io/nasa/harmony-smap-l2-gridder" and remove them. # This is used for clean-up after development. # ############################################################################### @@ -15,7 +15,7 @@ remove_image_by_name() { fi } -image_base_name="ghcr.io/nasa/smap-l2-gridder" +image_base_name="ghcr.io/nasa/harmony-smap-l2-gridder" # First remove test images: remove_image_by_name "${image_base_name}-test" diff --git a/bin/run-test b/bin/run-test index aaf067d..3036d30 100755 --- a/bin/run-test +++ b/bin/run-test @@ -1,7 +1,7 @@ #!/bin/bash ############################################################################### # -# Execute the ghcr.io/nasa/smap-l2-gridder-test Docker image +# Execute the ghcr.io/nasa/harmony-smap-l2-gridder-test Docker image # ############################################################################### @@ -21,4 +21,4 @@ mkdir -p reports/coverage docker run --rm \ -v $(pwd)/reports/test-reports:/home/reports/test-reports \ -v $(pwd)/reports/coverage:/home/reports/coverage \ - ghcr.io/nasa/smap-l2-gridder-test "$@" + ghcr.io/nasa/harmony-smap-l2-gridder-test "$@" diff --git a/docker/service.Dockerfile b/docker/service.Dockerfile index 4210364..30a520a 100644 --- a/docker/service.Dockerfile +++ b/docker/service.Dockerfile @@ -2,8 +2,8 @@ # # Service image for ghcr.io/nasa/harmony-smap-l2-gridder -# SMAP-L2-Gridder backend service that transforms L2G (gridded trajectory) data -# into actual gridded data. +# Harmony-SMAP-L2-Gridder backend service that transforms L2G (gridded +# trajectory) data into actual gridded data. # # This image installs dependencies via Pip. The service code is then copied # into the Docker image. diff --git a/docker/tests.Dockerfile b/docker/tests.Dockerfile index b3025ee..d5b572c 100644 --- a/docker/tests.Dockerfile +++ b/docker/tests.Dockerfile @@ -5,7 +5,7 @@ # of the service image are tested, preventing discrepancies between the service # and test environments. ############################################################################### -FROM ghcr.io/nasa/smap-l2-gridder +FROM ghcr.io/nasa/harmony-smap-l2-gridder # Install additional Pip requirements (for testing) COPY tests/pip_test_requirements.txt . diff --git a/harmony_service/__main__.py b/harmony_service/__main__.py index 0fdc1fa..d81129d 100644 --- a/harmony_service/__main__.py +++ b/harmony_service/__main__.py @@ -1,4 +1,4 @@ -"""Run the SMAP-L2-Gridder via the Harmony CLI.""" +"""Run the Harmony-SMAP-L2-Gridder via the Harmony CLI.""" from argparse import ArgumentParser from sys import argv diff --git a/harmony_service/adapter.py b/harmony_service/adapter.py index 04d40b3..36383d2 100644 --- a/harmony_service/adapter.py +++ b/harmony_service/adapter.py @@ -20,7 +20,7 @@ class SMAPL2GridderAdapter(BaseHarmonyAdapter): - """Custom adapter for SMAP-L2-Gridder Service.""" + """Custom adapter for Harmony-SMAP-L2-Gridder Service.""" def process_item(self, item: Item, source: HarmonySource) -> Item: """Process single input STAC item.""" diff --git a/harmony_service/exceptions.py b/harmony_service/exceptions.py index 3bba894..9270688 100644 --- a/harmony_service/exceptions.py +++ b/harmony_service/exceptions.py @@ -2,7 +2,7 @@ from harmony_service_lib.util import HarmonyException -SERVICE_NAME = 'SMAP-L2-Gridder' +SERVICE_NAME = 'Harmony-SMAP-L2-Gridder' class SMAPL2GridderServiceError(HarmonyException): diff --git a/smap_l2_gridder/__init__.py b/smap_l2_gridder/__init__.py index 8cacfd4..4b98aa0 100644 --- a/smap_l2_gridder/__init__.py +++ b/smap_l2_gridder/__init__.py @@ -1 +1 @@ -"""Initialize the SMAP-L2-Gridder package.""" +"""Initialize the Harmony-SMAP-L2-Gridder package.""" diff --git a/smap_l2_gridder/exceptions.py b/smap_l2_gridder/exceptions.py index dd5b7db..539e78c 100644 --- a/smap_l2_gridder/exceptions.py +++ b/smap_l2_gridder/exceptions.py @@ -5,7 +5,7 @@ class SMAPL2GridderError(Exception): """Base error class for exceptions raised by smap_l2_gridder library.""" def __init__(self, message=None): - """All smap-l2-gridder errors have a message field.""" + """All Harmony-SMAP-L2-Gridder errors have a message field.""" self.message = message From 34b0bd30c3743cb8cd096006679d27848e32f838 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Wed, 27 Nov 2024 14:48:18 -0700 Subject: [PATCH 23/26] DAS-2254: change stack to stac --- tests/conftest.py | 2 +- tests/test_service/test_adapter.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index bd70a52..524cfb9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -104,7 +104,7 @@ def sample_datatree(sample_datatree_file) -> DataTree: @pytest.fixture -def stack_catalog() -> Catalog: +def sample_stac() -> Catalog: """Creates a generic SpatioTemporal Asset Catalog (STAC). Used as a valid input for SMAPL2GridderAdapter initialization diff --git a/tests/test_service/test_adapter.py b/tests/test_service/test_adapter.py index d5922f0..3c15c6b 100644 --- a/tests/test_service/test_adapter.py +++ b/tests/test_service/test_adapter.py @@ -9,7 +9,7 @@ from smap_l2_gridder.exceptions import InvalidGPDError -def test_process_sample_file(tmp_path, sample_datatree_file, stack_catalog, mocker): +def test_process_sample_file(tmp_path, sample_datatree_file, sample_stac, mocker): """Run a sample file through the adapter.""" # override the adapter's working dir temp_dir_mock = mocker.patch('harmony_service.adapter.TemporaryDirectory') @@ -40,13 +40,13 @@ def test_process_sample_file(tmp_path, sample_datatree_file, stack_catalog, mock # Set up Adapter class smap_l2_gridding_service = SMAPL2GridderAdapter( - message, config=config(validate=False), catalog=stack_catalog + message, config=config(validate=False), catalog=sample_stac ) # Invoke the adapter. _, _ = smap_l2_gridding_service.invoke() - asset_href = stack_catalog.get_item('input granule').assets['input data'].href + asset_href = sample_stac.get_item('input granule').assets['input data'].href download_mock.assert_called_once_with( asset_href, @@ -112,7 +112,7 @@ def test_process_sample_file(tmp_path, sample_datatree_file, stack_catalog, mock def test_process_sample_file_failure( - tmp_path, stack_catalog, sample_datatree_file, mocker + tmp_path, sample_stac, sample_datatree_file, mocker ): """Test failure.""" # use a datatree fixture as the downloaded file @@ -132,7 +132,7 @@ def test_process_sample_file_failure( # Set up Adapter class smap_l2_gridding_service = SMAPL2GridderAdapter( - message, config=config(validate=False), catalog=stack_catalog + message, config=config(validate=False), catalog=sample_stac ) # Invoke the adapter. From 532d447c98ebba3bd024e742dcc6abc8e9f0bf53 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Wed, 27 Nov 2024 15:13:49 -0700 Subject: [PATCH 24/26] DAS-2254: Rename Item -> Catalog Because that's what it is apparently. --- harmony_service/adapter.py | 8 ++++---- tests/run_tests.sh | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/harmony_service/adapter.py b/harmony_service/adapter.py index 36383d2..c93203f 100644 --- a/harmony_service/adapter.py +++ b/harmony_service/adapter.py @@ -14,7 +14,7 @@ from harmony_service_lib import BaseHarmonyAdapter from harmony_service_lib.message import Source as HarmonySource from harmony_service_lib.util import download, generate_output_filename, stage -from pystac import Asset, Item +from pystac import Asset, Catalog from smap_l2_gridder.grid import transform_l2g_input @@ -22,16 +22,16 @@ class SMAPL2GridderAdapter(BaseHarmonyAdapter): """Custom adapter for Harmony-SMAP-L2-Gridder Service.""" - def process_item(self, item: Item, source: HarmonySource) -> Item: + def process_item(self, catalog: Catalog, source: HarmonySource) -> Catalog: """Process single input STAC item.""" with TemporaryDirectory() as working_directory: try: - results = item.clone() + results = catalog.clone() results.assets = {} asset = next( item_asset - for item_asset in item.assets.values() + for item_asset in catalog.assets.values() if 'data' in (item_asset.roles or []) ) diff --git a/tests/run_tests.sh b/tests/run_tests.sh index 6510934..0c29666 100755 --- a/tests/run_tests.sh +++ b/tests/run_tests.sh @@ -12,6 +12,7 @@ STATUS=0 # Run the standard set of unit tests, producing JUnit compatible output pytest --cov=smap_l2_gridder --cov=harmony_service \ --cov-report=html:reports/coverage \ + --cov-report term \ --junitxml=reports/test-reports/test-results-"$(date +'%Y%m%d%H%M%S')".xml || STATUS=1 # Run pylint From 66e5620b2e4268ee845d2a2d544a6c66c0adcdf0 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Wed, 27 Nov 2024 15:41:01 -0700 Subject: [PATCH 25/26] DAS-2254: PR review updates use a config and verify it was used in a mock. Mock Asset() call --- tests/test_service/test_adapter.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tests/test_service/test_adapter.py b/tests/test_service/test_adapter.py index 3c15c6b..8a3dc81 100644 --- a/tests/test_service/test_adapter.py +++ b/tests/test_service/test_adapter.py @@ -25,8 +25,11 @@ def test_process_sample_file(tmp_path, sample_datatree_file, sample_stac, mocker filename_mock.return_value = output_filename stage_mock = mocker.patch('harmony_service.adapter.stage') + stage_mock.return_value = 's3://bucketname/staged-location' staging_dir = tmp_path / 'staging' + asset_mock = mocker.patch('harmony_service.adapter.Asset') + message = HarmonyMessage( { 'accessToken': 'fake_token', @@ -39,12 +42,13 @@ def test_process_sample_file(tmp_path, sample_datatree_file, sample_stac, mocker ) # Set up Adapter class + the_config = config(validate=False) smap_l2_gridding_service = SMAPL2GridderAdapter( - message, config=config(validate=False), catalog=sample_stac + message, config=the_config, catalog=sample_stac ) # Invoke the adapter. - _, _ = smap_l2_gridding_service.invoke() + smap_l2_gridding_service.invoke() asset_href = sample_stac.get_item('input granule').assets['input data'].href @@ -52,7 +56,7 @@ def test_process_sample_file(tmp_path, sample_datatree_file, sample_stac, mocker asset_href, tmp_path, logger=mocker.ANY, - cfg=mocker.ANY, + cfg=the_config, access_token=message.accessToken, ) @@ -65,6 +69,13 @@ def test_process_sample_file(tmp_path, sample_datatree_file, sample_stac, mocker cfg=mocker.ANY, ) + asset_mock.assert_called_once_with( + 's3://bucketname/staged-location', + title=mocker.ANY, + media_type='application/x-netcdf4', + roles=['data'], + ) + # Validate the gridded output data results = open_datatree(tmp_path / 'working_gridded.nc') assert set(results.groups) == set( From 54cf40a880816ec8bf8989022232251f17858c75 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Wed, 27 Nov 2024 15:50:04 -0700 Subject: [PATCH 26/26] DAS-2254: Nope those are Items. --- harmony_service/adapter.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/harmony_service/adapter.py b/harmony_service/adapter.py index c93203f..36383d2 100644 --- a/harmony_service/adapter.py +++ b/harmony_service/adapter.py @@ -14,7 +14,7 @@ from harmony_service_lib import BaseHarmonyAdapter from harmony_service_lib.message import Source as HarmonySource from harmony_service_lib.util import download, generate_output_filename, stage -from pystac import Asset, Catalog +from pystac import Asset, Item from smap_l2_gridder.grid import transform_l2g_input @@ -22,16 +22,16 @@ class SMAPL2GridderAdapter(BaseHarmonyAdapter): """Custom adapter for Harmony-SMAP-L2-Gridder Service.""" - def process_item(self, catalog: Catalog, source: HarmonySource) -> Catalog: + def process_item(self, item: Item, source: HarmonySource) -> Item: """Process single input STAC item.""" with TemporaryDirectory() as working_directory: try: - results = catalog.clone() + results = item.clone() results.assets = {} asset = next( item_asset - for item_asset in catalog.assets.values() + for item_asset in item.assets.values() if 'data' in (item_asset.roles or []) )