From 28140924dcd11decfd437d31b9e0805634961e75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=BDiga=20Luk=C5=A1i=C4=8D?= <31988337+zigaLuksic@users.noreply.github.com> Date: Mon, 29 Jan 2024 15:10:44 +0100 Subject: [PATCH] Remove automatic pipeline retries and add better timestamp parsing support (#327) * disable pipeline retries in pipeline chains * add better timestamp parsing support * update and rerun pre-commit * update init and changelog * add dateutil * add stubs * correct name --- .pre-commit-config.yaml | 4 ++-- CHANGELOG.md | 8 +++++++- eogrow/__init__.py | 2 +- eogrow/types.py | 3 +-- eogrow/utils/pipeline_chain.py | 2 +- eogrow/utils/validators.py | 8 ++++---- pyproject.toml | 3 +++ tests/core/area/test_batch.py | 1 + tests/utils/test_validators.py | 17 +++++++++-------- 9 files changed, 29 insertions(+), 19 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6a708d55..a48bc256 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,13 +20,13 @@ repos: types_or: [json] - repo: https://github.com/psf/black - rev: 23.12.1 + rev: 24.1.1 hooks: - id: black language_version: python3 - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: "v0.1.11" + rev: "v0.1.14" hooks: - id: ruff diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ed7e750..481e032e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,10 @@ -## [Version 1.7.4] - 2024-01-10 +## [Version 1.7.6] - 2024-01-29 + +- Pipelines that are run as part of a pipeline-chain execution will now no longer be retried by ray in the case when an exception occurs. +- Parsing time ranges now has support for more formats. + + +## [Version 1.7.5] - 2024-01-10 - Parameter `raise_if_failed` renamed to `raise_on_failure` and is now enabled by default. - Numpy version restricted in anticipation of numpy 2.0 release. diff --git a/eogrow/__init__.py b/eogrow/__init__.py index 8030e953..e014549f 100644 --- a/eogrow/__init__.py +++ b/eogrow/__init__.py @@ -1,3 +1,3 @@ """The main module of the eo-grow package.""" -__version__ = "1.7.5" +__version__ = "1.7.6" diff --git a/eogrow/types.py b/eogrow/types.py index 7f543dc2..d66ce1f0 100644 --- a/eogrow/types.py +++ b/eogrow/types.py @@ -1,5 +1,4 @@ -""" Includes custom types used in schemas -""" +"""Includes custom types used in schemas""" import datetime import sys diff --git a/eogrow/utils/pipeline_chain.py b/eogrow/utils/pipeline_chain.py index ecde54de..f876c372 100644 --- a/eogrow/utils/pipeline_chain.py +++ b/eogrow/utils/pipeline_chain.py @@ -44,6 +44,6 @@ def run_pipeline_chain(pipeline_chain: list[RawConfig]) -> None: ray.get(runner.remote(run_schema.pipeline_config)) -@ray.remote +@ray.remote(max_retries=0) def _pipeline_runner(config: RawConfig) -> None: return load_pipeline_class(config).from_raw_config(config).run() diff --git a/eogrow/utils/validators.py b/eogrow/utils/validators.py index 4e050a27..5060fc9c 100644 --- a/eogrow/utils/validators.py +++ b/eogrow/utils/validators.py @@ -4,11 +4,11 @@ from __future__ import annotations -import datetime as dt import inspect from typing import TYPE_CHECKING, Any, Callable, Iterable, Tuple, Union import numpy as np +from dateutil.parser import isoparse from pydantic import BaseModel, Field, validator from eolearn.core import FeatureType @@ -144,8 +144,8 @@ def parse_time_period(value: tuple[str, str]) -> TimePeriod: } value = start_dates[kind], end_dates[kind] - start = dt.datetime.strptime(value[0], "%Y-%m-%d").date() - end = dt.datetime.strptime(value[1], "%Y-%m-%d").date() + start = isoparse(value[0]) + end = isoparse(value[1]) assert start <= end, "Invalid start and end dates provided. End date must follow the start date" return start, end @@ -241,7 +241,7 @@ def parse_data_collection(value: str | dict | DataCollection) -> DataCollection: def restrict_types( - allowed_feature_types: Iterable[FeatureType] | Callable[[FeatureType], bool] + allowed_feature_types: Iterable[FeatureType] | Callable[[FeatureType], bool], ) -> Callable[[Feature], Feature]: """Validates a field representing a feature, where it restricts the possible feature types.""" diff --git a/pyproject.toml b/pyproject.toml index a5f846ee..3254e96c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,7 @@ dependencies = [ "opencv-python-headless", "pandas", "pydantic>=1.8.0, <2.0", + "python-dateutil", "python-rapidjson", "rasterio", "ray[default]", @@ -72,6 +73,7 @@ docs = [ dev = [ "eo-grow[ML]", "boto3", + "boto3-stubs", "build", "deepdiff", "fs_s3fs", @@ -85,6 +87,7 @@ dev = [ "requests-mock", "scipy", "twine", + "types-python-dateutil", "types-mock", "types-requests", "types-setuptools", diff --git a/tests/core/area/test_batch.py b/tests/core/area/test_batch.py index 3f8b7bca..2c49a073 100644 --- a/tests/core/area/test_batch.py +++ b/tests/core/area/test_batch.py @@ -7,6 +7,7 @@ - Batch request definition endpoint. - Tiling grid request endpoints. - Mocking requests of iter_tiles would be too much effort, so the `_make_new_split` of the splitter is mocked instead. + """ from unittest.mock import patch diff --git a/tests/utils/test_validators.py b/tests/utils/test_validators.py index 72d7dfeb..87afb2c2 100644 --- a/tests/utils/test_validators.py +++ b/tests/utils/test_validators.py @@ -170,18 +170,19 @@ class DummySchema(Pipeline.Schema): @pytest.mark.parametrize( - ("time_period", "year", "expected_start_date", "expected_end_date"), + ("first_param", "second_param", "expected_start_date", "expected_end_date"), [ - ("yearly", 2020, "2020-01-01", "2020-12-31"), - ("Q2", 2021, "2021-04-01", "2021-06-30"), - ("Q2-yearly", 2021, "2020-07-01", "2021-06-30"), + ("yearly", 2020, "2020-01-01T00:00:00", "2020-12-31T00:00:00"), + ("Q2", 2021, "2021-04-01T00:00:00", "2021-06-30T00:00:00"), + ("Q2-yearly", 2021, "2020-07-01T00:00:00", "2021-06-30T00:00:00"), + ("2022-02-02", "2022-02-22T22:22:02", "2022-02-02T00:00:00", "2022-02-22T22:22:02"), ], ) -def test_parse_time_period(time_period, year, expected_start_date, expected_end_date): - start_date, end_date = parse_time_period([time_period, year]) +def test_parse_time_period(first_param, second_param, expected_start_date, expected_end_date): + start_date, end_date = parse_time_period([first_param, second_param]) - assert isinstance(start_date, dt.date) - assert isinstance(end_date, dt.date) + assert isinstance(start_date, dt.datetime) + assert isinstance(end_date, dt.datetime) assert start_date.isoformat() == expected_start_date assert end_date.isoformat() == expected_end_date