From 3db09ba8836322b7752963fd1efe0665861c4ba3 Mon Sep 17 00:00:00 2001 From: Katy Baulch <46493669+katybaulch@users.noreply.github.com> Date: Thu, 31 Oct 2024 15:32:50 +0000 Subject: [PATCH] Feature/pdct 1514 replace passed/approved from backend (#71) * Add FIXMEs * Update logic for family published date * Validate the taxonomy * Bump to 3.8.24 * Try adding expression for published_date * Try adding expression for published_date * Fix metadata check * Fix logic for checking datetime_event_name * Return None if can't find datetime_event_name in event meta * Remove type ignore * Error handling --- db_client/functions/metadata.py | 40 ++++++++++++++++++++++++++------- db_client/models/dfce/family.py | 18 ++++++++++++++- pyproject.toml | 2 +- 3 files changed, 50 insertions(+), 10 deletions(-) diff --git a/db_client/functions/metadata.py b/db_client/functions/metadata.py index 5a953a2..348f79a 100644 --- a/db_client/functions/metadata.py +++ b/db_client/functions/metadata.py @@ -1,5 +1,5 @@ import logging -from typing import Mapping, Optional, Sequence, Union +from typing import Any, Mapping, Optional, Sequence, Union from sqlalchemy.orm import Session @@ -134,7 +134,7 @@ def build_valid_taxonomy( Takes the taxonomy from the database and builds a dictionary of TaxonomyEntry objects, used for validation. - :param Sequence taxonomy: From the database model + :param Mapping taxonomy: From the database model CorpusType.valid_metadata and potentially filtered by entity key :param Optional[TaxonomyDataEntry] metadata: The metadata to validate. @@ -152,14 +152,38 @@ def build_valid_taxonomy( taxonomy_entries: Mapping[str, TaxonomyEntry] = {} for key, values in taxonomy.items(): - # TODO: Can we do extra validation of the taxonomy here - e.g., compare the - # metadata datetime_event_name value against the list of allowed event types in - # the taxonomy. 
In fact - split any conditionals under this for loop into a - # separate validate_taxonomy function that we can isolate to test against. - if not isinstance(values, dict): - raise TypeError(f"Taxonomy entry for '{key}' is not a dictionary") + _validate_taxonomy(taxonomy, key, values) # We rely on pydantic to validate the values here taxonomy_entries[key] = TaxonomyEntry(**values) return taxonomy_entries + + +def _validate_taxonomy(taxonomy: Mapping, key: str, values: Any) -> None: + """Extra validation of the taxonomy. + + :param Mapping taxonomy: From the database model + CorpusType.valid_metadata and potentially filtered by entity key + :param str key: A taxonomy key. + :param Any values: Values for a taxonomy key. + :raises TypeError: If values is not a dictionary. + :raises ValueError: If too many datetime_event_name values. + :raises ValueError: If datetime_event_name value is not in list of + allowed event_type values. + """ + if not isinstance(values, dict): + raise TypeError(f"Taxonomy entry for '{key}' is not a dictionary") + + if key == "datetime_event_name": + # Compare the metadata datetime_event_name value against the list of allowed + # event_types under _event in the taxonomy. 
+ datetime_event_name_values = values["allowed_values"] + if len(datetime_event_name_values) > 1: + raise ValueError(f"Too many values for taxonomy '{key}'") + + datetime_event_name_value = datetime_event_name_values[0] + if datetime_event_name_value not in taxonomy["event_type"]["allowed_values"]: + raise ValueError( + f"Invalid value '{datetime_event_name_value}' for taxonomy '{key}'" + ) diff --git a/db_client/models/dfce/family.py b/db_client/models/dfce/family.py index d282775..c48666f 100644 --- a/db_client/models/dfce/family.py +++ b/db_client/models/dfce/family.py @@ -1,3 +1,4 @@ +import logging from datetime import datetime from typing import Literal, Optional, cast @@ -12,6 +13,8 @@ from .geography import Geography +_LOGGER = logging.getLogger(__name__) + class FamilyCategory(BaseModelEnum): """Family categories as understood in the context of law/policy.""" @@ -144,9 +147,22 @@ def published_date(self) -> Optional[datetime]: """A date to use for filtering by published date.""" if not self.events: return None + date = None for event in self.events: - if event.event_type_name == "Passed/Approved": + event_meta = cast(dict, event.valid_metadata) + if "datetime_event_name" not in event_meta: + _LOGGER.error(event_meta) + return None + + if not isinstance(event_meta["datetime_event_name"], list): + _LOGGER.error( + f"datetime_event_name is type {type(event_meta['datetime_event_name'])}" + ) + return None + + datetime_event_name = event_meta["datetime_event_name"][0] + if event.event_type_name == datetime_event_name: return cast(datetime, event.date) if date is None: date = cast(datetime, event.date) diff --git a/pyproject.toml b/pyproject.toml index c60c54f..8808c9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "db-client" -version = "3.8.23" +version = "3.8.24" description = "All things to do with the datamodel and its storage. Including alembic migrations and datamodel code." 
authors = ["CPR-dev-team "] license = "Apache-2.0"