From 89170a4c98af6d4786903cafd48199f3b43db9d7 Mon Sep 17 00:00:00 2001 From: Anna Pokorska Date: Thu, 5 Dec 2024 09:48:53 +0000 Subject: [PATCH 1/4] Add metadata validation to validation service --- app/service/validation.py | 31 +++++++++++++++++ .../validation/test_metadata_validation.py | 34 +++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 tests/unit_tests/service/validation/test_metadata_validation.py diff --git a/app/service/validation.py b/app/service/validation.py index 88e67668..7de67898 100644 --- a/app/service/validation.py +++ b/app/service/validation.py @@ -240,3 +240,34 @@ def validate_ingest_data(data: dict[str, Any]) -> None: raise HTTPException(status_code=status.HTTP_204_NO_CONTENT) validate_entity_relationships(data) + + +def _validate_values_are_strings(value: Any) -> None: + """ + Recursively validates that all values are strings. + + :param Any value: The value to be validated. + :raises ValidationError: raised if any value is not a string. + """ + if isinstance(value, list): + for v in value: + _validate_values_are_strings(v) + elif not isinstance(value, str): + raise ValidationError("Metadata values should be strings.") + + +def validate_metadata_values_are_strings(data: dict[str, Any]) -> None: + """ + Validates family and document metadata in the data. + + :param dict[str, Any] data: The data object to be validated. + """ + metadata_values = [ + value + for entity in data.values() + if entity is not None + for e in entity + for value in e.get("metadata", {}).values() + ] + + _validate_values_are_strings(metadata_values) diff --git a/tests/unit_tests/service/validation/test_metadata_validation.py b/tests/unit_tests/service/validation/test_metadata_validation.py new file mode 100644 index 00000000..4e2194f3 --- /dev/null +++ b/tests/unit_tests/service/validation/test_metadata_validation.py @@ -0,0 +1,34 @@ +import pytest + +from app.errors import ValidationError +from app.service.validation import validate_metadata_values_are_strings + + +def test_validate_metadata_when_ok(): + test_data = { + "entity1": [{"metadata": {"key1": ["value"], "key2": [""]}}], + "entity2": [{"other_key": {"key1": ["value"]}}], + "entity3": [{}], + "entity4": [], + "entity5": "", + "entity6": None, + } + + validate_metadata_values_are_strings(test_data) + + +@pytest.mark.parametrize( + "test_data", + [ + {"entity": [{"metadata": {"key": [1]}}]}, + {"entity": [{"metadata": {"key": [1]}}]}, + {"entity": [{"metadata": {"key": 1}}]}, + {"entity": [{"metadata": {"key": [None]}}]}, + {"entity": [{"metadata": {"key": None}}]}, + ], +) +def test_validate_metadata_throws_exception_when_non_string_values_present(test_data): + + with pytest.raises(ValidationError) as e: + validate_metadata_values_are_strings(test_data) + assert "Metadata values should be strings." == e.value.message From dc9bea1a6f9f60984a7b30af54f3e6f89c09dd00 Mon Sep 17 00:00:00 2001 From: Anna Pokorska Date: Thu, 5 Dec 2024 11:48:00 +0000 Subject: [PATCH 2/4] Add metadata value validation to bulk import endpoint --- app/service/validation.py | 3 ++- .../routers/bulk_import/test_bulk_import.py | 17 +++++++++++++++++ .../validation/test_metadata_validation.py | 2 +- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/app/service/validation.py b/app/service/validation.py index b01a581d..9297f9cb 100644 --- a/app/service/validation.py +++ b/app/service/validation.py @@ -239,6 +239,7 @@ def validate_bulk_import_data(data: dict[str, Any]) -> None: if not data: raise HTTPException(status_code=status.HTTP_204_NO_CONTENT) + validate_metadata_values_are_strings(data) validate_entity_relationships(data) @@ -253,7 +254,7 @@ def _validate_values_are_strings(value: Any) -> None: for v in value: _validate_values_are_strings(v) elif not isinstance(value, str): - raise ValidationError("Metadata values should be strings.") + raise ValidationError("Metadata values should be strings") def validate_metadata_values_are_strings(data: dict[str, Any]) -> None: diff --git a/tests/unit_tests/routers/bulk_import/test_bulk_import.py b/tests/unit_tests/routers/bulk_import/test_bulk_import.py index 13e7360f..91d560cf 100644 --- a/tests/unit_tests/routers/bulk_import/test_bulk_import.py +++ b/tests/unit_tests/routers/bulk_import/test_bulk_import.py @@ -118,3 +118,20 @@ def test_bulk_import_documents_when_no_family( assert response.status_code == status.HTTP_400_BAD_REQUEST assert response.json().get("detail") == "No entity with id test.new.family.0 found" + + +def test_bulk_import_when_metadata_contains_non_string_values( + client: TestClient, superuser_header_token +): + json_input = build_json_file( + {"families": [{**default_family, "metadata": {"key": [1]}}]} + ) + + response = client.post( + "/api/v1/bulk-import/test", + files={"data": json_input}, + headers=superuser_header_token, + ) + + assert response.status_code == status.HTTP_400_BAD_REQUEST + assert response.json().get("detail") == "Metadata values should be strings" diff --git a/tests/unit_tests/service/validation/test_metadata_validation.py b/tests/unit_tests/service/validation/test_metadata_validation.py index 4e2194f3..3dfbff89 100644 --- a/tests/unit_tests/service/validation/test_metadata_validation.py +++ b/tests/unit_tests/service/validation/test_metadata_validation.py @@ -31,4 +31,4 @@ def test_validate_metadata_throws_exception_when_non_string_values_present(test_ with pytest.raises(ValidationError) as e: validate_metadata_values_are_strings(test_data) - assert "Metadata values should be strings." == e.value.message + assert "Metadata values should be strings" == e.value.message From cbe7095bc45da90f7cdf06aba1f8e3e4a4952c39 Mon Sep 17 00:00:00 2001 From: Anna Pokorska Date: Thu, 5 Dec 2024 11:52:25 +0000 Subject: [PATCH 3/4] Add extra test data variation and update docstrings --- app/service/validation.py | 6 ++--- .../validation/test_metadata_validation.py | 22 +++++++++++-------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/app/service/validation.py b/app/service/validation.py index 9297f9cb..85658029 100644 --- a/app/service/validation.py +++ b/app/service/validation.py @@ -245,10 +245,10 @@ def validate_bulk_import_data(data: dict[str, Any]) -> None: def _validate_values_are_strings(value: Any) -> None: """ - Recursively validates that all values are strings. + Recursively validates that all single values are strings. :param Any value: The value to be validated. - :raises ValidationError: raised if any value is not a string. + :raises ValidationError: raised if any single value is not a string. """ if isinstance(value, list): for v in value: @@ -259,7 +259,7 @@ def _validate_values_are_strings(value: Any) -> None: def validate_metadata_values_are_strings(data: dict[str, Any]) -> None: """ - Validates family and document metadata in the data. + Validates any metadata in the data. :param dict[str, Any] data: The data object to be validated. """ diff --git a/tests/unit_tests/service/validation/test_metadata_validation.py b/tests/unit_tests/service/validation/test_metadata_validation.py index 3dfbff89..0d3ea5e5 100644 --- a/tests/unit_tests/service/validation/test_metadata_validation.py +++ b/tests/unit_tests/service/validation/test_metadata_validation.py @@ -4,15 +4,19 @@ from app.service.validation import validate_metadata_values_are_strings -def test_validate_metadata_when_ok(): - test_data = { - "entity1": [{"metadata": {"key1": ["value"], "key2": [""]}}], - "entity2": [{"other_key": {"key1": ["value"]}}], - "entity3": [{}], - "entity4": [], - "entity5": "", - "entity6": None, - } +@pytest.mark.parametrize( + "test_data", + [ + {"entity1": [{"metadata": {"key1": ["value"], "key2": [""]}}]}, + {"entity2": [{"other_key": {"key1": ["value"]}}]}, + {"entity3": [{}]}, + {"entity4": []}, + {"entity5": ""}, + {"entity6": None}, + {}, + ], +) +def test_validate_metadata_when_ok(test_data): validate_metadata_values_are_strings(test_data) From b935987a2b6a9265b6def4e099fba77f619de4d3 Mon Sep 17 00:00:00 2001 From: Anna Pokorska Date: Thu, 5 Dec 2024 12:57:50 +0000 Subject: [PATCH 4/4] Update patch version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0f5ba2ff..cc17c644 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "admin_backend" -version = "2.17.19" +version = "2.17.20" description = "" authors = ["CPR-dev-team "] packages = [{ include = "app" }, { include = "tests" }]