Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/pdct 1542 change bulk import tool to not allow non strings as metadata #263

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions app/service/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,4 +239,36 @@ def validate_bulk_import_data(data: dict[str, Any]) -> None:
if not data:
raise HTTPException(status_code=status.HTTP_204_NO_CONTENT)

validate_metadata_values_are_strings(data)
validate_entity_relationships(data)


def _validate_values_are_strings(value: Any) -> None:
    """
    Check that a value is a string or a (possibly nested) list of strings.

    Lists are walked recursively; every non-list item reached must be a string.

    :param Any value: The value, or list of values, to check.
    :raises ValidationError: raised if any non-list item is not a string.
    """
    if not isinstance(value, list):
        # Leaf value: anything other than a string is rejected.
        if not isinstance(value, str):
            raise ValidationError("Metadata values should be strings")
        return

    for item in value:
        _validate_values_are_strings(item)


def validate_metadata_values_are_strings(data: dict[str, Any]) -> None:
    """
    Validates that every metadata value found in the data is a string.

    Each top-level value of ``data`` is treated as a list of entity dicts,
    and the values of each entity's ``metadata`` dict are collected and
    checked. A metadata value may be a string or a (nested) list of strings.

    Parts of the data that do not have that shape (an entity collection that
    is not a list, an entity that is not a dict, metadata that is missing or
    not a dict) are skipped instead of raising ``AttributeError`` or
    ``TypeError``.
    NOTE(review): this assumes structural problems in the payload are
    reported by other validation - confirm against the caller.

    :param dict[str, Any] data: The data object to be validated.
    :raises ValidationError: raised if any metadata value is not a string
        or a list of strings.
    """
    metadata_values = []
    for entities in data.values():
        # None, strings, dicts etc. carry no list of entities to inspect.
        if not isinstance(entities, list):
            continue
        for entity in entities:
            if not isinstance(entity, dict):
                continue
            metadata = entity.get("metadata")
            # Covers both a missing key and an explicit null/non-dict value.
            if isinstance(metadata, dict):
                metadata_values.extend(metadata.values())

    _validate_values_are_strings(metadata_values)
Copy link
Contributor

@jamesgorrie jamesgorrie Dec 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of writing our own parser, could we use Pydantic's? For example:

from pydantic import BaseModel, RootModel, ValidationError
from typing import Dict, Union, List

# Root model for a metadata mapping: string keys, each value either a string
# or a list of strings.
Metadata = RootModel[Dict[str, Union[str, List[str]]]]

if __name__ == "__main__":
    # Every value is a string or a list of strings.
    good_json = '{"name": "John", "age": "30", "list": ["item1", "item2"]}'
    # "age" is a bare number rather than a string.
    bad_json = '{"name": "John", "age": 30, "list": ["item1", "item2"]}'

    # Run the same validate-and-report steps for each payload, good one first.
    for payload in (good_json, bad_json):
        try:
            print(Metadata.model_validate_json(payload))
            print("Successful")  # reached for good_json
        except ValidationError as e:
            print(e)
            print("Failed")  # reached for bad_json, with a descriptive error

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "admin_backend"
version = "2.17.19"
version = "2.17.20"
description = ""
authors = ["CPR-dev-team <[email protected]>"]
packages = [{ include = "app" }, { include = "tests" }]
Expand Down
17 changes: 17 additions & 0 deletions tests/unit_tests/routers/bulk_import/test_bulk_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,20 @@ def test_bulk_import_documents_when_no_family(

assert response.status_code == status.HTTP_400_BAD_REQUEST
assert response.json().get("detail") == "No entity with id test.new.family.0 found"


def test_bulk_import_when_metadata_contains_non_string_values(
    client: TestClient, superuser_header_token
):
    """
    Posting a family whose metadata holds a non-string value returns a 400
    with the metadata validation message as the detail.
    """
    family_with_int_metadata = {**default_family, "metadata": {"key": [1]}}
    payload = build_json_file({"families": [family_with_int_metadata]})

    response = client.post(
        "/api/v1/bulk-import/test",
        headers=superuser_header_token,
        files={"data": payload},
    )

    expected_detail = "Metadata values should be strings"
    assert response.status_code == status.HTTP_400_BAD_REQUEST
    assert response.json().get("detail") == expected_detail
38 changes: 38 additions & 0 deletions tests/unit_tests/service/validation/test_metadata_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pytest

from app.errors import ValidationError
from app.service.validation import validate_metadata_values_are_strings


@pytest.mark.parametrize(
    "test_data",
    [
        # Metadata whose values are lists of strings (including an empty string).
        {"entity1": [{"metadata": {"key1": ["value"], "key2": [""]}}]},
        # Entity without a "metadata" key.
        {"entity2": [{"other_key": {"key1": ["value"]}}]},
        # Entity that is an empty dict.
        {"entity3": [{}]},
        # Empty list of entities.
        {"entity4": []},
        # Empty string in place of the entity list.
        {"entity5": ""},
        # None in place of the entity list.
        {"entity6": None},
        # No entities at all.
        {},
    ],
)
def test_validate_metadata_when_ok(test_data):
    """Validation completes without raising for each of the inputs above."""
    # There is no return value to check; the test passes if nothing is raised.
    validate_metadata_values_are_strings(test_data)


@pytest.mark.parametrize(
    "test_data",
    [
        # List whose only item is a number.
        {"entity": [{"metadata": {"key": [1]}}]},
        # List mixing a valid string with a number.
        {"entity": [{"metadata": {"key": ["value", 1]}}]},
        # Bare number instead of a string.
        {"entity": [{"metadata": {"key": 1}}]},
        # List containing None.
        {"entity": [{"metadata": {"key": [None]}}]},
        # Bare None instead of a string.
        {"entity": [{"metadata": {"key": None}}]},
    ],
)
def test_validate_metadata_throws_exception_when_non_string_values_present(test_data):
    """Each input above holds a non-string metadata value and must be rejected."""
    with pytest.raises(ValidationError) as e:
        validate_metadata_values_are_strings(test_data)

    # Asserted after the ``with`` block: a statement placed inside the block
    # after the raising call would never run.
    assert "Metadata values should be strings" == e.value.message
Loading