Skip to content
This repository has been archived by the owner on Sep 11, 2024. It is now read-only.

Commit

Permalink
Merge pull request #87 from climatepolicyradar/bugfix/raise-correct-error
Browse files Browse the repository at this point in the history

Bug Fix - Raise Correct Exception
  • Loading branch information
THOR300 authored Nov 8, 2023
2 parents c841aab + 6257921 commit 0a2bc68
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 4 deletions.
9 changes: 5 additions & 4 deletions src/cpr_data_access/parser_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from collections import Counter

from deprecation import deprecated
from pydantic import BaseModel, AnyHttpUrl, Field, root_validator
from pydantic import BaseModel, AnyHttpUrl, Field, validator
from langdetect import DetectorFactory, LangDetectException, detect

from cpr_data_access.pipeline_general_models import (
Expand Down Expand Up @@ -193,8 +193,8 @@ class BaseParserOutput(BaseModel):
pdf_data: Optional[PDFData] = None
pipeline_metadata: Json = {} # note: defaulting to {} here is safe (pydantic)

@root_validator
def check_html_pdf_metadata(cls, values):
@validator("pdf_data") # Validate via pdf_data: the other fields this check reads are declared before it, so they are already in `values`
def check_html_pdf_metadata(cls, value, values):
"""
Validate the relationship between content-type and the data that is set.
Expand All @@ -204,6 +204,7 @@ def check_html_pdf_metadata(cls, values):
Check that if the content-type is not HTML or PDF, then html_data and pdf_data
are both null.
"""
values["pdf_data"] = value
if (
values["document_content_type"] == CONTENT_TYPE_HTML
and values["html_data"] is None
Expand All @@ -224,7 +225,7 @@ def check_html_pdf_metadata(cls, values):
"html_data and pdf_data must be null for documents with no content type."
)

return values
return values["pdf_data"]

def get_text_blocks(self, including_invalid_html=False) -> Sequence[TextBlock]:
"""A method for getting text blocks with the option to include invalid html."""
Expand Down
9 changes: 9 additions & 0 deletions tests/test_parser_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,12 @@ def test_parser_output_object(parser_output_json_pdf, parser_output_json_html) -
== text_blocks_include_invalid
== text_blocks_not_include_invalid
)

# Test that the correct validation error is thrown during instantiation
parser_output_json_bad_text_block = parser_output_json_pdf.copy()
parser_output_json_bad_text_block["pdf_data"]["text_blocks"][0][
"type"
] = "ThisBlockTypeDoesNotExist"
with pytest.raises(pydantic.error_wrappers.ValidationError) as context:
ParserOutput.parse_obj(parser_output_json_bad_text_block)
assert "value is not a valid enumeration member" in str(context.value)

0 comments on commit 0a2bc68

Please sign in to comment.