Skip to content

Commit

Permalink
Add test for invalid data
Browse files Browse the repository at this point in the history
  • Loading branch information
henhuy committed Jun 17, 2024
1 parent dc1d37a commit 9ba770a
Show file tree
Hide file tree
Showing 9 changed files with 54 additions and 4 deletions.
7 changes: 6 additions & 1 deletion src/omi/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,13 +365,18 @@ def __validate_data_against_schema(data: pd.DataFrame, fields: dict[str, str]) -
Report
Frictionless report of validated data
"""
# Check if all fields in metadata are represented in data
for field in fields:
if field not in data.columns:
raise ValidationError(f"Could not find column '{field}' in data.")

ordered_fields = {}
for field in data.columns:
if field not in fields:
raise ValidationError(f"Could not find field '{field}' in schema.")
ordered_fields[field] = fields[field]
frictionless_fields = __map_fields_to_frictionless_fields(ordered_fields)
schema = Schema(fields=frictionless_fields)
schema = Schema(fields=frictionless_fields, primary_key=["id"])
resource = Resource(
data=data,
profile="tabular-data-resource",
Expand Down
2 changes: 1 addition & 1 deletion tests/test_data/validation/data.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
id;region;year;cost_var_e;bandwidth_type;source;method;comment;version
0;["DE"];2024;[100.0];{"natural_domestic_limit": "point"};{"natural_domestic_limit": "file"};{"natural_domestic_limit": "exact"};{"test": "test"};v1
0;["DE"];2024;[100.0];{"natural_domestic_limit": "point"};{"natural_domestic_limit": "file"};{"natural_domestic_limit": "exact"};{"test": "test"};v1
1;["DE"];2024;[100.0];{"natural_domestic_limit": "point"};{"natural_domestic_limit": "file"};{"natural_domestic_limit": "exact"};{"test": "test"};v1
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
id;region;year;natural_domestic_limit;bandwidth_type;source;method;comment;version
0;DE;2024;100.0;should_be_json;{"natural_domestic_limit": "file"};{"natural_domestic_limit": "exact"};{"test": "test"};v1
0;DE;2024;100.0;should_be_json;{"natural_domestic_limit": "file"};{"natural_domestic_limit": "exact"};{"test": "test"};v1
1;DE;2024;100.0;should_be_json;{"natural_domestic_limit": "file"};{"natural_domestic_limit": "exact"};{"test": "test"};v1
2 changes: 1 addition & 1 deletion tests/test_data/validation/hackathon_lignite_hh_valid.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
id;region;year;natural_domestic_limit;bandwidth_type;source;method;comment;version
0;["DE"];2024;100.0;{"natural_domestic_limit": "point"};{"natural_domestic_limit": "file"};{"natural_domestic_limit": "exact"};{"test": "test"};v1
0;["DE"];2024;100.0;{"natural_domestic_limit": "point"};{"natural_domestic_limit": "file"};{"natural_domestic_limit": "exact"};{"test": "test"};v1
1;["DE"];2024;100.0;{"natural_domestic_limit": "point"};{"natural_domestic_limit": "file"};{"natural_domestic_limit": "exact"};{"test": "test"};v1
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id;region;year;cost_var_e;bandwidth_type;source;method;comment;version
1;["DE"];2024;[100.0];{"natural_domestic_limit": "point"};{"natural_domestic_limit": "file"};{"natural_domestic_limit": "exact"};{"test": "test"};v1
1;["DE"];2024;[100.0];{"natural_domestic_limit": "point"};{"natural_domestic_limit": "file"};{"natural_domestic_limit": "exact"};{"test": "test"};v1
3 changes: 3 additions & 0 deletions tests/test_data/validation/invalid_data/extra_column.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id;region;year;cost_var_e;bandwidth_type;source;method;comment;version;added_column
0;["DE"];2024;[100.0];{"natural_domestic_limit": "point"};{"natural_domestic_limit": "file"};{"natural_domestic_limit": "exact"};{"test": "test"};v1;1
0;["DE"];2024;[100.0];{"natural_domestic_limit": "point"};{"natural_domestic_limit": "file"};{"natural_domestic_limit": "exact"};{"test": "test"};v1;2
3 changes: 3 additions & 0 deletions tests/test_data/validation/invalid_data/invalid_datatype.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id;region;year;cost_var_e;bandwidth_type;source;method;comment;version
0;["DE"];2024;[100.0];{"natural_domestic_limit": "point"};1;{"natural_domestic_limit": "exact"};{"test": "test"};v1
0;["DE"];2024;100.0;{"natural_domestic_limit": "point"};{"natural_domestic_limit": "file"};{"natural_domestic_limit": "exact"};{"test": "test"};v1
3 changes: 3 additions & 0 deletions tests/test_data/validation/invalid_data/missing_column.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
id;region;year;cost_var_e;bandwidth_type;source;comment;version
0;["DE"];2024;[100.0];{"natural_domestic_limit": "point"};{"natural_domestic_limit": "file"};{"test": "test"};v1
0;["DE"];2024;[100.0];{"natural_domestic_limit": "point"};{"natural_domestic_limit": "file"};{"test": "test"};v1
33 changes: 33 additions & 0 deletions tests/test_data_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,39 @@ def test_data_validation_invalid_report():
assert not report.valid


def test_invalid_data():
    """Test invalid data validation with example files."""
    validation_dir = pathlib.Path(__file__).parent / "test_data" / "validation"
    with (validation_dir / "metadata_for_data_csv.json").open("r") as f:
        metadata = json.load(f)

    # Each case pairs a fixture CSV in invalid_data/ with the expected
    # ValidationError message pattern (pytest.raises treats it as a regex).
    cases = [
        ("missing_column.csv", "Could not find column 'method' in data."),
        ("extra_column.csv", "Could not find field 'added_column' in schema."),
        ("invalid_datatype.csv", "type-error"),
        ("duplicate_primary_keys.csv", "primary-key"),
    ]
    for filename, expected_message in cases:
        invalid_data = pd.read_csv(validation_dir / "invalid_data" / filename, delimiter=";")
        with pytest.raises(validation.ValidationError, match=expected_message):
            validation.validate_data(invalid_data, metadata=metadata)


def test_invalid_arguments_to_validation_function():
"""Test different invalid function calls to validation function."""
with pytest.raises(validation.ValidationError, match="Data must be given as pandas.DataFrame."):
Expand Down

0 comments on commit 9ba770a

Please sign in to comment.