-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feature/pdct 1368 add the ability to read in all relevant data files (#4
- Loading branch information
Showing
11 changed files
with
306 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,6 +41,9 @@ lint: | |
paths: | ||
- .trunk/configs/cspell.json | ||
- .gitignore | ||
- linters: [pre-commit-hooks, prettier] | ||
paths: | ||
- tests/unit_tests/test_fixtures/malformed_data.json | ||
|
||
enabled: | ||
- [email protected] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
import csv | ||
import json | ||
import os | ||
from enum import Enum | ||
from typing import Any, Optional, Union | ||
|
||
import click | ||
|
||
|
||
class AllowedFileExtensions(Enum):
    """
    File extensions accepted by the reader, stored without the leading dot
    """

    JSON = "json"
    CSV = "csv"
|
||
|
||
def read_csv(file_path: str) -> list[dict[str, Any]]:
    """
    Reads a csv file and returns a list of dictionaries

    The first row is used as the header. Every following row becomes one
    dictionary keyed by the header names; values beyond the header width are
    dropped and missing trailing values are None (csv.DictReader defaults).

    :param file_path str: a file path to the csv file
    :return list: a list of dictionaries, where each line in the csv file is
        mapped to a dictionary; empty when the file has no data rows
    """
    # newline="" hands the raw line endings to the csv module, as its
    # documentation requires; without it newlines embedded in quoted fields
    # are rewritten by universal-newline translation. The explicit encoding
    # keeps decoding independent of the machine's locale.
    with open(file_path, "r", newline="", encoding="utf-8") as file:
        csv_reader = csv.DictReader(file)
        fieldnames = csv_reader.fieldnames or []
        return [{field: line[field] for field in fieldnames} for line in csv_reader]
|
||
|
||
def read_json(file_path: str) -> Optional[dict]:
    """
    Reads a json file and returns the decoded json data

    :param file_path str: A file path to the json file
    :raises JSONDecodeError: if the file content is not valid json
    :return dict: A dictionary of the json data (more precisely, whatever
        top-level value json.load decodes from the file)
    """
    # A JSONDecodeError propagates to the caller unchanged; wrapping the call
    # in try/except only to re-raise the same exception added nothing.
    # JSON text is decoded as UTF-8 explicitly so the result does not depend
    # on the machine's locale.
    with open(file_path, "r", encoding="utf-8") as file:
        return json.load(file)
|
||
|
||
def read_data_file(
    file_path: str,
) -> Optional[Union[dict[str, Any], list[dict[str, Any]]]]:
    """
    Validates a file path for type, existence and size, then calls the
    function that reads the csv or json file respectively

    :param file_path str: A file path to the csv/json file
    :raises ValueError: if a non csv or json file type is provided
    :raises FileNotFoundError: if the file does not exist
    :raises ValueError: if the file is empty
    :raises Exception: any error raised while reading the file is echoed
        with click and then re-raised unchanged
    :return Optional[Union[dict[str, Any], list[dict[str, Any]]]]: A dictionary or list of dictionaries
        depending on the file type
    """
    # Extensions are compared case-insensitively so that e.g. "DATA.CSV" is
    # not rejected as an unsupported file type.
    file_extension = os.path.splitext(file_path)[1][1:].lower()
    if file_extension not in [e.value for e in AllowedFileExtensions]:
        raise ValueError("Error reading file: File must be a valid json or csv file")
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"No such file or directory: '{file_path}'")
    if os.path.getsize(file_path) == 0:
        raise ValueError("Error reading file: File is empty")
    try:
        if file_extension == AllowedFileExtensions.CSV.value:
            return read_csv(file_path)
        return read_json(file_path)
    except Exception as e:
        click.echo(f"Error reading file: {e}")
        # Bare raise re-raises the active exception with its original
        # traceback intact.
        raise
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
country,avg_temp_celsius,annual_rainfall_mm,climate_zone | ||
Brazil,21.5,1500,Tropical | ||
Canada,6.3,940,Continental | ||
Egypt,22.1,25,Desert |
23 changes: 23 additions & 0 deletions
23
tests/unit_tests/test_fixtures/invalid_climate_json_data.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
{ | ||
"climate_data": [ | ||
{ | ||
"country": "Brazil", | ||
"capital": "Brasilia", | ||
"climate_info": { | ||
"avg_temp_celsius": "twenty-one point five", | ||
"annual_rainfall_mm": 1500 | ||
}, | ||
"natural_disasters": ["Floods", "Landslides"] | ||
}, | ||
{ | ||
"country": "Canada", | ||
"capital": "Ottawa", | ||
"climate_info": { | ||
"avg_temp_celsius": 6.3, | ||
"annual_rainfall_mm": "nine hundred forty", | ||
"climate_zone": "Continental" | ||
}, | ||
"natural_disasters": ["Blizzards", "Wildfires"] | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
|
||
{ | ||
"location": "New York", | ||
"temperature": { | ||
"value": 75, | ||
"unit": "Fahrenheit", | ||
}, | ||
"humidity": 60, | ||
"conditions": [ | ||
"Sunny", | ||
"Windy", | ||
], | ||
"forecast": { | ||
"day": "Monday", | ||
"high": 80, | ||
"low": 65 | ||
// Missing closing brace | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Very basic txt file to test that the read function does not process non-csv/json files |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
country,capital,avg_temp_celsius,annual_rainfall_mm,climate_zone | ||
Brazil,Brasilia,21.5,1500,Tropical | ||
Canada,Ottawa,6.3,940,Continental | ||
Egypt,Cairo,22.1,25,Desert |
24 changes: 24 additions & 0 deletions
24
tests/unit_tests/test_fixtures/valid_climate_json_data.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
{ | ||
"climate_data": [ | ||
{ | ||
"country": "Brazil", | ||
"capital": "Brasilia", | ||
"climate_info": { | ||
"avg_temp_celsius": 21.5, | ||
"annual_rainfall_mm": 1500, | ||
"climate_zone": "Tropical" | ||
}, | ||
"natural_disasters": ["Floods", "Landslides"] | ||
}, | ||
{ | ||
"country": "Canada", | ||
"capital": "Ottawa", | ||
"climate_info": { | ||
"avg_temp_celsius": 6.3, | ||
"annual_rainfall_mm": 940, | ||
"climate_zone": "Continental" | ||
}, | ||
"natural_disasters": ["Blizzards", "Wildfires"] | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
import json | ||
import os | ||
from typing import Any, Union | ||
|
||
import pytest | ||
|
||
from gcf_data_mapper.read import read_data_file | ||
|
||
# Absolute path of the directory that contains this test module.
UNIT_TESTS_FOLDER = os.path.split(os.path.abspath(__file__))[0]
# Fixture files read by the tests live in the "test_fixtures" sub-directory.
FIXTURES_FOLDER = os.path.join(UNIT_TESTS_FOLDER, "test_fixtures")
|
||
|
||
def return_valid_csv_data():
    """
    Builds the expected data structure of the csv file: one dictionary per
    data row, keyed by the header names, with every value kept as a string.
    """
    header = (
        "country",
        "capital",
        "avg_temp_celsius",
        "annual_rainfall_mm",
        "climate_zone",
    )
    rows = (
        ("Brazil", "Brasilia", "21.5", "1500", "Tropical"),
        ("Canada", "Ottawa", "6.3", "940", "Continental"),
        ("Egypt", "Cairo", "22.1", "25", "Desert"),
    )
    # Pair each row with the header to get dictionaries in header order.
    return [dict(zip(header, row)) for row in rows]
|
||
|
||
def return_valid_json_data():
    """
    Builds the expected data structure of the json file: a dictionary whose
    "climate_data" key holds one record per country.
    """
    brazil = {
        "country": "Brazil",
        "capital": "Brasilia",
        "climate_info": {
            "avg_temp_celsius": 21.5,
            "annual_rainfall_mm": 1500,
            "climate_zone": "Tropical",
        },
        "natural_disasters": ["Floods", "Landslides"],
    }
    canada = {
        "country": "Canada",
        "capital": "Ottawa",
        "climate_info": {
            "avg_temp_celsius": 6.3,
            "annual_rainfall_mm": 940,
            "climate_zone": "Continental",
        },
        "natural_disasters": ["Blizzards", "Wildfires"],
    }
    return {"climate_data": [brazil, canada]}
|
||
|
||
@pytest.mark.parametrize(
    "filepath, expected_output",
    [
        (
            os.path.join(FIXTURES_FOLDER, "valid_climate_json_data.json"),
            return_valid_json_data(),
        ),
        (
            os.path.join(FIXTURES_FOLDER, "valid_climate_csv_data.csv"),
            return_valid_csv_data(),
        ),
    ],
)
def test_valid_files_return_expected_output(
    filepath: str, expected_output: Union[dict, list[dict[str, Any]]]
):
    """
    Reading a valid json or csv fixture returns exactly the expected structure
    """
    # Guard against a missing fixture before exercising the reader.
    assert os.path.exists(filepath)
    loaded = read_data_file(filepath)
    assert loaded is not None
    assert loaded == expected_output
|
||
|
||
@pytest.mark.parametrize(
    "filepath, expected_output",
    [
        (
            os.path.join(FIXTURES_FOLDER, "invalid_climate_json_data.json"),
            return_valid_json_data(),
        ),
        (
            os.path.join(FIXTURES_FOLDER, "invalid_climate_csv_data.csv"),
            return_valid_csv_data(),
        ),
    ],
)
def test_invalid_files_do_not_return_expected_output(
    filepath: str, expected_output: Union[dict, list[dict[str, Any]]]
):
    """
    Reading a fixture whose content differs from the valid data must not
    produce the valid expected structure
    """
    # Guard against a missing fixture before exercising the reader.
    assert os.path.exists(filepath)
    loaded = read_data_file(filepath)
    assert loaded != expected_output
|
||
|
||
def test_raises_error_on_invalid_file_extension():
    """
    A path with an extension other than csv/json is rejected with a ValueError
    """
    text_file_path = os.path.join(FIXTURES_FOLDER, "test_text_file.txt")
    with pytest.raises(ValueError) as exc_info:
        read_data_file(text_file_path)
    # Checked after the context manager exits, once the exception is captured.
    assert str(exc_info.value) == (
        "Error reading file: File must be a valid json or csv file"
    )
|
||
|
||
def test_raises_error_with_non_existent_file():
    """
    A csv path that does not exist on disk raises FileNotFoundError naming
    the path
    """
    missing_path = os.path.join(FIXTURES_FOLDER, "non_existent_file.csv")
    expected_message = f"No such file or directory: '{missing_path}'"
    with pytest.raises(FileNotFoundError) as exc_info:
        read_data_file(missing_path)
    assert str(exc_info.value) == expected_message
|
||
|
||
def test_raises_error_with_empty_file():
    """
    A zero-byte csv fixture is rejected with the empty-file ValueError
    """
    empty_path = os.path.join(FIXTURES_FOLDER, "empty_file.csv")
    with pytest.raises(ValueError) as exc_info:
        read_data_file(empty_path)
    assert str(exc_info.value) == "Error reading file: File is empty"
|
||
|
||
def test_raises_error_on_malformed_json():
    """
    A json fixture with invalid syntax surfaces json.JSONDecodeError to the
    caller
    """
    malformed_path = os.path.join(FIXTURES_FOLDER, "malformed_data.json")
    with pytest.raises(json.JSONDecodeError):
        read_data_file(malformed_path)