Skip to content

Commit

Permalink
Feature/pdct 1368 add the ability to read in all relevant data files (#4
Browse files Browse the repository at this point in the history
)
  • Loading branch information
odrakes-cpr authored Aug 29, 2024
2 parents 9d298d2 + 41f0edd commit a486468
Show file tree
Hide file tree
Showing 11 changed files with 306 additions and 8 deletions.
3 changes: 3 additions & 0 deletions .trunk/trunk.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ lint:
paths:
- .trunk/configs/cspell.json
- .gitignore
- linters: [pre-commit-hooks, prettier]
paths:
- tests/unit_tests/test_fixtures/malformed_data.json

enabled:
- [email protected]
Expand Down
23 changes: 15 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,23 +1,30 @@
install_trunk:
.DEFAULT_GOAL := help

.PHONY: help
help: ## Display this help message
@echo "Available commands:"
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'

install_trunk: ## Install trunk
$(eval trunk_installed=$(shell trunk --version > /dev/null 2>&1 ; echo $$? ))
ifneq (${trunk_installed},0)
$(eval OS_NAME=$(shell uname -s | tr A-Z a-z))
curl https://get.trunk.io -fsSL | bash
endif

uninstall_trunk:
uninstall_trunk: ## Uninstall trunk
sudo rm -if `which trunk`
rm -ifr ${HOME}/.cache/trunk

share_trunk:
trunk init

move_workflows:
move_workflows: ## Move workflows to .github/workflows
mv workflows .github/workflows

init: share_trunk move_workflows

setup_with_pyenv:
setup_with_pyenv: ## Setup the project with pyenv
pyenv install 3.10
pyenv virtualenv 3.10 gcf-dm
pyenv activate gcf-dm
Expand All @@ -26,12 +33,12 @@ setup_with_pyenv:
install_git_hooks: install_trunk
trunk init

check:
check: ## Format and check the project with trunk
trunk fmt
trunk check

build:
build: ## Build the project
poetry build

test:
poetry run pytest
test: ## Run tests using pytest
poetry run pytest -v
72 changes: 72 additions & 0 deletions gcf_data_mapper/read.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import csv
import json
import os
from enum import Enum
from typing import Any, Optional, Union

import click


class AllowedFileExtensions(Enum):
    """File extensions accepted by read_data_file (values are the lowercase extension without the leading dot)."""

    JSON = "json"
    CSV = "csv"


def read_csv(file_path: str) -> list[dict[str, Any]]:
    """
    Reads a csv file and returns a list of dictionaries

    :param file_path str: a file path to the csv file
    :return list[dict[str, Any]]: a list of dictionaries, where each line in
        the csv file is mapped to a dictionary keyed by the header fields
    """
    # newline="" is required by the csv module so that newlines embedded in
    # quoted fields are parsed correctly (see csv module documentation).
    with open(file_path, "r", newline="") as file:
        csv_reader = csv.DictReader(file)
        fieldnames = csv_reader.fieldnames or []
        # Keep only the header columns; any surplus values in a row (which
        # DictReader collects under a None key) are dropped.
        data = [{field: line[field] for field in fieldnames} for line in csv_reader]
    return data


def read_json(file_path: str) -> Optional[dict]:
    """
    Reads a json file and returns the json object as a dict

    :param file_path str: A file path to the json file
    :raises json.JSONDecodeError: if the file contents are not valid json
    :return Optional[dict]: A dictionary of the json data
    """
    # json.load already raises json.JSONDecodeError on malformed input; the
    # previous try/except that caught it only to re-raise it added nothing.
    with open(file_path, "r") as file:
        return json.load(file)


def read_data_file(
    file_path: str,
) -> Optional[Union[dict[str, Any], list[dict[str, Any]]]]:
    """
    Validates a file path for existence, type and size, then calls a
    function to read the csv or json file respectively

    :param file_path str: A file path to the csv/json file
    :raises ValueError: if a non csv or json file type is provided
    :raises FileNotFoundError: if the file does not exist
    :raises ValueError: if the file is empty
    :return Optional[Union[dict[str, Any], list[dict[str, Any]]]]: A dictionary
        or list of dictionaries depending on the file type
    """
    # Extension is checked before existence, so a missing .txt path raises
    # ValueError (not FileNotFoundError) — the unit tests rely on this order.
    file_extension = os.path.splitext(file_path)[1][1:]
    if file_extension not in [e.value for e in AllowedFileExtensions]:
        raise ValueError("Error reading file: File must be a valid json or csv file")
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"No such file or directory: '{file_path}'")
    if os.path.getsize(file_path) == 0:
        raise ValueError("Error reading file: File is empty")
    try:
        if file_extension == AllowedFileExtensions.CSV.value:
            return read_csv(file_path)
        return read_json(file_path)
    except Exception as e:
        # Surface a short message to the CLI, then propagate the original
        # exception. Bare `raise` re-raises without appending this frame's
        # context the way `raise e` would.
        click.echo(f"Error reading file: {e}")
        raise
Empty file.
4 changes: 4 additions & 0 deletions tests/unit_tests/test_fixtures/invalid_climate_csv_data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
country,avg_temp_celsius,annual_rainfall_mm,climate_zone
Brazil,21.5,1500,Tropical
Canada,6.3,940,Continental
Egypt,22.1,25,Desert
23 changes: 23 additions & 0 deletions tests/unit_tests/test_fixtures/invalid_climate_json_data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"climate_data": [
{
"country": "Brazil",
"capital": "Brasilia",
"climate_info": {
"avg_temp_celsius": "twenty-one point five",
"annual_rainfall_mm": 1500
},
"natural_disasters": ["Floods", "Landslides"]
},
{
"country": "Canada",
"capital": "Ottawa",
"climate_info": {
"avg_temp_celsius": 6.3,
"annual_rainfall_mm": "nine hundred forty",
"climate_zone": "Continental"
},
"natural_disasters": ["Blizzards", "Wildfires"]
}
]
}
19 changes: 19 additions & 0 deletions tests/unit_tests/test_fixtures/malformed_data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@

{
"location": "New York",
"temperature": {
"value": 75,
"unit": "Fahrenheit",
},
"humidity": 60,
"conditions": [
"Sunny",
"Windy",
],
"forecast": {
"day": "Monday",
"high": 80,
"low": 65
// Missing closing brace
}

1 change: 1 addition & 0 deletions tests/unit_tests/test_fixtures/test_text_file.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Very basic txt file to test that the read function does not process non-csv/json files
4 changes: 4 additions & 0 deletions tests/unit_tests/test_fixtures/valid_climate_csv_data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
country,capital,avg_temp_celsius,annual_rainfall_mm,climate_zone
Brazil,Brasilia,21.5,1500,Tropical
Canada,Ottawa,6.3,940,Continental
Egypt,Cairo,22.1,25,Desert
24 changes: 24 additions & 0 deletions tests/unit_tests/test_fixtures/valid_climate_json_data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"climate_data": [
{
"country": "Brazil",
"capital": "Brasilia",
"climate_info": {
"avg_temp_celsius": 21.5,
"annual_rainfall_mm": 1500,
"climate_zone": "Tropical"
},
"natural_disasters": ["Floods", "Landslides"]
},
{
"country": "Canada",
"capital": "Ottawa",
"climate_info": {
"avg_temp_celsius": 6.3,
"annual_rainfall_mm": 940,
"climate_zone": "Continental"
},
"natural_disasters": ["Blizzards", "Wildfires"]
}
]
}
141 changes: 141 additions & 0 deletions tests/unit_tests/test_read_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import json
import os
from typing import Any, Union

import pytest

from gcf_data_mapper.read import read_data_file

UNIT_TESTS_FOLDER = os.path.dirname(os.path.abspath(__file__))
FIXTURES_FOLDER = os.path.join(UNIT_TESTS_FOLDER, "test_fixtures")


def return_valid_csv_data():
    """
    Function which returns expected data structure of csv file.
    """

    header = ("country", "capital", "avg_temp_celsius", "annual_rainfall_mm", "climate_zone")
    rows = (
        ("Brazil", "Brasilia", "21.5", "1500", "Tropical"),
        ("Canada", "Ottawa", "6.3", "940", "Continental"),
        ("Egypt", "Cairo", "22.1", "25", "Desert"),
    )
    # Zip each row against the header to build the per-country dicts.
    return [dict(zip(header, row)) for row in rows]


def return_valid_json_data():
    """
    Function which returns expected data structure of json file.
    """

    countries = (
        ("Brazil", "Brasilia", 21.5, 1500, "Tropical", ["Floods", "Landslides"]),
        ("Canada", "Ottawa", 6.3, 940, "Continental", ["Blizzards", "Wildfires"]),
    )
    # Expand each tuple into the nested structure the json fixture parses to.
    return {
        "climate_data": [
            {
                "country": country,
                "capital": capital,
                "climate_info": {
                    "avg_temp_celsius": temp,
                    "annual_rainfall_mm": rainfall,
                    "climate_zone": zone,
                },
                "natural_disasters": disasters,
            }
            for country, capital, temp, rainfall, zone, disasters in countries
        ]
    }


@pytest.mark.parametrize(
    "filepath, expected_output",
    [
        (
            os.path.join(FIXTURES_FOLDER, "valid_climate_json_data.json"),
            return_valid_json_data(),
        ),
        (
            os.path.join(FIXTURES_FOLDER, "valid_climate_csv_data.csv"),
            return_valid_csv_data(),
        ),
    ],
)
def test_valid_files_return_expected_output(
    filepath: str, expected_output: Union[dict, list[dict[str, Any]]]
):
    """Valid json/csv fixtures parse to exactly the expected structures."""
    assert os.path.exists(filepath)
    result = read_data_file(filepath)
    assert result is not None
    assert result == expected_output


@pytest.mark.parametrize(
    "filepath, expected_output",
    [
        (
            os.path.join(FIXTURES_FOLDER, "invalid_climate_json_data.json"),
            return_valid_json_data(),
        ),
        (
            os.path.join(FIXTURES_FOLDER, "invalid_climate_csv_data.csv"),
            return_valid_csv_data(),
        ),
    ],
)
def test_invalid_files_do_not_return_expected_output(
    filepath: str, expected_output: Union[dict, list[dict[str, Any]]]
):
    """Invalid fixtures still parse, but must not equal the valid structures."""
    assert os.path.exists(filepath)
    result = read_data_file(filepath)
    assert result != expected_output


def test_raises_error_on_invalid_file_extension():
    """A txt file is rejected with ValueError before any read is attempted."""
    txt_path = os.path.join(FIXTURES_FOLDER, "test_text_file.txt")
    with pytest.raises(ValueError) as excinfo:
        read_data_file(txt_path)
    assert str(excinfo.value) == "Error reading file: File must be a valid json or csv file"


def test_raises_error_with_non_existent_file():
    """A missing file with a valid extension raises FileNotFoundError."""
    missing_path = os.path.join(FIXTURES_FOLDER, "non_existent_file.csv")
    with pytest.raises(FileNotFoundError) as excinfo:
        read_data_file(missing_path)
    assert str(excinfo.value) == f"No such file or directory: '{missing_path}'"


def test_raises_error_with_empty_file():
    """A zero-byte file raises ValueError before parsing is attempted."""
    zero_byte_path = os.path.join(FIXTURES_FOLDER, "empty_file.csv")
    with pytest.raises(ValueError) as excinfo:
        read_data_file(zero_byte_path)
    assert str(excinfo.value) == "Error reading file: File is empty"


def test_raises_error_on_malformed_json():
    """Malformed json surfaces json.JSONDecodeError to the caller."""
    malformed_path = os.path.join(FIXTURES_FOLDER, "malformed_data.json")
    with pytest.raises(json.JSONDecodeError):
        read_data_file(malformed_path)

0 comments on commit a486468

Please sign in to comment.