diff --git a/.trunk/trunk.yaml b/.trunk/trunk.yaml
index c7b9631..ae8fc74 100644
--- a/.trunk/trunk.yaml
+++ b/.trunk/trunk.yaml
@@ -41,6 +41,9 @@ lint:
       paths:
         - .trunk/configs/cspell.json
         - .gitignore
+    - linters: [pre-commit-hooks, prettier]
+      paths:
+        - tests/unit_tests/test_fixtures/malformed_data.json
 
   enabled:
     - cspell@8.14.1
diff --git a/Makefile b/Makefile
index e952f9a..94f4cd8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,23 +1,30 @@
-install_trunk:
+.DEFAULT_GOAL := help
+
+.PHONY: help
+help: ## Display this help message
+	@echo "Available commands:"
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
+
+install_trunk: ## Install trunk
 	$(eval trunk_installed=$(shell trunk --version > /dev/null 2>&1 ; echo $$? ))
 ifneq (${trunk_installed},0)
 	$(eval OS_NAME=$(shell uname -s | tr A-Z a-z))
 	curl https://get.trunk.io -fsSL | bash
 endif
 
-uninstall_trunk:
+uninstall_trunk: ## Uninstall trunk
 	sudo rm -if `which trunk`
 	rm -ifr ${HOME}/.cache/trunk
 
 share_trunk:
 	trunk init
 
-move_workflows:
+move_workflows: ## Move workflows to .github/workflows
 	mv workflows .github/workflows
 
 init: share_trunk move_workflows
 
-setup_with_pyenv:
+setup_with_pyenv: ## Setup the project with pyenv
 	pyenv install 3.10
 	pyenv virtualenv 3.10 gcf-dm
 	pyenv activate gcf-dm
@@ -26,12 +33,12 @@ setup_with_pyenv:
 install_git_hooks: install_trunk
 	trunk init
 
-check:
+check: ## Format and check the project with trunk
 	trunk fmt
 	trunk check
 
-build:
+build: ## Build the project
 	poetry build
 
-test:
-	poetry run pytest
+test: ## Run tests using pytest
+	poetry run pytest -v
diff --git a/gcf_data_mapper/read.py b/gcf_data_mapper/read.py
new file mode 100644
index 0000000..bbbccd8
--- /dev/null
+++ b/gcf_data_mapper/read.py
@@ -0,0 +1,72 @@
+import csv
+import json
+import os
+from enum import Enum
+from typing import Any, Optional, Union
+
+import click
+
+
+class AllowedFileExtensions(Enum):
+    JSON = "json"
+    CSV = "csv"
+
+
+def read_csv(file_path: str) -> list[dict[str, Any]]:
+    """
+    Reads a csv file and returns a list of dictionaries
+
+    :param file_path str: a file path to the csv file
+    :return list: a list of dictionaries, where each line in the csv file is
+        mapped to a dictionary
+    """
+    with open(file_path, "r") as file:
+        csv_reader = csv.DictReader(file)
+        fieldnames = csv_reader.fieldnames or []
+        data = [{field: line[field] for field in fieldnames} for line in csv_reader]
+        return data
+
+
+def read_json(file_path: str) -> Optional[dict]:
+    """
+    Reads a json file and returns the json object as a dict
+
+    :param file_path str: A file path to the json file
+    :raises JSONDecodeError: if the file cannot be parsed as valid json
+    :return dict: A dictionary of the json data
+    """
+    try:
+        with open(file_path, "r") as file:
+            return json.load(file)
+    except json.JSONDecodeError as e:
+        raise e
+
+
+def read_data_file(
+    file_path: str,
+) -> Optional[Union[dict[str, Any], list[dict[str, Any]]]]:
+    """
+    Validates a file path for existence, type and size, then calls the
+    appropriate reader function for the csv or json file
+
+    :param file_path str: A file path to the csv/json file
+    :raises ValueError: if a non csv or json file type is provided
+    :raises FileNotFoundError: if the file does not exist
+    :raises ValueError: if the file is empty
+    :return Optional[Union[dict[str, Any], list[dict[str, Any]]]]: A dictionary or list of dictionaries
+        depending on the file type
+    """
+    file_extension = os.path.splitext(file_path)[1][1:]
+    if file_extension not in [e.value for e in AllowedFileExtensions]:
+        raise ValueError("Error reading file: File must be a valid json or csv file")
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"No such file or directory: '{file_path}'")
+    if os.path.getsize(file_path) == 0:
+        raise ValueError("Error reading file: File is empty")
+    try:
+        if file_extension == AllowedFileExtensions.CSV.value:
+            return read_csv(file_path)
+        return read_json(file_path)
+    except Exception as e:
+        click.echo(f"Error reading file: {e}")
+        raise e
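
For context, a minimal usage sketch of the new reader (not part of the diff). The fixture paths are only illustrative, assuming the files added later in this PR and the repository root as the working directory:

```python
# Illustrative sketch, not part of the diff; paths assume the fixtures added in this PR.
from gcf_data_mapper.read import read_data_file

# CSV -> list of dicts, one per row; csv.DictReader leaves every value as a string.
rows = read_data_file("tests/unit_tests/test_fixtures/valid_climate_csv_data.csv")
print(rows[0]["avg_temp_celsius"])  # "21.5"

# JSON -> plain dict.
doc = read_data_file("tests/unit_tests/test_fixtures/valid_climate_json_data.json")
print(doc["climate_data"][0]["country"])  # "Brazil"

# Unsupported extensions are rejected before the file is even opened.
try:
    read_data_file("tests/unit_tests/test_fixtures/test_text_file.txt")
except ValueError as err:
    print(err)  # Error reading file: File must be a valid json or csv file
```
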
diff --git a/tests/unit_tests/test_fixtures/empty_file.csv b/tests/unit_tests/test_fixtures/empty_file.csv
new file mode 100644
index 0000000..e69de29
diff --git a/tests/unit_tests/test_fixtures/invalid_climate_csv_data.csv b/tests/unit_tests/test_fixtures/invalid_climate_csv_data.csv
new file mode 100644
index 0000000..8d8e218
--- /dev/null
+++ b/tests/unit_tests/test_fixtures/invalid_climate_csv_data.csv
@@ -0,0 +1,4 @@
+country,avg_temp_celsius,annual_rainfall_mm,climate_zone
+Brazil,21.5,1500,Tropical
+Canada,6.3,940,Continental
+Egypt,22.1,25,Desert
diff --git a/tests/unit_tests/test_fixtures/invalid_climate_json_data.json b/tests/unit_tests/test_fixtures/invalid_climate_json_data.json
new file mode 100644
index 0000000..2fab60c
--- /dev/null
+++ b/tests/unit_tests/test_fixtures/invalid_climate_json_data.json
@@ -0,0 +1,23 @@
+{
+  "climate_data": [
+    {
+      "country": "Brazil",
+      "capital": "Brasilia",
+      "climate_info": {
+        "avg_temp_celsius": "twenty-one point five",
+        "annual_rainfall_mm": 1500
+      },
+      "natural_disasters": ["Floods", "Landslides"]
+    },
+    {
+      "country": "Canada",
+      "capital": "Ottawa",
+      "climate_info": {
+        "avg_temp_celsius": 6.3,
+        "annual_rainfall_mm": "nine hundred forty",
+        "climate_zone": "Continental"
+      },
+      "natural_disasters": ["Blizzards", "Wildfires"]
+    }
+  ]
+}
diff --git a/tests/unit_tests/test_fixtures/malformed_data.json b/tests/unit_tests/test_fixtures/malformed_data.json
new file mode 100644
index 0000000..482c6a0
--- /dev/null
+++ b/tests/unit_tests/test_fixtures/malformed_data.json
@@ -0,0 +1,19 @@
+
+  {
+    "location": "New York",
+    "temperature": {
+      "value": 75,
+      "unit": "Fahrenheit",
+    },
+    "humidity": 60,
+    "conditions": [
+      "Sunny",
+      "Windy",
+    ],
+    "forecast": {
+      "day": "Monday",
+      "high": 80,
+      "low": 65
+      // Missing closing brace
+    }
+
\ No newline at end of file
diff --git a/tests/unit_tests/test_fixtures/test_text_file.txt b/tests/unit_tests/test_fixtures/test_text_file.txt
new file mode 100644
index 0000000..54df98a
--- /dev/null
+++ b/tests/unit_tests/test_fixtures/test_text_file.txt
@@ -0,0 +1 @@
+Very basic txt file to test that the read function does not process non-csv/json files
diff --git a/tests/unit_tests/test_fixtures/valid_climate_csv_data.csv b/tests/unit_tests/test_fixtures/valid_climate_csv_data.csv
new file mode 100644
index 0000000..3f9b0d0
--- /dev/null
+++ b/tests/unit_tests/test_fixtures/valid_climate_csv_data.csv
@@ -0,0 +1,4 @@
+country,capital,avg_temp_celsius,annual_rainfall_mm,climate_zone
+Brazil,Brasilia,21.5,1500,Tropical
+Canada,Ottawa,6.3,940,Continental
+Egypt,Cairo,22.1,25,Desert
diff --git a/tests/unit_tests/test_fixtures/valid_climate_json_data.json b/tests/unit_tests/test_fixtures/valid_climate_json_data.json
new file mode 100644
index 0000000..b42ed7f
--- /dev/null
+++ b/tests/unit_tests/test_fixtures/valid_climate_json_data.json
@@ -0,0 +1,24 @@
+{
+  "climate_data": [
+    {
+      "country": "Brazil",
+      "capital": "Brasilia",
+      "climate_info": {
+        "avg_temp_celsius": 21.5,
+        "annual_rainfall_mm": 1500,
+        "climate_zone": "Tropical"
+      },
+      "natural_disasters": ["Floods", "Landslides"]
+    },
+    {
+      "country": "Canada",
+      "capital": "Ottawa",
+      "climate_info": {
+        "avg_temp_celsius": 6.3,
+        "annual_rainfall_mm": 940,
+        "climate_zone": "Continental"
+      },
+      "natural_disasters": ["Blizzards", "Wildfires"]
+    }
+  ]
+}
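
As a quick sanity check (again a sketch, not part of the diff), the deliberately broken malformed_data.json fixture should trip json.JSONDecodeError, which is the behaviour the new malformed-JSON test below relies on; the path assumes the repository root as the working directory:

```python
# Sketch only; the fixture path assumes the repository layout added in this PR.
import json

with open("tests/unit_tests/test_fixtures/malformed_data.json") as f:
    try:
        json.load(f)
    except json.JSONDecodeError as err:
        # e.g. "Expecting property name enclosed in double quotes: line 7 column 5"
        print(f"Malformed as expected: {err}")
```
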
"annual_rainfall_mm": 1500, + "climate_zone": "Tropical" + }, + "natural_disasters": ["Floods", "Landslides"] + }, + { + "country": "Canada", + "capital": "Ottawa", + "climate_info": { + "avg_temp_celsius": 6.3, + "annual_rainfall_mm": 940, + "climate_zone": "Continental" + }, + "natural_disasters": ["Blizzards", "Wildfires"] + } + ] +} diff --git a/tests/unit_tests/test_read_data.py b/tests/unit_tests/test_read_data.py new file mode 100644 index 0000000..f314476 --- /dev/null +++ b/tests/unit_tests/test_read_data.py @@ -0,0 +1,141 @@ +import json +import os +from typing import Any, Union + +import pytest + +from gcf_data_mapper.read import read_data_file + +UNIT_TESTS_FOLDER = os.path.dirname(os.path.abspath(__file__)) +FIXTURES_FOLDER = os.path.join(UNIT_TESTS_FOLDER, "test_fixtures") + + +def return_valid_csv_data(): + """ + Function which returns expected data structure of csv file. + """ + + csv_data = [ + { + "country": "Brazil", + "capital": "Brasilia", + "avg_temp_celsius": "21.5", + "annual_rainfall_mm": "1500", + "climate_zone": "Tropical", + }, + { + "country": "Canada", + "capital": "Ottawa", + "avg_temp_celsius": "6.3", + "annual_rainfall_mm": "940", + "climate_zone": "Continental", + }, + { + "country": "Egypt", + "capital": "Cairo", + "avg_temp_celsius": "22.1", + "annual_rainfall_mm": "25", + "climate_zone": "Desert", + }, + ] + return csv_data + + +def return_valid_json_data(): + """ + Function which returns expected data structure of json file. + """ + + json_data = { + "climate_data": [ + { + "country": "Brazil", + "capital": "Brasilia", + "climate_info": { + "avg_temp_celsius": 21.5, + "annual_rainfall_mm": 1500, + "climate_zone": "Tropical", + }, + "natural_disasters": ["Floods", "Landslides"], + }, + { + "country": "Canada", + "capital": "Ottawa", + "climate_info": { + "avg_temp_celsius": 6.3, + "annual_rainfall_mm": 940, + "climate_zone": "Continental", + }, + "natural_disasters": ["Blizzards", "Wildfires"], + }, + ] + } + return json_data + + +@pytest.mark.parametrize( + "filepath, expected_output", + ( + ( + os.path.join(FIXTURES_FOLDER, "valid_climate_json_data.json"), + return_valid_json_data(), + ), + ( + os.path.join(FIXTURES_FOLDER, "valid_climate_csv_data.csv"), + return_valid_csv_data(), + ), + ), +) +def test_valid_files_return_expected_output( + filepath: str, expected_output: Union[dict, list[dict[str, Any]]] +): + assert os.path.exists(filepath) + data = read_data_file(filepath) + assert data is not None + assert data == expected_output + + +@pytest.mark.parametrize( + "filepath, expected_output", + ( + ( + os.path.join(FIXTURES_FOLDER, "invalid_climate_json_data.json"), + return_valid_json_data(), + ), + ( + os.path.join(FIXTURES_FOLDER, "invalid_climate_csv_data.csv"), + return_valid_csv_data(), + ), + ), +) +def test_invalid_files_do_not_return_expected_output( + filepath: str, expected_output: Union[dict, list[dict[str, Any]]] +): + assert os.path.exists(filepath) + data = read_data_file(filepath) + assert data != expected_output + + +def test_raises_error_on_invalid_file_extension(): + with pytest.raises(ValueError) as e: + read_data_file(os.path.join(FIXTURES_FOLDER, "test_text_file.txt")) + assert str(e.value) == ("Error reading file: File must be a valid json or csv file") + + +def test_raises_error_with_non_existent_file(): + non_existent_file_path = os.path.join(FIXTURES_FOLDER, "non_existent_file.csv") + with pytest.raises(FileNotFoundError) as e: + read_data_file(non_existent_file_path) + assert str(e.value) == f"No such file or directory: 
'{non_existent_file_path}'" + + +def test_raises_error_with_empty_file(): + empty_file_path = os.path.join(FIXTURES_FOLDER, "empty_file.csv") + with pytest.raises(ValueError) as e: + read_data_file(empty_file_path) + assert str(e.value) == "Error reading file: File is empty" + + +def test_raises_error_on_malformed_json(): + with pytest.raises(json.JSONDecodeError): + read_data_file(os.path.join(FIXTURES_FOLDER, "malformed_data.json"))
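
One detail worth spelling out (sketch only, not part of the diff): the CSV expectations in return_valid_csv_data hold string values such as "21.5" because csv.DictReader, which read_csv builds on, performs no type conversion; every field comes back as str. A standalone illustration:

```python
# Why the CSV expectations are strings: csv.DictReader returns every field as str.
import csv
import io

sample = "country,avg_temp_celsius\nBrazil,21.5\n"
rows = list(csv.DictReader(io.StringIO(sample)))
assert rows == [{"country": "Brazil", "avg_temp_celsius": "21.5"}]
```
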