Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/pdct 1368 add the ability to read in all relevant data files #4

Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
7ecc260
refactor: update cli.py file
Aug 27, 2024
f3640a4
feat: add function to read csv and json files
Aug 27, 2024
2872d1b
test: add test for reading json and csv
Aug 27, 2024
8218e22
chore: add help target to make commands
Aug 27, 2024
2d5d90c
Merge branch 'main' into feature/pdct-1368-add-the-ability-to-read-in…
Aug 27, 2024
11627cc
fix: update cli for functions moved in merge
Aug 27, 2024
2fc368d
fix: erroneously removed the entrypoint for the cli
Aug 27, 2024
2dc5176
merge main
Aug 27, 2024
c832020
refactor: move read functions out of the cli into a helper file
Aug 27, 2024
4ebc045
merge main
Aug 27, 2024
dd15191
fix: correct typo and missing doc string
Aug 27, 2024
1c64590
test: refactor tests to use parametrize
Aug 27, 2024
fcc500f
refactor: remove indirect true
Aug 27, 2024
eb32e44
chore: update typing
Aug 27, 2024
98e1c47
refactor: refactor tests slightly to merge tests into one
Aug 27, 2024
3182fc3
refactor: update the doc strings in the read functions
Aug 28, 2024
af90377
WIP tests
katybaulch Aug 28, 2024
afe8ac7
Merge branch 'feature/pdct-1368-add-the-ability-to-read-in-all-releva…
Aug 28, 2024
c19e24a
fix: test fixes
Aug 28, 2024
f45b392
style: quick update on wording of return values
Aug 28, 2024
34fac6c
fix: linting errors
Aug 28, 2024
41b9ba7
refactor: update typoing on read_data_file function
Aug 28, 2024
6590875
fix: change return type value in read csv doc string
Aug 28, 2024
28a941c
fix: update test fixture names and related tests
Aug 28, 2024
26510e9
test: update test name
Aug 28, 2024
f2b5f68
test: refactor code to use parametrize
Aug 28, 2024
2e39de5
test: update test name
Aug 28, 2024
9dcf3a5
chore : ignore malformed json data written for tests
Aug 29, 2024
41f0edd
feat: update test and read data
Aug 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 14 additions & 7 deletions Makefile
odrakes-cpr marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -1,23 +1,30 @@
install_trunk:
.DEFAULT_GOAL := help

.PHONY: help
help: ## Display this help message
@echo "Available commands:"
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'

install_trunk: ## Install trunk
$(eval trunk_installed=$(shell trunk --version > /dev/null 2>&1 ; echo $$? ))
ifneq (${trunk_installed},0)
$(eval OS_NAME=$(shell uname -s | tr A-Z a-z))
curl https://get.trunk.io -fsSL | bash
endif

uninstall_trunk:
uninstall_trunk: ## Uninstall trunk
sudo rm -if `which trunk`
rm -ifr ${HOME}/.cache/trunk

share_trunk:
trunk init

move_workflows:
move_workflows: ## Move workflows to .github/workflows
mv workflows .github/workflows

init: share_trunk move_workflows

setup_with_pyenv:
setup_with_pyenv: ## Setup the project with pyenv
pyenv install 3.10
pyenv virtualenv 3.10 gcf-dm
pyenv activate gcf-dm
Expand All @@ -26,12 +33,12 @@ setup_with_pyenv:
install_git_hooks: install_trunk
trunk init

check:
check: ## Format and check the project with trunk
trunk fmt
trunk check

build:
build: ## Build the project
poetry build

test:
test: ## Run tests using pytest
poetry run pytest
62 changes: 62 additions & 0 deletions gcf_data_mapper/read.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import csv
import json
from typing import Any, Optional, Union

import click


def read_csv(file_path: str) -> list[dict[str, Any]]:
    """
    Reads a csv file and returns a list of dictionaries

    Every row is converted to a dictionary holding the ``country``,
    ``capital``, ``avg_temp_celsius`` (converted to float),
    ``annual_rainfall_mm`` (converted to int) and ``climate_zone`` columns.
    Any other column present in the file is ignored.

    :param str file_path: a file path to the csv file
    :raises KeyError: if one of the expected columns is missing from a row
    :raises ValueError: if a numeric column cannot be converted
    :return list[dict[str, Any]]: A list of dictionaries, one per csv row
    """
    # newline="" hands line-ending handling to the csv module, so newlines
    # embedded inside quoted fields are preserved instead of being rewritten
    # by universal-newline translation.
    with open(file_path, "r", newline="") as file:
        csv_reader = csv.DictReader(file)
        return [
            {
                "country": line["country"],
                "capital": line["capital"],
                "avg_temp_celsius": float(line["avg_temp_celsius"]),
                "annual_rainfall_mm": int(line["annual_rainfall_mm"]),
                "climate_zone": line["climate_zone"],
            }
            for line in csv_reader
        ]


def read_json(file_path: str) -> dict:
    """
    Reads a json file, and returns a dict

    The parsed top-level JSON value is returned as-is by ``json.load``, so
    callers get a dictionary when the document's top-level value is an object.

    :param str file_path: A file path to the json file
    :raises json.JSONDecodeError: if the file does not contain valid json
    :return dict: A dictionary of the json data
    """
    # JSON text is UTF-8 by specification; being explicit avoids decoding
    # with a platform-dependent default encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        return json.load(file)


def read_data_file(
    file_path: str,
) -> Optional[Union[dict[str, Any], list[dict[str, Any]]]]:
    """
    Reads a csv or json data file by delegating to the matching reader

    The reader is chosen from the text after the last "." in the lower-cased
    path. Errors raised while reading are reported with ``click.echo`` and
    not re-raised; in that case the function returns ``None``.

    :param str file_path: A file path to the csv/json file
    :raises ValueError: if a non csv or json file type is provided
    :return dict | list[dict[str, Any]] | None: A dictionary (json) or list
        of dictionaries (csv) depending on the file type, or None when the
        file could not be read
    """
    suffix = file_path.lower().rsplit(".", 1)[-1]

    # Guard clause: reject unsupported file types before touching the disk.
    if suffix != "json" and suffix != "csv":
        raise ValueError("Error reading file: File must be a valid json or csv file")

    try:
        reader = read_csv if suffix == "csv" else read_json
        return reader(file_path)
    except Exception as e:
        click.echo(f"Error reading file: {e}")
    return None
4 changes: 4 additions & 0 deletions tests/unit_tests/test_fixtures/test_climate_data.csv
odrakes-cpr marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
country,capital,avg_temp_celsius,annual_rainfall_mm,climate_zone
Brazil,Brasilia,21.5,1500,Tropical
Canada,Ottawa,6.3,940,Continental
Egypt,Cairo,22.1,25,Desert
24 changes: 24 additions & 0 deletions tests/unit_tests/test_fixtures/test_country_climate_data.json
odrakes-cpr marked this conversation as resolved.
Show resolved Hide resolved
odrakes-cpr marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"climate_data": [
{
"country": "Brazil",
"capital": "Brasilia",
"climate_info": {
"avg_temp_celsius": 21.5,
"annual_rainfall_mm": 1500,
"climate_zone": "Tropical"
},
"natural_disasters": ["Floods", "Landslides"]
},
{
"country": "Canada",
"capital": "Ottawa",
"climate_info": {
"avg_temp_celsius": 6.3,
"annual_rainfall_mm": 940,
"climate_zone": "Continental"
},
"natural_disasters": ["Blizzards", "Wildfires"]
}
]
}
14 changes: 14 additions & 0 deletions tests/unit_tests/test_fixtures/test_non_csv_json_file.py
odrakes-cpr marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Very basic python file to test that the read function does not process non-csv/json files


class TestClass:
    """Placeholder class whose only job is to make this a non-csv/json file."""

    # This file matches pytest's default ``test_*.py`` pattern and the class
    # matches ``Test*``; opting out of collection avoids the warning pytest
    # emits for test classes that define ``__init__``.
    __test__ = False

    def __init__(self):
        self.message = "This is not a CSV or JSON file"

    def display_message(self):
        """Print the stored message to standard output."""
        print(self.message)


if __name__ == "__main__":
    # Running the fixture directly just prints its placeholder message.
    TestClass().display_message()
98 changes: 98 additions & 0 deletions tests/unit_tests/test_read_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import os
from typing import Any, Union

import pytest

from gcf_data_mapper.read import read_data_file

# Locate fixtures relative to this module so the tests do not depend on the
# directory pytest is launched from.
_this_module_path = os.path.abspath(__file__)
unit_tests_folder = os.path.dirname(_this_module_path)
fixtures_folder = os.path.join(unit_tests_folder, "test_fixtures")


def return_mock_test_csv_data():
    """
    Build the list of row dictionaries expected from the csv test fixture.
    """
    columns = (
        "country",
        "capital",
        "avg_temp_celsius",
        "annual_rainfall_mm",
        "climate_zone",
    )
    rows = (
        ("Brazil", "Brasilia", 21.5, 1500, "Tropical"),
        ("Canada", "Ottawa", 6.3, 940, "Continental"),
        ("Egypt", "Cairo", 22.1, 25, "Desert"),
    )
    # Pair every value with its column name, keeping the column order.
    return [dict(zip(columns, row)) for row in rows]


def return_mock_test_json_data():
    """
    Build the dictionary expected from the json test fixture.
    """

    def build_entry(country, capital, avg_temp, rainfall, zone, disasters):
        # One country record, mirroring the nesting used in the fixture file.
        return {
            "country": country,
            "capital": capital,
            "climate_info": {
                "avg_temp_celsius": avg_temp,
                "annual_rainfall_mm": rainfall,
                "climate_zone": zone,
            },
            "natural_disasters": disasters,
        }

    return {
        "climate_data": [
            build_entry(
                "Brazil", "Brasilia", 21.5, 1500, "Tropical", ["Floods", "Landslides"]
            ),
            build_entry(
                "Canada", "Ottawa", 6.3, 940, "Continental", ["Blizzards", "Wildfires"]
            ),
        ]
    }


@pytest.mark.parametrize(
    ["filepath", "expected_output"],
    [
        (
            os.path.join(fixtures_folder, "test_country_climate_data.json"),
            return_mock_test_json_data(),
        ),
        (
            os.path.join(fixtures_folder, "test_climate_data.csv"),
            return_mock_test_csv_data(),
        ),
    ],
)
def test_reads_files(filepath: str, expected_output: Union[dict, list[dict[str, Any]]]):
    """Each supported fixture file is read into its expected structure."""
    # Fail early with a clear signal if the fixture itself is missing.
    assert os.path.exists(filepath)

    loaded = read_data_file(filepath)

    # None would mean the reader swallowed an error instead of returning data.
    assert loaded is not None
    assert loaded == expected_output


def test_errors_on_invalid_file():
    """A file that is neither csv nor json is rejected with a ValueError."""
    # Use the dedicated non-csv/json fixture through fixtures_folder so the
    # test targets a file that exists and does not depend on the directory
    # pytest is launched from.
    invalid_file = os.path.join(fixtures_folder, "test_non_csv_json_file.py")
    assert os.path.exists(invalid_file)

    with pytest.raises(ValueError) as e:
        read_data_file(invalid_file)

    # Checked after the context manager exits: statements placed after the
    # raising call inside the block would never run.
    assert str(e.value) == ("Error reading file: File must be a valid json or csv file")
Loading