Skip to content

Commit

Permalink
Feature/pdct 1368 add the ability to read in all relevant data files (#4
Browse files Browse the repository at this point in the history
)
  • Loading branch information
odrakes-cpr authored Aug 29, 2024
2 parents 9d298d2 + 41f0edd commit a486468
Show file tree
Hide file tree
Showing 11 changed files with 306 additions and 8 deletions.
3 changes: 3 additions & 0 deletions .trunk/trunk.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ lint:
paths:
- .trunk/configs/cspell.json
- .gitignore
- linters: [pre-commit-hooks, prettier]
paths:
- tests/unit_tests/test_fixtures/malformed_data.json

enabled:
- [email protected]
Expand Down
23 changes: 15 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,23 +1,30 @@
install_trunk:
.DEFAULT_GOAL := help

.PHONY: help
help: ## Display this help message
@echo "Available commands:"
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'

install_trunk: ## Install trunk
$(eval trunk_installed=$(shell trunk --version > /dev/null 2>&1 ; echo $$? ))
ifneq (${trunk_installed},0)
$(eval OS_NAME=$(shell uname -s | tr A-Z a-z))
curl https://get.trunk.io -fsSL | bash
endif

uninstall_trunk:
uninstall_trunk: ## Uninstall trunk
sudo rm -if `which trunk`
rm -ifr ${HOME}/.cache/trunk

share_trunk:
trunk init

move_workflows:
move_workflows: ## Move workflows to .github/workflows
mv workflows .github/workflows

init: share_trunk move_workflows

setup_with_pyenv:
setup_with_pyenv: ## Setup the project with pyenv
pyenv install 3.10
pyenv virtualenv 3.10 gcf-dm
pyenv activate gcf-dm
Expand All @@ -26,12 +33,12 @@ setup_with_pyenv:
install_git_hooks: install_trunk
trunk init

check:
check: ## Format and check the project with trunk
trunk fmt
trunk check

build:
build: ## Build the project
poetry build

test:
poetry run pytest
test: ## Run tests using pytest
poetry run pytest -v
72 changes: 72 additions & 0 deletions gcf_data_mapper/read.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import csv
import json
import os
from enum import Enum
from typing import Any, Optional, Union

import click


class AllowedFileExtensions(Enum):
    """File extensions accepted by read_data_file (values are the lowercase extension without the leading dot)."""

    JSON = "json"
    CSV = "csv"


def read_csv(file_path: str) -> list[dict[str, Any]]:
    """
    Reads a csv file and returns a list of dictionaries

    :param file_path str: a file path to the csv file
    :return list[dict[str, Any]]: a list of dictionaries, where each line in
        the csv file is mapped to a dictionary keyed by the header fields
    """
    # newline="" is required by the csv module so that newlines embedded in
    # quoted fields are parsed correctly (see csv module documentation).
    with open(file_path, "r", newline="") as file:
        csv_reader = csv.DictReader(file)
        fieldnames = csv_reader.fieldnames or []
        # Keep only the header columns; any surplus values in a row (which
        # DictReader collects under a None key) are dropped.
        data = [{field: line[field] for field in fieldnames} for line in csv_reader]
    return data


def read_json(file_path: str) -> Optional[dict]:
    """
    Reads a json file and returns the json object as a dict

    :param file_path str: A file path to the json file
    :raises json.JSONDecodeError: if the file contents are not valid json
    :return Optional[dict]: A dictionary of the json data
    """
    # json.load already raises json.JSONDecodeError on malformed input; the
    # previous try/except that caught it only to re-raise it added nothing.
    with open(file_path, "r") as file:
        return json.load(file)


def read_data_file(
    file_path: str,
) -> Optional[Union[dict[str, Any], list[dict[str, Any]]]]:
    """
    Validates a file path for existence, type and size, then calls a
    function to read the csv or json file respectively

    :param file_path str: A file path to the csv/json file
    :raises ValueError: if a non csv or json file type is provided
    :raises FileNotFoundError: if the file does not exist
    :raises ValueError: if the file is empty
    :return Optional[Union[dict[str, Any], list[dict[str, Any]]]]: A dictionary
        or list of dictionaries depending on the file type
    """
    # Extension is checked before existence, so a missing .txt path raises
    # ValueError (not FileNotFoundError) — the unit tests rely on this order.
    file_extension = os.path.splitext(file_path)[1][1:]
    if file_extension not in [e.value for e in AllowedFileExtensions]:
        raise ValueError("Error reading file: File must be a valid json or csv file")
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"No such file or directory: '{file_path}'")
    if os.path.getsize(file_path) == 0:
        raise ValueError("Error reading file: File is empty")
    try:
        if file_extension == AllowedFileExtensions.CSV.value:
            return read_csv(file_path)
        return read_json(file_path)
    except Exception as e:
        # Surface a short message to the CLI, then propagate the original
        # exception. Bare `raise` re-raises without appending this frame's
        # context the way `raise e` would.
        click.echo(f"Error reading file: {e}")
        raise
Empty file.
4 changes: 4 additions & 0 deletions tests/unit_tests/test_fixtures/invalid_climate_csv_data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
country,avg_temp_celsius,annual_rainfall_mm,climate_zone
Brazil,21.5,1500,Tropical
Canada,6.3,940,Continental
Egypt,22.1,25,Desert
23 changes: 23 additions & 0 deletions tests/unit_tests/test_fixtures/invalid_climate_json_data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"climate_data": [
{
"country": "Brazil",
"capital": "Brasilia",
"climate_info": {
"avg_temp_celsius": "twenty-one point five",
"annual_rainfall_mm": 1500
},
"natural_disasters": ["Floods", "Landslides"]
},
{
"country": "Canada",
"capital": "Ottawa",
"climate_info": {
"avg_temp_celsius": 6.3,
"annual_rainfall_mm": "nine hundred forty",
"climate_zone": "Continental"
},
"natural_disasters": ["Blizzards", "Wildfires"]
}
]
}
19 changes: 19 additions & 0 deletions tests/unit_tests/test_fixtures/malformed_data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@

{
"location": "New York",
"temperature": {
"value": 75,
"unit": "Fahrenheit",
},
"humidity": 60,
"conditions": [
"Sunny",
"Windy",
],
"forecast": {
"day": "Monday",
"high": 80,
"low": 65
// Missing closing brace
}

1 change: 1 addition & 0 deletions tests/unit_tests/test_fixtures/test_text_file.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Very basic txt file to test that the read function does not process non-csv/json files
4 changes: 4 additions & 0 deletions tests/unit_tests/test_fixtures/valid_climate_csv_data.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
country,capital,avg_temp_celsius,annual_rainfall_mm,climate_zone
Brazil,Brasilia,21.5,1500,Tropical
Canada,Ottawa,6.3,940,Continental
Egypt,Cairo,22.1,25,Desert
24 changes: 24 additions & 0 deletions tests/unit_tests/test_fixtures/valid_climate_json_data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"climate_data": [
{
"country": "Brazil",
"capital": "Brasilia",
"climate_info": {
"avg_temp_celsius": 21.5,
"annual_rainfall_mm": 1500,
"climate_zone": "Tropical"
},
"natural_disasters": ["Floods", "Landslides"]
},
{
"country": "Canada",
"capital": "Ottawa",
"climate_info": {
"avg_temp_celsius": 6.3,
"annual_rainfall_mm": 940,
"climate_zone": "Continental"
},
"natural_disasters": ["Blizzards", "Wildfires"]
}
]
}
141 changes: 141 additions & 0 deletions tests/unit_tests/test_read_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import json
import os
from typing import Any, Union

import pytest

from gcf_data_mapper.read import read_data_file

UNIT_TESTS_FOLDER = os.path.dirname(os.path.abspath(__file__))
FIXTURES_FOLDER = os.path.join(UNIT_TESTS_FOLDER, "test_fixtures")


def return_valid_csv_data():
    """
    Function which returns expected data structure of csv file.
    """

    header = ("country", "capital", "avg_temp_celsius", "annual_rainfall_mm", "climate_zone")
    rows = (
        ("Brazil", "Brasilia", "21.5", "1500", "Tropical"),
        ("Canada", "Ottawa", "6.3", "940", "Continental"),
        ("Egypt", "Cairo", "22.1", "25", "Desert"),
    )
    # Zip each row against the header to build the per-country dicts.
    return [dict(zip(header, row)) for row in rows]


def return_valid_json_data():
    """
    Function which returns expected data structure of json file.
    """

    countries = (
        ("Brazil", "Brasilia", 21.5, 1500, "Tropical", ["Floods", "Landslides"]),
        ("Canada", "Ottawa", 6.3, 940, "Continental", ["Blizzards", "Wildfires"]),
    )
    # Expand each tuple into the nested structure the json fixture parses to.
    return {
        "climate_data": [
            {
                "country": country,
                "capital": capital,
                "climate_info": {
                    "avg_temp_celsius": temp,
                    "annual_rainfall_mm": rainfall,
                    "climate_zone": zone,
                },
                "natural_disasters": disasters,
            }
            for country, capital, temp, rainfall, zone, disasters in countries
        ]
    }


@pytest.mark.parametrize(
    "filepath, expected_output",
    [
        (
            os.path.join(FIXTURES_FOLDER, "valid_climate_json_data.json"),
            return_valid_json_data(),
        ),
        (
            os.path.join(FIXTURES_FOLDER, "valid_climate_csv_data.csv"),
            return_valid_csv_data(),
        ),
    ],
)
def test_valid_files_return_expected_output(
    filepath: str, expected_output: Union[dict, list[dict[str, Any]]]
):
    """Valid json/csv fixtures parse to exactly the expected structures."""
    assert os.path.exists(filepath)
    result = read_data_file(filepath)
    assert result is not None
    assert result == expected_output


@pytest.mark.parametrize(
    "filepath, expected_output",
    [
        (
            os.path.join(FIXTURES_FOLDER, "invalid_climate_json_data.json"),
            return_valid_json_data(),
        ),
        (
            os.path.join(FIXTURES_FOLDER, "invalid_climate_csv_data.csv"),
            return_valid_csv_data(),
        ),
    ],
)
def test_invalid_files_do_not_return_expected_output(
    filepath: str, expected_output: Union[dict, list[dict[str, Any]]]
):
    """Invalid fixtures still parse, but must not equal the valid structures."""
    assert os.path.exists(filepath)
    result = read_data_file(filepath)
    assert result != expected_output


def test_raises_error_on_invalid_file_extension():
    """A txt file is rejected with ValueError before any read is attempted."""
    txt_path = os.path.join(FIXTURES_FOLDER, "test_text_file.txt")
    with pytest.raises(ValueError) as excinfo:
        read_data_file(txt_path)
    assert str(excinfo.value) == "Error reading file: File must be a valid json or csv file"


def test_raises_error_with_non_existent_file():
    """A missing file with a valid extension raises FileNotFoundError."""
    missing_path = os.path.join(FIXTURES_FOLDER, "non_existent_file.csv")
    with pytest.raises(FileNotFoundError) as excinfo:
        read_data_file(missing_path)
    assert str(excinfo.value) == f"No such file or directory: '{missing_path}'"


def test_raises_error_with_empty_file():
    """A zero-byte file raises ValueError before parsing is attempted."""
    zero_byte_path = os.path.join(FIXTURES_FOLDER, "empty_file.csv")
    with pytest.raises(ValueError) as excinfo:
        read_data_file(zero_byte_path)
    assert str(excinfo.value) == "Error reading file: File is empty"


def test_raises_error_on_malformed_json():
    """Malformed json surfaces json.JSONDecodeError to the caller."""
    malformed_path = os.path.join(FIXTURES_FOLDER, "malformed_data.json")
    with pytest.raises(json.JSONDecodeError):
        read_data_file(malformed_path)

0 comments on commit a486468

Please sign in to comment.