Skip to content

Commit

Permalink
Feature/pdct 1398 map family to new json structure (#14)
Browse files Browse the repository at this point in the history
* feat: map family data

create more modular methods for mapping family data, separate out metadata
into its own function.

* test: update tests

* feat: add echo to map family data row

tell the user we are skipping row if the metadata has missing information

* test: update test and fixtures

- separate the test files into mapping family data and metadata
- add more fixtures to conftest to test different outcomes, i.e. where we
  have missing metadata information

* Bump version to 0.1.11

* test: add test asserting output message when metadata is none

* feat: add collections field to the family data being mapped

---------

Co-authored-by: Osneil Drakes <[email protected]>
Co-authored-by: Osneil Drakes <[email protected]>
  • Loading branch information
3 people authored Sep 10, 2024
1 parent 4665dd9 commit 668f12e
Show file tree
Hide file tree
Showing 6 changed files with 350 additions and 82 deletions.
3 changes: 3 additions & 0 deletions gcf_data_mapper/enums/family.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,14 @@ class FamilyColumnsNames(Enum):
RESULT_AREAS = "ResultAreas"
SECTOR = "Sector"
THEME = "Theme"
TITLE = "ProjectName"
SUMMARY = "Summary"


class FamilyNestedColumnNames(Enum):
"""The fields the GCF data mapper needs to parse nested family data/ metadata."""

COUNTRY_ISO3 = "ISO3"
AREA = "Area"
BUDGET = "BudgetUSDeq"
NAME = "Name"
Expand Down
63 changes: 55 additions & 8 deletions gcf_data_mapper/parsers/family.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,54 @@ def map_family_metadata(row: pd.Series) -> Optional[dict]:
return metadata


def map_family_data(
    row: pd.Series,
) -> Optional[dict]:
    """Build the family record for a single project row.

    :param pd.Series row: The row containing the family fields and the
        nested family metadata fields.
    :return Optional[dict]: The mapped family data, or None when the
        family metadata for the row could not be mapped.
    """
    metadata = map_family_metadata(row)

    # A None result from the metadata mapper means the row is reported as
    # having missing information, so nothing else on it is mapped.
    if metadata is None:
        click.echo("🛑 Skipping row as family metadata has missing information")
        return None

    # Read the plain columns in a fixed order before touching nested data.
    ref = row.at[FamilyColumnsNames.APPROVED_REF.value]
    project_id = row.at[FamilyColumnsNames.PROJECTS_ID.value]
    description = row.at[FamilyColumnsNames.SUMMARY.value]
    name = row.at[FamilyColumnsNames.TITLE.value]

    # Each entry under the countries column contributes its ISO3 value.
    countries = row.at[FamilyColumnsNames.COUNTRIES.value]
    iso3_codes = []
    for entry in countries:
        iso3_codes.append(entry[FamilyNestedColumnNames.COUNTRY_ISO3.value])

    return {
        # For now the category is hard coded as MCF.
        "category": "MCF",
        "collections": [],
        "description": description,
        "geographies": iso3_codes,
        "import_id": f"GCF.family.{ref}.{project_id}",
        "metadata": metadata,
        "title": name,
    }


def process_row(
row: pd.Series, projects_id: str, required_columns: list[str]
row: pd.Series,
projects_id: str,
required_columns: list[str],
) -> Optional[dict]:
"""Map the family data based on the provided row.
Expand All @@ -121,13 +167,12 @@ def process_row(
)
return None

# TODO: Map family data
return {
"metadata": map_family_metadata(row),
}
return map_family_data(row)


def family(projects_data: pd.DataFrame, debug: bool) -> list[Optional[dict[str, Any]]]:
def family(
gcf_projects_data: pd.DataFrame, debug: bool
) -> list[Optional[dict[str, Any]]]:
"""Map the GCF family info to new structure.
:param pd.DataFrame projects_data: The MCF and GCF project data,
Expand All @@ -144,9 +189,11 @@ def family(projects_data: pd.DataFrame, debug: bool) -> list[Optional[dict[str,
mapped_families = []

required_fields = set(str(e.value) for e in FamilyColumnsNames)
verify_required_fields_present(projects_data, required_fields)

for _, row in projects_data.iterrows():
verify_required_fields_present(gcf_projects_data, required_fields)
# Do a check that the projects data has the field you need

for _, row in gcf_projects_data.iterrows():
projects_id = row.at[FamilyColumnsNames.PROJECTS_ID.value]
mapped_families.append(process_row(row, projects_id, list(required_fields)))

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "gcf-data-mapper"
version = "0.1.10"
version = "0.1.11"
description = "A CLI tool to wrangle GCF data into format recognised by the bulk-import tool."
authors = ["CPR-dev-team <[email protected]>"]
license = "Apache-2.0"
Expand Down
130 changes: 129 additions & 1 deletion tests/unit_tests/parsers/family/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


@pytest.fixture()
def test_family_doc_df():
def mock_family_doc_df():
yield pd.DataFrame(
[
{
Expand All @@ -15,6 +15,7 @@ def test_family_doc_df():
"Theme": "Adaptation",
"Sector": "Environment",
"ProjectURL": "https://www.climateaction.fund/project/FP003",
"Summary": "The Summary of the Project",
"Countries": [
{
"CountryName": "Bangladesh",
Expand Down Expand Up @@ -51,6 +52,133 @@ def test_family_doc_df():
)


@pytest.fixture()
def mock_family_row_ds():
    """Yield a family row (pandas Series) with every field populated."""
    countries = [
        {
            "CountryName": "Haiti",
            "ISO3": "HTI",
            "Region": "Latin America and the Caribbean",
        },
    ]
    entities = [
        {
            "Name": "Climate Action Innovations",
        }
    ]
    funding = [
        {
            "Source": "GCF",
            "Budget": 82000,
            "BudgetUSDeq": 82000,
        },
        {
            "ProjectBudgetID": 412,
            "Source": "Co-Financing",
            "Budget": 620000,
            "BudgetUSDeq": 620000,
        },
    ]
    result_areas = [
        {
            "Area": "The Area for the Result Area",
            "Type": "The Type for the Result Area",
        },
    ]

    # Key order is kept as-is because it becomes the Series index order.
    row = {
        "ProjectsID": 1,
        "ApprovedRef": "FP004",
        "ProjectName": "Enhancing resilience of marine ecosystems",
        "Theme": "Adaptation",
        "Sector": "Private",
        "ProjectURL": "https://www.climateaction.fund/project/FP004",
        "Summary": "The Summary of the Project",
        "Countries": countries,
        "Entities": entities,
        "Funding": funding,
        "ResultAreas": result_areas,
    }
    yield pd.Series(row)


@pytest.fixture()
def mock_family_row_no_result_areas():
    """Yield a family row whose single result area has empty Area and Type."""
    countries = [
        {
            "CountryName": "Haiti",
            "ISO3": "HTI",
            "Region": "Latin America and the Caribbean",
        },
    ]
    entities = [
        {
            "Name": "Climate Action Innovations",
        }
    ]
    funding = [
        {
            "Source": "GCF",
            "Budget": 82000,
            "BudgetUSDeq": 82000,
        },
        {
            "ProjectBudgetID": 412,
            "Source": "Co-Financing",
            "Budget": 620000,
            "BudgetUSDeq": 620000,
        },
    ]
    # The result area entry is present but both of its values are blank.
    result_areas = [
        {"Area": "", "Type": ""},
    ]

    # Key order is kept as-is because it becomes the Series index order.
    row = {
        "ProjectsID": 2,
        "ApprovedRef": "FP004",
        "ProjectName": "Enhancing resilience of marine ecosystems",
        "Theme": "Adaptation",
        "Sector": "Private",
        "ProjectURL": "https://www.climateaction.fund/project/FP004",
        "Summary": "The Summary of the Project",
        "Countries": countries,
        "Entities": entities,
        "Funding": funding,
        "ResultAreas": result_areas,
    }
    yield pd.Series(row)


@pytest.fixture()
def mock_family_row_no_entities_no_regions():
    """Yield a family row with a blank entity name and a blank country region."""
    # The only country entry carries an empty Region and no other keys.
    countries = [
        {"Region": ""},
    ]
    # The only entity entry carries an empty Name.
    entities = [{"Name": ""}]
    funding = [
        {
            "Source": "GCF",
            "Budget": 82000,
            "BudgetUSDeq": 82000,
        },
        {
            "ProjectBudgetID": 412,
            "Source": "Co-Financing",
            "Budget": 620000,
            "BudgetUSDeq": 620000,
        },
    ]
    result_areas = [
        {
            "Area": "The Area for the Result Area",
            "Type": "The Type for the Result Area",
        },
    ]

    # Key order is kept as-is because it becomes the Series index order.
    row = {
        "ProjectsID": 3,
        "ApprovedRef": "FP004",
        "ProjectName": "Enhancing resilience of marine ecosystems",
        "Theme": "Adaptation",
        "Sector": "Private",
        "ProjectURL": "https://www.climateaction.fund/project/FP004",
        "Summary": "The Summary of the Project",
        "Countries": countries,
        "Entities": entities,
        "Funding": funding,
        "ResultAreas": result_areas,
    }
    yield pd.Series(row)


@pytest.fixture()
def required_family_columns():
required_columns = [column.value for column in FamilyColumnsNames]
Expand Down
Loading

0 comments on commit 668f12e

Please sign in to comment.