Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature/pdct-1777-support-multiple-geos-in-get #268

Merged
merged 32 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
1b866a7
feat: WIP - update query to accomodate for multi geos related to a fa…
Dec 19, 2024
edc17a7
Added lazy load to prevent multiplicity
katybaulch Dec 19, 2024
09090b4
refactor: update subquery to aggregate geography values
Dec 23, 2024
c62a0cf
Merge branch 'main' into feature/pdct-1777-support-multiple-geos-in-get
odrakes-cpr Dec 23, 2024
7984c15
feat: update package in pyproject
Dec 23, 2024
972a0a4
merge main fixes
Dec 23, 2024
f94e447
fix: add back in default logging which was commented out in debugging
Dec 23, 2024
8fa05c0
test: update unit tests for new geos field
Jan 4, 2025
a55f579
chore: make the test errors verbose when running pytest
Jan 4, 2025
5b560e5
Merge branch 'main' into feature/pdct-1777-support-multiple-geos-in-get
Jan 4, 2025
ed496c2
tests: update integration tests with new geo property on families model
Jan 6, 2025
ac8abb9
refactor: rework geography filter on families search endpoint
Jan 6, 2025
039764a
test: add tests for multi geos in search endpoint
Jan 6, 2025
8ee0ed1
fix: remove breakpoint
Jan 6, 2025
21c04d0
refactor: duplicate query function and have search endpoint work as i…
Jan 6, 2025
8160730
chore: update project version
Jan 6, 2025
620581e
test: update tests to include multi geos
Jan 6, 2025
8722dc9
chore: wrong project version
Jan 6, 2025
cbd330b
test: add missing geos from multi geos
Jan 6, 2025
9db52da
Merge branch 'main' into feature/pdct-1777-support-multiple-geos-in-get
odrakes-cpr Jan 6, 2025
80ba4d4
chore: project version update?
Jan 6, 2025
d487e59
Merge branch 'feature/pdct-1777-support-multiple-geos-in-get' of gith…
Jan 6, 2025
5700452
fix: incorrect typing for geo subquery
Jan 6, 2025
e18a36a
chore: updte project version
Jan 6, 2025
889dec5
merge main
Jan 6, 2025
9715d4e
chore: rectify this trunk error which was angry at sorting
Jan 7, 2025
799d3f7
fix: sorting
Jan 7, 2025
69b079e
Fix imports
katybaulch Jan 7, 2025
a1351e8
fix: fix sorting import error from trunk fmtter
Jan 7, 2025
354276a
Merge branch 'main' into feature/pdct-1777-support-multiple-geos-in-get
katybaulch Jan 8, 2025
c60e6b2
Merge branch 'feature/pdct-1777-support-multiple-geos-in-get' of gith…
Jan 8, 2025
1965679
chore: update project version
Jan 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from contextlib import asynccontextmanager

import uvicorn
from db_client import run_migrations
from fastapi import Depends, FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi_health import health
Expand All @@ -30,6 +29,7 @@
from app.clients.db.session import engine
from app.logging_config import DEFAULT_LOGGING, setup_json_logging
from app.service.health import is_database_online
from db_client import run_migrations

_ALLOW_ORIGIN_REGEX = (
r"http://localhost:3000|"
Expand Down
2 changes: 2 additions & 0 deletions app/model/family.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class FamilyReadDTO(BaseModel):
title: str
summary: str
geography: str
geographies: list[str]
category: str
status: str
metadata: dict[str, list[str]]
Expand Down Expand Up @@ -40,6 +41,7 @@ class FamilyWriteDTO(BaseModel):
title: str
summary: str
geography: str
geographies: list[str]
katybaulch marked this conversation as resolved.
Show resolved Hide resolved
category: str
metadata: dict[str, list[str]]
collections: list[str]
Expand Down
106 changes: 92 additions & 14 deletions app/repository/family.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@
from db_client.models.organisation.users import Organisation
from sqlalchemy import Column, and_
from sqlalchemy import delete as db_delete
from sqlalchemy import desc, or_
from sqlalchemy import desc, func, or_
from sqlalchemy import update as db_update
from sqlalchemy.exc import NoResultFound, OperationalError
from sqlalchemy.orm import Query, Session
from sqlalchemy.orm import Query, Session, lazyload
from sqlalchemy_utils import escape_like

from app.errors import RepositoryError
Expand All @@ -34,39 +34,96 @@

_LOGGER = logging.getLogger(__name__)

FamilyGeoMetaOrg = Tuple[Family, Geography, FamilyMetadata, Corpus, Organisation]
FamilyGeoMetaOrg = Tuple[
Family,
list[str],
FamilyMetadata,
Corpus,
Organisation,
]


def get_family_geography_subquery(db: Session) -> Query:
"""
Creates a subquery to aggregate geography values for families, accomodating
those with multiple associated geographies.

:param db Session: the database connection
:return Query: A subquery containing family import IDs and their associated geography values
"""
return (
db.query(
FamilyGeography.family_import_id,
func.array_agg(Geography.value).label("geography_values"),
)
.join(Geography, Geography.id == FamilyGeography.geography_id)
.group_by(FamilyGeography.family_import_id)
.subquery()
)


def _get_query(db: Session) -> Query:
# NOTE: SqlAlchemy will make a complete hash of query generation
# if columns are used in the query() call. Therefore, entire
# objects are returned.
return (
db.query(Family, Geography, FamilyMetadata, Corpus, Organisation)
.join(FamilyGeography, FamilyGeography.family_import_id == Family.import_id)

geography_subquery = get_family_geography_subquery(db)

query = (
db.query(
Family,
geography_subquery.c.geography_values,
FamilyMetadata,
Corpus,
Organisation,
)
.join(
Geography,
Geography.id == FamilyGeography.geography_id,
geography_subquery,
geography_subquery.c.family_import_id == Family.import_id,
)
.join(FamilyMetadata, FamilyMetadata.family_import_id == Family.import_id)
.join(FamilyCorpus, FamilyCorpus.family_import_id == Family.import_id)
.join(Corpus, Corpus.import_id == FamilyCorpus.corpus_import_id)
.join(Organisation, Corpus.organisation_id == Organisation.id)
.group_by(
Family.import_id,
Family.title,
geography_subquery.c.geography_values,
FamilyMetadata.family_import_id,
Corpus.import_id,
Organisation,
)
.options(
# Disable any default eager loading as this was causing multiplicity due to
# implicit joins in relationships on the selected models.
lazyload("*")
)
)
_LOGGER.error(query)

return query


def _family_to_dto(
db: Session, fam_geo_meta_corp_org: FamilyGeoMetaOrg
) -> FamilyReadDTO:
fam, geo_value, meta, corpus, org = fam_geo_meta_corp_org
(
fam,
geo_values,
meta,
corpus,
org,
) = fam_geo_meta_corp_org

metadata = cast(dict, meta.value)
org = cast(str, org.name)

return FamilyReadDTO(
import_id=str(fam.import_id),
title=str(fam.title),
summary=str(fam.description),
geography=str(geo_value.value),
geography=str(geo_values[0]),
geographies=[str(value) for value in geo_values],
category=str(fam.family_category),
status=str(fam.family_status),
metadata=metadata,
Expand Down Expand Up @@ -199,25 +256,27 @@ def search(
search.append(Family.description.ilike(term))

if geography is not None:
geography_filter = or_(
*[(Geography.display_value == g.title()) for g in geography]
)
search.append(geography_filter)
geographies = [geo.capitalize() for geo in geography]
odrakes-cpr marked this conversation as resolved.
Show resolved Hide resolved
import_ids = _get_family_import_ids_by_geographies(db, geographies)
search.append(Family.import_id.in_(import_ids))

if "status" in search_params.keys():
term = cast(str, search_params["status"])
search.append(Family.family_status == term.capitalize())

condition = and_(*search) if len(search) > 1 else search[0]

try:
query = _get_query(db).filter(condition)
if org_id is not None:
query = query.filter(Organisation.id == org_id)

found = (
query.order_by(desc(Family.last_modified))
.limit(search_params["max_results"])
.all()
)

odrakes-cpr marked this conversation as resolved.
Show resolved Hide resolved
except OperationalError as e:
if "canceling statement due to statement timeout" in str(e):
raise TimeoutError
Expand Down Expand Up @@ -527,3 +586,22 @@ def count(db: Session, org_id: Optional[int]) -> Optional[int]:
return

return n_families


def _get_family_import_ids_by_geographies(db: Session, geographies: list[str]) -> Query:
"""
Filters import IDs of families based on the provided geography values.

:param db Session: the database connection
:param list[str] geographies: A list of geography display values to filter by
:return Query: A subquery containing the filtered family import IDs
"""

return (
db.query(
FamilyGeography.family_import_id,
)
.join(Geography, Geography.id == FamilyGeography.geography_id)
.filter(Geography.display_value.in_(geographies))
.subquery()
)
2 changes: 1 addition & 1 deletion docker-compose-test.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: navigator-admin-backend-test
services:
webapp:
command: poetry run pytest --disable-warnings ${TEST}
command: poetry run pytest --disable-warnings -vv ${TEST}
build:
context: ./
dockerfile: Dockerfile.webapp
Expand Down
4 changes: 4 additions & 0 deletions tests/helpers/family.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ def create_family_read_dto(
title: str = "title",
summary: str = "summary",
geography: str = "CHN",
geographies: list[str] = ["BRB", "BHS"],
odrakes-cpr marked this conversation as resolved.
Show resolved Hide resolved
category: str = FamilyCategory.LEGISLATIVE.value,
metadata: Optional[dict] = None,
slug: str = "slug",
Expand All @@ -34,6 +35,7 @@ def create_family_read_dto(
title=title,
summary=summary,
geography=geography,
geographies=geographies,
category=category,
status="status",
metadata=metadata,
Expand Down Expand Up @@ -87,6 +89,7 @@ def create_family_write_dto(
title: str = "title",
summary: str = "summary",
geography: str = "CHN",
geographies: list[str] = ["BRB", "BHS"],
odrakes-cpr marked this conversation as resolved.
Show resolved Hide resolved
category: str = FamilyCategory.LEGISLATIVE.value,
metadata: Optional[dict] = None,
collections: Optional[list[str]] = None,
Expand All @@ -106,6 +109,7 @@ def create_family_write_dto(
title=title,
summary=summary,
geography=geography,
geographies=geographies,
category=category,
metadata=metadata,
collections=collections,
Expand Down
2 changes: 2 additions & 0 deletions tests/integration_tests/family/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def test_search_geographies(
"title": "title",
"summary": "gregarious magazine rub",
"geography": "ALB",
"geographies": ["ALB"],
"category": "UNFCCC",
"status": "Created",
"metadata": {"author": ["CPR"], "author_type": ["Party"]},
Expand All @@ -38,6 +39,7 @@ def test_search_geographies(
"title": "title",
"summary": "flour umbrella established",
"geography": "ZMB",
"geographies": ["ZMB"],
"category": "UNFCCC",
"status": "Created",
"metadata": {"author": ["CPR"], "author_type": ["Party"]},
Expand Down
4 changes: 4 additions & 0 deletions tests/integration_tests/setup_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class DBEntry(TypedDict):
title: str
summary: str
geography: str
geographies: list[str]
category: str
status: str
metadata: dict
Expand All @@ -53,6 +54,7 @@ class DBEntry(TypedDict):
"title": "apple",
"summary": "",
"geography": "AFG",
"geographies": ["AFG"],
"category": "UNFCCC",
"status": "Created",
"metadata": {
Expand All @@ -79,6 +81,7 @@ class DBEntry(TypedDict):
"title": "apple orange banana",
"summary": "apple",
"geography": "ZWE",
"geographies": ["ZWE"],
"category": "UNFCCC",
"status": "Created",
"metadata": {
Expand All @@ -105,6 +108,7 @@ class DBEntry(TypedDict):
"title": "title",
"summary": "orange peas",
"geography": "AFG",
"geographies": ["AFG"],
odrakes-cpr marked this conversation as resolved.
Show resolved Hide resolved
"category": "UNFCCC",
"status": "Created",
"metadata": {"author": ["CPR"], "author_type": ["Party"]},
Expand Down
1 change: 1 addition & 0 deletions tests/mocks/services/family_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def mock_update_family(
data.title,
data.summary,
data.geography,
data.geographies,
data.category,
data.metadata,
"slug",
Expand Down
Loading