feature/pdct-1777-support-multiple-geos-in-get (#268)
* feat: WIP - update query to accommodate multiple geos related to a family

* Added lazy load to prevent multiplicity
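
  A minimal sketch of the kind of loader option this refers to, assuming the
  `Family` ORM model used elsewhere in this repo (illustrative only, not the
  exact query from this change):

  ```python
  from sqlalchemy.orm import Session, lazyload

  def query_families_without_eager_loads(db: Session):
      # Family is the ORM model from the project's db_client package (assumed
      # imported). lazyload("*") disables default/eager relationship loading on
      # the selected entities, preventing the implicit relationship joins that
      # multiplied rows once a family could have several geographies.
      return db.query(Family).options(lazyload("*"))
  ```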

* refactor: update subquery to aggregate geography values

- remove commented out code

* feat: update package in pyproject

* fix: add back in default logging which was commented out in debugging

* test: update unit tests for new geos field

* chore: make the test errors verbose when running pytest

* tests: update integration tests with new geo property on families model

* refactor: rework geography filter on families search endpoint

- This one had to be reworked slightly because the get_query function
  for families has been updated for multi geos: the query no longer
  returns a row with the geography table and family geography table
  joined. A query for a family now returns multiple rows when more than
  one geography is associated, and these are aggregated into a list of
  geographies, so the existing filtering no longer worked. I have
  flipped the approach and included a subquery that retrieves the
  import IDs of families associated with the requested geographies and
  then filters the main query on those (see the sketch below).
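
A minimal sketch of that subquery-based filter, assuming the `Family`,
`FamilyGeography` and `Geography` ORM models already imported in
`app/repository/family.py` (the helper name and exact shape are illustrative,
not the code in this PR):

```python
from sqlalchemy.orm import Query, Session

def filter_families_by_geographies(
    db: Session, query: Query, geography_values: list[str]
) -> Query:
    # Subquery: import IDs of families linked to any of the requested
    # geographies (Family, FamilyGeography and Geography are assumed imported).
    matching_family_ids = (
        db.query(FamilyGeography.family_import_id)
        .join(Geography, Geography.id == FamilyGeography.geography_id)
        .filter(Geography.value.in_(geography_values))
        .scalar_subquery()
    )
    # Filter the main (already geo-aggregated) family query on those IDs.
    return query.filter(Family.import_id.in_(matching_family_ids))
```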

* test: add tests for multi geos in search endpoint

* fix: remove breakpoint

* refactor: duplicate query function and have search endpoint work as it does

this is so we can progressively make the change and not risk breaking search

* chore: update project version

* test: update tests to include multi geos

* chore: wrong project version

* test: add missing geos from multi geos

* chore: project version update?

* fix: incorrect typing for geo subquery

* chore: update project version

* chore: rectify the trunk error that was complaining about sorting

* fix: sorting

* Fix imports

* fix: fix sorting import error from the trunk formatter

* chore: update project version

---------

Co-authored-by: Osneil Drakes <[email protected]>
Co-authored-by: Katy Baulch <[email protected]>
Co-authored-by: Osneil Drakes <[email protected]>
4 people authored Jan 8, 2025
1 parent dd3e3ea commit 3d2f936
Showing 10 changed files with 234 additions and 11 deletions.
2 changes: 1 addition & 1 deletion app/main.py
@@ -120,7 +120,7 @@ async def lifespan(app_: FastAPI):
dependencies=[Depends(check_user_auth)],
)

# Add CORS middleware to allow cross origin requests from any port
# Add CORS middleware to allow cross origin requests from any port.
app.add_middleware(
CORSMiddleware,
allow_origin_regex=_ALLOW_ORIGIN_REGEX,
2 changes: 2 additions & 0 deletions app/model/family.py
@@ -11,6 +11,7 @@ class FamilyReadDTO(BaseModel):
title: str
summary: str
geography: str
geographies: list[str]
category: str
status: str
metadata: dict[str, list[str]]
@@ -40,6 +41,7 @@ class FamilyWriteDTO(BaseModel):
title: str
summary: str
geography: str
geographies: list[str]
category: str
metadata: dict[str, list[str]]
collections: list[str]
144 changes: 136 additions & 8 deletions app/repository/family.py
@@ -22,10 +22,11 @@
from db_client.models.organisation.users import Organisation
from sqlalchemy import Column, and_
from sqlalchemy import delete as db_delete
from sqlalchemy import desc, or_
from sqlalchemy import desc, func, or_
from sqlalchemy import update as db_update
from sqlalchemy.exc import NoResultFound, OperationalError
from sqlalchemy.orm import Query, Session
from sqlalchemy.orm import Query, Session, lazyload
from sqlalchemy.sql import Subquery
from sqlalchemy_utils import escape_like

from app.errors import RepositoryError
@@ -34,13 +35,22 @@

_LOGGER = logging.getLogger(__name__)

FamilyGeoMetaOrg = Tuple[Family, Geography, FamilyMetadata, Corpus, Organisation]
FamilyGeoMetaOrg = Tuple[
Family,
list[str],
FamilyMetadata,
Corpus,
Organisation,
]


def _get_query(db: Session) -> Query:
def _get_query_search_endpoint(db: Session) -> Query:
# NOTE: SqlAlchemy will make a complete hash of query generation
# if columns are used in the query() call. Therefore, entire
# objects are returned.
# NOTE: Whilst we are updating the queries to handle multiple geographies, we are keeping
# the returned results for search as they are currently to enable backwards
# compatibility, and so as not to extend beyond this PR.
return (
db.query(Family, Geography, FamilyMetadata, Corpus, Organisation)
.join(FamilyGeography, FamilyGeography.family_import_id == Family.import_id)
@@ -55,9 +65,19 @@ def _get_query(db: Session) -> Query:
)


def _family_to_dto(
db: Session, fam_geo_meta_corp_org: FamilyGeoMetaOrg
def _family_to_dto_search_endpoint(
db: Session,
fam_geo_meta_corp_org: Tuple[
Family,
Geography,
FamilyMetadata,
Corpus,
Organisation,
],
) -> FamilyReadDTO:
# NOTE: Whilst we are updating the queries to handle multiple geographies, we are keeping
# the returned results for search as they are currently to enable backwards
# compatibility, and so as not to extend beyond this PR.
fam, geo_value, meta, corpus, org = fam_geo_meta_corp_org
metadata = cast(dict, meta.value)
org = cast(str, org.name)
@@ -67,6 +87,111 @@ def _family_to_dto(
title=str(fam.title),
summary=str(fam.description),
geography=str(geo_value.value),
geographies=[str(geo_value.value)],
category=str(fam.family_category),
status=str(fam.family_status),
metadata=metadata,
slug=str(fam.slugs[0].name if len(fam.slugs) > 0 else ""),
events=[str(e.import_id) for e in fam.events],
published_date=fam.published_date,
last_updated_date=fam.last_updated_date,
documents=[str(d.import_id) for d in fam.family_documents],
collections=[
c.collection_import_id
for c in db.query(CollectionFamily).filter(
fam.import_id == CollectionFamily.family_import_id
)
],
organisation=org,
corpus_import_id=cast(str, corpus.import_id),
corpus_title=cast(str, corpus.title),
corpus_type=cast(str, corpus.corpus_type_name),
created=cast(datetime, fam.created),
last_modified=cast(datetime, fam.last_modified),
)


def get_family_geography_subquery(db: Session) -> Subquery:
"""
Creates a subquery to aggregate geography values for families, accommodating
those with multiple associated geographies.

:param db Session: the database connection
:return Subquery: A subquery containing family import IDs and their associated geography values
"""
return (
db.query(
FamilyGeography.family_import_id,
func.array_agg(Geography.value).label("geography_values"),
)
.join(Geography, Geography.id == FamilyGeography.geography_id)
.group_by(FamilyGeography.family_import_id)
.subquery()
)


def _get_query(db: Session) -> Query:
# NOTE: SqlAlchemy will make a complete hash of query generation
# if columns are used in the query() call. Therefore, entire
# objects are returned.

geography_subquery = get_family_geography_subquery(db)

query = (
db.query(
Family,
geography_subquery.c.geography_values,
FamilyMetadata,
Corpus,
Organisation,
)
.join(
geography_subquery,
geography_subquery.c.family_import_id == Family.import_id,
)
.join(FamilyMetadata, FamilyMetadata.family_import_id == Family.import_id)
.join(FamilyCorpus, FamilyCorpus.family_import_id == Family.import_id)
.join(Corpus, Corpus.import_id == FamilyCorpus.corpus_import_id)
.join(Organisation, Corpus.organisation_id == Organisation.id)
.group_by(
Family.import_id,
Family.title,
geography_subquery.c.geography_values,
FamilyMetadata.family_import_id,
Corpus.import_id,
Organisation,
)
.options(
# Disable any default eager loading as this was causing multiplicity due to
# implicit joins in relationships on the selected models.
lazyload("*")
)
)
_LOGGER.error(query)

return query


def _family_to_dto(
db: Session, fam_geo_meta_corp_org: FamilyGeoMetaOrg
) -> FamilyReadDTO:
(
fam,
geo_values,
meta,
corpus,
org,
) = fam_geo_meta_corp_org

metadata = cast(dict, meta.value)
org = cast(str, org.name)

return FamilyReadDTO(
import_id=str(fam.import_id),
title=str(fam.title),
summary=str(fam.description),
geography=str(geo_values[0]),
geographies=[str(value) for value in geo_values],
category=str(fam.family_category),
status=str(fam.family_status),
metadata=metadata,
@@ -209,21 +334,24 @@ def search(
search.append(Family.family_status == term.capitalize())

condition = and_(*search) if len(search) > 1 else search[0]

try:
query = _get_query(db).filter(condition)
query = _get_query_search_endpoint(db).filter(condition)
if org_id is not None:
query = query.filter(Organisation.id == org_id)

found = (
query.order_by(desc(Family.last_modified))
.limit(search_params["max_results"])
.all()
)

except OperationalError as e:
if "canceling statement due to statement timeout" in str(e):
raise TimeoutError
raise RepositoryError(e)

return [_family_to_dto(db, f) for f in found]
return [_family_to_dto_search_endpoint(db, f) for f in found]


def update(db: Session, import_id: str, family: FamilyWriteDTO, geo_id: int) -> bool:
2 changes: 1 addition & 1 deletion docker-compose-test.yml
@@ -1,7 +1,7 @@
name: navigator-admin-backend-test
services:
webapp:
command: poetry run pytest --disable-warnings ${TEST}
command: poetry run pytest --disable-warnings -vv ${TEST}
build:
context: ./
dockerfile: Dockerfile.webapp
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "admin_backend"
version = "2.17.28"
version = "2.17.29"
description = ""
authors = ["CPR-dev-team <[email protected]>"]
packages = [{ include = "app" }, { include = "tests" }]
4 changes: 4 additions & 0 deletions tests/helpers/family.py
@@ -11,6 +11,7 @@ def create_family_read_dto(
title: str = "title",
summary: str = "summary",
geography: str = "CHN",
geographies: list[str] = ["CHN", "BRB", "BHS"],
category: str = FamilyCategory.LEGISLATIVE.value,
metadata: Optional[dict] = None,
slug: str = "slug",
@@ -34,6 +35,7 @@ def create_family_read_dto(
title=title,
summary=summary,
geography=geography,
geographies=geographies,
category=category,
status="status",
metadata=metadata,
@@ -87,6 +89,7 @@ def create_family_write_dto(
title: str = "title",
summary: str = "summary",
geography: str = "CHN",
geographies: list[str] = ["CHN", "BRB", "BHS"],
category: str = FamilyCategory.LEGISLATIVE.value,
metadata: Optional[dict] = None,
collections: Optional[list[str]] = None,
@@ -106,6 +109,7 @@ def create_family_write_dto(
title=title,
summary=summary,
geography=geography,
geographies=geographies,
category=category,
metadata=metadata,
collections=collections,
67 changes: 67 additions & 0 deletions tests/integration_tests/family/test_search.py
@@ -19,6 +19,7 @@ def test_search_geographies(
"title": "title",
"summary": "gregarious magazine rub",
"geography": "ALB",
"geographies": ["ALB"],
"category": "UNFCCC",
"status": "Created",
"metadata": {"author": ["CPR"], "author_type": ["Party"]},
@@ -38,6 +39,7 @@
"title": "title",
"summary": "flour umbrella established",
"geography": "ZMB",
"geographies": ["ZMB"],
"category": "UNFCCC",
"status": "Created",
"metadata": {"author": ["CPR"], "author_type": ["Party"]},
@@ -74,6 +76,71 @@ def test_search_geographies(
assert ids == expected_ids


def test_search_retrieves_families_with_multiple_geographies(
client: TestClient, data_db: Session, superuser_header_token
):
setup_db(data_db)
add_data(
data_db,
[
{
"import_id": "A.0.0.4",
"title": "title",
"summary": "gregarious magazine rub",
"geography": "ALB",
"geographies": ["ALB", "USA", "BRB"],
"category": "UNFCCC",
"status": "Created",
"metadata": {"author": ["CPR"], "author_type": ["Party"]},
"organisation": "UNFCCC",
"corpus_import_id": "UNFCCC.corpus.i00000001.n0000",
"corpus_title": "UNFCCC Submissions",
"corpus_type": "Intl. agreements",
"slug": "Slug4",
"events": ["E.0.0.3"],
"published_date": "2018-12-24T04:59:33Z",
"last_updated_date": "2018-12-24T04:59:33Z",
"documents": ["D.0.0.1", "D.0.0.2"],
"collections": ["C.0.0.4"],
},
{
"import_id": "A.0.0.5",
"title": "title",
"summary": "flour umbrella established",
"geography": "AGO",
"geographies": ["AGO", "ALB"],
"category": "UNFCCC",
"status": "Created",
"metadata": {"author": ["CPR"], "author_type": ["Party"]},
"organisation": "UNFCCC",
"corpus_import_id": "UNFCCC.corpus.i00000001.n0000",
"corpus_title": "UNFCCC Submissions",
"corpus_type": "Intl. agreements",
"slug": "Slug5",
"events": ["E.0.0.3"],
"published_date": "2018-12-24T04:59:33Z",
"last_updated_date": "2018-12-24T04:59:33Z",
"documents": ["D.0.0.1", "D.0.0.2"],
"collections": ["C.0.0.4"],
},
],
)

test_geography = {"display_name": "albania", "iso_code": "ALB"}

geographies_query = f"&geography={test_geography['display_name']}"
response = client.get(
f"/api/v1/families/?{geographies_query}",
headers=superuser_header_token,
)
assert response.status_code == status.HTTP_200_OK
data = response.json()
for item in data:
assert "geographies" in item
assert isinstance(item["geographies"], list)
assert test_geography["iso_code"] in item["geographies"]


def test_search_family_super(
client: TestClient, data_db: Session, superuser_header_token
):
4 changes: 4 additions & 0 deletions tests/integration_tests/setup_db.py
@@ -32,6 +32,7 @@ class DBEntry(TypedDict):
title: str
summary: str
geography: str
geographies: list[str]
category: str
status: str
metadata: dict
@@ -53,6 +54,7 @@ class DBEntry(TypedDict):
"title": "apple",
"summary": "",
"geography": "AFG",
"geographies": ["AFG"],
"category": "UNFCCC",
"status": "Created",
"metadata": {
@@ -79,6 +81,7 @@ class DBEntry(TypedDict):
"title": "apple orange banana",
"summary": "apple",
"geography": "ZWE",
"geographies": ["ZWE"],
"category": "UNFCCC",
"status": "Created",
"metadata": {
@@ -105,6 +108,7 @@ class DBEntry(TypedDict):
"title": "title",
"summary": "orange peas",
"geography": "AFG",
"geographies": ["AFG"],
"category": "UNFCCC",
"status": "Created",
"metadata": {"author": ["CPR"], "author_type": ["Party"]},