Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature/pdct-1777-support-multiple-geos-in-get #268

Merged
merged 32 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
1b866a7
feat: WIP - update query to accomodate for multi geos related to a fa…
Dec 19, 2024
edc17a7
Added lazy load to prevent multiplicity
katybaulch Dec 19, 2024
09090b4
refactor: update subquery to aggregate geography values
Dec 23, 2024
c62a0cf
Merge branch 'main' into feature/pdct-1777-support-multiple-geos-in-get
odrakes-cpr Dec 23, 2024
7984c15
feat: update package in pyproject
Dec 23, 2024
972a0a4
merge main fixes
Dec 23, 2024
f94e447
fix: add back in default logging which was commented out in debugging
Dec 23, 2024
8fa05c0
test: update unit tests for new geos field
Jan 4, 2025
a55f579
chore: make the test errors verbose when running pytest
Jan 4, 2025
5b560e5
Merge branch 'main' into feature/pdct-1777-support-multiple-geos-in-get
Jan 4, 2025
ed496c2
tests: update integration tests with new geo property on families model
Jan 6, 2025
ac8abb9
refactor: rework geography filter on families search endpoint
Jan 6, 2025
039764a
test: add tests for multi geos in search endpoint
Jan 6, 2025
8ee0ed1
fix: remove breakpoint
Jan 6, 2025
21c04d0
refactor: duplicate query function and have search endpoint work as i…
Jan 6, 2025
8160730
chore: update project version
Jan 6, 2025
620581e
test: update tests to include multi geos
Jan 6, 2025
8722dc9
chore: wrong project version
Jan 6, 2025
cbd330b
test: add missing geos from multi geos
Jan 6, 2025
9db52da
Merge branch 'main' into feature/pdct-1777-support-multiple-geos-in-get
odrakes-cpr Jan 6, 2025
80ba4d4
chore: project version update?
Jan 6, 2025
d487e59
Merge branch 'feature/pdct-1777-support-multiple-geos-in-get' of gith…
Jan 6, 2025
5700452
fix: incorrect typing for geo subquery
Jan 6, 2025
e18a36a
chore: updte project version
Jan 6, 2025
889dec5
merge main
Jan 6, 2025
9715d4e
chore: rectify this trunk error which was angry at sorting
Jan 7, 2025
799d3f7
fix: sorting
Jan 7, 2025
69b079e
Fix imports
katybaulch Jan 7, 2025
a1351e8
fix: fix sorting import error from trunk fmtter
Jan 7, 2025
354276a
Merge branch 'main' into feature/pdct-1777-support-multiple-geos-in-get
katybaulch Jan 8, 2025
c60e6b2
Merge branch 'feature/pdct-1777-support-multiple-geos-in-get' of gith…
Jan 8, 2025
1965679
chore: update project version
Jan 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions app/model/family.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class FamilyReadDTO(BaseModel):
title: str
summary: str
geography: str
geographies: list[str]
category: str
status: str
metadata: dict[str, list[str]]
Expand Down Expand Up @@ -40,6 +41,7 @@ class FamilyWriteDTO(BaseModel):
title: str
summary: str
geography: str
geographies: list[str]
katybaulch marked this conversation as resolved.
Show resolved Hide resolved
category: str
metadata: dict[str, list[str]]
collections: list[str]
Expand Down
144 changes: 136 additions & 8 deletions app/repository/family.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@
from db_client.models.organisation.users import Organisation
from sqlalchemy import Column, and_
from sqlalchemy import delete as db_delete
from sqlalchemy import desc, or_
from sqlalchemy import desc, func, or_
from sqlalchemy import update as db_update
from sqlalchemy.exc import NoResultFound, OperationalError
from sqlalchemy.orm import Query, Session
from sqlalchemy.orm import Query, Session, lazyload
from sqlalchemy.sql import Subquery
from sqlalchemy_utils import escape_like

from app.errors import RepositoryError
Expand All @@ -34,13 +35,22 @@

_LOGGER = logging.getLogger(__name__)

FamilyGeoMetaOrg = Tuple[Family, Geography, FamilyMetadata, Corpus, Organisation]
FamilyGeoMetaOrg = Tuple[
Family,
list[str],
FamilyMetadata,
Corpus,
Organisation,
]


def _get_query(db: Session) -> Query:
def _get_query_search_endpoint(db: Session) -> Query:
# NOTE: SqlAlchemy will make a complete hash of query generation
# if columns are used in the query() call. Therefore, entire
# objects are returned.
# NOTE: Whilst we are updating the queries to handle multiple geographies, we are keeping
# the returned results for search as they are currently to enable backwards
# compatibility, and as to not extend beyond this pr.
return (
db.query(Family, Geography, FamilyMetadata, Corpus, Organisation)
.join(FamilyGeography, FamilyGeography.family_import_id == Family.import_id)
Expand All @@ -55,9 +65,19 @@ def _get_query(db: Session) -> Query:
)


def _family_to_dto(
db: Session, fam_geo_meta_corp_org: FamilyGeoMetaOrg
def _family_to_dto_search_endpoint(
db: Session,
fam_geo_meta_corp_org: Tuple[
Family,
Geography,
FamilyMetadata,
Corpus,
Organisation,
],
) -> FamilyReadDTO:
# NOTE: Whilst we are updating the queries to handle multiple geographies, we are keeping
# the returned results for search as they are currently to enable backwards
# compatibility, and as to not extend beyond this pr.
fam, geo_value, meta, corpus, org = fam_geo_meta_corp_org
metadata = cast(dict, meta.value)
org = cast(str, org.name)
Expand All @@ -67,6 +87,111 @@ def _family_to_dto(
title=str(fam.title),
summary=str(fam.description),
geography=str(geo_value.value),
geographies=[str(geo_value.value)],
odrakes-cpr marked this conversation as resolved.
Show resolved Hide resolved
category=str(fam.family_category),
status=str(fam.family_status),
metadata=metadata,
slug=str(fam.slugs[0].name if len(fam.slugs) > 0 else ""),
events=[str(e.import_id) for e in fam.events],
published_date=fam.published_date,
last_updated_date=fam.last_updated_date,
documents=[str(d.import_id) for d in fam.family_documents],
collections=[
c.collection_import_id
for c in db.query(CollectionFamily).filter(
fam.import_id == CollectionFamily.family_import_id
)
],
organisation=org,
corpus_import_id=cast(str, corpus.import_id),
corpus_title=cast(str, corpus.title),
corpus_type=cast(str, corpus.corpus_type_name),
created=cast(datetime, fam.created),
last_modified=cast(datetime, fam.last_modified),
)


def get_family_geography_subquery(db: Session) -> Subquery:
"""
Creates a subquery to aggregate geography values for families, accomodating
those with multiple associated geographies.

:param db Session: the database connection
:return Query: A subquery containing family import IDs and their associated geography values
"""
return (
db.query(
FamilyGeography.family_import_id,
func.array_agg(Geography.value).label("geography_values"),
)
.join(Geography, Geography.id == FamilyGeography.geography_id)
.group_by(FamilyGeography.family_import_id)
.subquery()
)


def _get_query(db: Session) -> Query:
# NOTE: SqlAlchemy will make a complete hash of query generation
# if columns are used in the query() call. Therefore, entire
# objects are returned.

geography_subquery = get_family_geography_subquery(db)

query = (
db.query(
Family,
geography_subquery.c.geography_values,
FamilyMetadata,
Corpus,
Organisation,
)
.join(
geography_subquery,
geography_subquery.c.family_import_id == Family.import_id,
)
.join(FamilyMetadata, FamilyMetadata.family_import_id == Family.import_id)
.join(FamilyCorpus, FamilyCorpus.family_import_id == Family.import_id)
.join(Corpus, Corpus.import_id == FamilyCorpus.corpus_import_id)
.join(Organisation, Corpus.organisation_id == Organisation.id)
.group_by(
Family.import_id,
Family.title,
geography_subquery.c.geography_values,
FamilyMetadata.family_import_id,
Corpus.import_id,
Organisation,
)
.options(
# Disable any default eager loading as this was causing multiplicity due to
# implicit joins in relationships on the selected models.
lazyload("*")
)
)
_LOGGER.error(query)

return query


def _family_to_dto(
db: Session, fam_geo_meta_corp_org: FamilyGeoMetaOrg
) -> FamilyReadDTO:
(
fam,
geo_values,
meta,
corpus,
org,
) = fam_geo_meta_corp_org

metadata = cast(dict, meta.value)
org = cast(str, org.name)

return FamilyReadDTO(
import_id=str(fam.import_id),
title=str(fam.title),
summary=str(fam.description),
geography=str(geo_values[0]),
geographies=[str(value) for value in geo_values],
category=str(fam.family_category),
status=str(fam.family_status),
metadata=metadata,
Expand Down Expand Up @@ -209,21 +334,24 @@ def search(
search.append(Family.family_status == term.capitalize())

condition = and_(*search) if len(search) > 1 else search[0]

try:
query = _get_query(db).filter(condition)
query = _get_query_search_endpoint(db).filter(condition)
if org_id is not None:
query = query.filter(Organisation.id == org_id)

found = (
query.order_by(desc(Family.last_modified))
.limit(search_params["max_results"])
.all()
)

odrakes-cpr marked this conversation as resolved.
Show resolved Hide resolved
except OperationalError as e:
if "canceling statement due to statement timeout" in str(e):
raise TimeoutError
raise RepositoryError(e)

return [_family_to_dto(db, f) for f in found]
return [_family_to_dto_search_endpoint(db, f) for f in found]


def update(db: Session, import_id: str, family: FamilyWriteDTO, geo_id: int) -> bool:
Expand Down
2 changes: 1 addition & 1 deletion docker-compose-test.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: navigator-admin-backend-test
services:
webapp:
command: poetry run pytest --disable-warnings ${TEST}
command: poetry run pytest --disable-warnings -vv ${TEST}
build:
context: ./
dockerfile: Dockerfile.webapp
Expand Down
4 changes: 4 additions & 0 deletions tests/helpers/family.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ def create_family_read_dto(
title: str = "title",
summary: str = "summary",
geography: str = "CHN",
geographies: list[str] = ["CHN", "BRB", "BHS"],
category: str = FamilyCategory.LEGISLATIVE.value,
metadata: Optional[dict] = None,
slug: str = "slug",
Expand All @@ -34,6 +35,7 @@ def create_family_read_dto(
title=title,
summary=summary,
geography=geography,
geographies=geographies,
category=category,
status="status",
metadata=metadata,
Expand Down Expand Up @@ -87,6 +89,7 @@ def create_family_write_dto(
title: str = "title",
summary: str = "summary",
geography: str = "CHN",
geographies: list[str] = ["CHN", "BRB", "BHS"],
category: str = FamilyCategory.LEGISLATIVE.value,
metadata: Optional[dict] = None,
collections: Optional[list[str]] = None,
Expand All @@ -106,6 +109,7 @@ def create_family_write_dto(
title=title,
summary=summary,
geography=geography,
geographies=geographies,
category=category,
metadata=metadata,
collections=collections,
Expand Down
67 changes: 67 additions & 0 deletions tests/integration_tests/family/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def test_search_geographies(
"title": "title",
"summary": "gregarious magazine rub",
"geography": "ALB",
"geographies": ["ALB"],
"category": "UNFCCC",
"status": "Created",
"metadata": {"author": ["CPR"], "author_type": ["Party"]},
Expand All @@ -38,6 +39,7 @@ def test_search_geographies(
"title": "title",
"summary": "flour umbrella established",
"geography": "ZMB",
"geographies": ["ZMB"],
"category": "UNFCCC",
"status": "Created",
"metadata": {"author": ["CPR"], "author_type": ["Party"]},
Expand Down Expand Up @@ -74,6 +76,71 @@ def test_search_geographies(
assert ids == expected_ids


def test_search_retrieves_families_with_multiple_geographies(
client: TestClient, data_db: Session, superuser_header_token
):
setup_db(data_db)
add_data(
data_db,
[
{
"import_id": "A.0.0.4",
"title": "title",
"summary": "gregarious magazine rub",
"geography": "ALB",
"geographies": ["ALB", "USA", "BRB"],
"category": "UNFCCC",
"status": "Created",
"metadata": {"author": ["CPR"], "author_type": ["Party"]},
"organisation": "UNFCCC",
"corpus_import_id": "UNFCCC.corpus.i00000001.n0000",
"corpus_title": "UNFCCC Submissions",
"corpus_type": "Intl. agreements",
"slug": "Slug4",
"events": ["E.0.0.3"],
"published_date": "2018-12-24T04:59:33Z",
"last_updated_date": "2018-12-24T04:59:33Z",
"documents": ["D.0.0.1", "D.0.0.2"],
"collections": ["C.0.0.4"],
},
{
"import_id": "A.0.0.5",
"title": "title",
"summary": "flour umbrella established",
"geography": "AGO",
"geographies": ["AGO", "ALB"],
"category": "UNFCCC",
"status": "Created",
"metadata": {"author": ["CPR"], "author_type": ["Party"]},
"organisation": "UNFCCC",
"corpus_import_id": "UNFCCC.corpus.i00000001.n0000",
"corpus_title": "UNFCCC Submissions",
"corpus_type": "Intl. agreements",
"slug": "Slug5",
"events": ["E.0.0.3"],
"published_date": "2018-12-24T04:59:33Z",
"last_updated_date": "2018-12-24T04:59:33Z",
"documents": ["D.0.0.1", "D.0.0.2"],
"collections": ["C.0.0.4"],
},
],
)

test_geography = {"display_name": "albania", "iso_code": "ALB"}

geographies_query = f"&geography={test_geography['display_name']}"
response = client.get(
f"/api/v1/families/?{geographies_query}",
headers=superuser_header_token,
)
assert response.status_code == status.HTTP_200_OK
data = response.json()
for item in data:
assert "geographies" in item
assert isinstance(item["geographies"], list)
assert test_geography["iso_code"] in item["geographies"]


def test_search_family_super(
client: TestClient, data_db: Session, superuser_header_token
):
Expand Down
4 changes: 4 additions & 0 deletions tests/integration_tests/setup_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class DBEntry(TypedDict):
title: str
summary: str
geography: str
geographies: list[str]
category: str
status: str
metadata: dict
Expand All @@ -53,6 +54,7 @@ class DBEntry(TypedDict):
"title": "apple",
"summary": "",
"geography": "AFG",
"geographies": ["AFG"],
"category": "UNFCCC",
"status": "Created",
"metadata": {
Expand All @@ -79,6 +81,7 @@ class DBEntry(TypedDict):
"title": "apple orange banana",
"summary": "apple",
"geography": "ZWE",
"geographies": ["ZWE"],
"category": "UNFCCC",
"status": "Created",
"metadata": {
Expand All @@ -105,6 +108,7 @@ class DBEntry(TypedDict):
"title": "title",
"summary": "orange peas",
"geography": "AFG",
"geographies": ["AFG"],
odrakes-cpr marked this conversation as resolved.
Show resolved Hide resolved
"category": "UNFCCC",
"status": "Created",
"metadata": {"author": ["CPR"], "author_type": ["Party"]},
Expand Down
1 change: 1 addition & 0 deletions tests/mocks/services/family_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def mock_update_family(
data.title,
data.summary,
data.geography,
data.geographies,
data.category,
data.metadata,
"slug",
Expand Down
Loading
Loading