-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feature/pdct 1533 documents should only show documents in allowed corpora (#415)
* Make documents router dependent on app token
* Driveby: Add CORS tests for MCF
* Update slug lookup query to respect allowed corpora
* Include actual CCLW corpus ID in test token
* Bump to 1.19.11
* Refactor _get_query_template
* Refactor doc and fam lookup tests
* Add integration tests for doc/fam lookup when corpora mismatch
* Add alternative corpora token
* Refactor download code
- Loading branch information
1 parent
9fc8060
commit e4e9b9d
Showing
15 changed files
with
435 additions
and
279 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,24 +1,43 @@ | ||
"""Functions to support browsing the RDS document structure""" | ||
|
||
import os | ||
from functools import lru_cache | ||
from logging import getLogger | ||
|
||
import pandas as pd | ||
from fastapi import Depends | ||
|
||
from app.clients.db.session import get_db | ||
from app.repository.helpers import get_query_template | ||
|
||
_LOGGER = getLogger(__name__) | ||
|
||
|
||
@lru_cache()
def _get_query_template():
    """Return the text of the ``download.sql`` query template.

    The path ``app/repository/sql/download.sql`` is relative, so it is
    resolved against the current working directory. The result is memoised
    by ``lru_cache``, so the file is read at most once per process.

    :return str: The full contents of the SQL template file.
    """
    template_path = os.path.join("app", "repository", "sql", "download.sql")
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale of the host the service happens to run on.
    with open(template_path, "r", encoding="utf-8") as file:
        return file.read()
def create_query(
    template_query: str, ingest_cycle_start: str, allowed_corpora_ids: list[str]
) -> str:
    """Create download whole database query, replacing variables.

    The template is filled in by plain text substitution of the literal
    placeholders ``{ingest_cycle_start}`` and ``{allowed_corpora_ids}``.

    :param str template_query: The SQL template containing the placeholders.
    :param str ingest_cycle_start: The current ingest cycle date. It is
        inserted verbatim; any quoting must already be in the template.
    :param list[str] allowed_corpora_ids: The corpora from which we
        should allow the data to be dumped.
    :return str: The SQL query to perform on the database session.
    """
    # Each ID becomes a single-quoted SQL string literal. Embedded single
    # quotes are doubled (the standard SQL escape) so that an ID can never
    # terminate its own literal and alter the surrounding query.
    escaped_ids = [corpus_id.replace("'", "''") for corpus_id in allowed_corpora_ids]

    # An empty list deliberately yields '' (one empty literal) so that an
    # ``IN (...)`` clause in the template stays syntactically valid.
    corpora_ids = "'" + "','".join(escaped_ids) + "'"

    return template_query.replace(
        "{ingest_cycle_start}", ingest_cycle_start
    ).replace("{allowed_corpora_ids}", corpora_ids)
|
||
|
||
# NOTE(review): this span comes from a rendered diff, so two signatures for
# get_whole_database_dump appear here: the multi-line one directly below and
# the single-line `(query, db=...)` one further down. Presumably one is the
# added version and the other the removed one — confirm against the real file.
#
# The lines under the first signature load the download.sql template through
# get_query_template and fill it in with create_query, using the given ingest
# cycle start and allowed corpora IDs.
def get_whole_database_dump( | ||
ingest_cycle_start: str, allowed_corpora_ids: list[str], db=Depends(get_db) | ||
): | ||
query_template = get_query_template( | ||
os.path.join("app", "repository", "sql", "download.sql") | ||
) | ||
query = create_query(query_template, ingest_cycle_start, allowed_corpora_ids) | ||
|
||
|
||
# The lines under this signature open a connection from the injected session,
# hand `query` and `conn.connection` to pandas.read_sql, and return the
# resulting DataFrame.
def get_whole_database_dump(query, db=Depends(get_db)): | ||
with db.connection() as conn: | ||
df = pd.read_sql(query, conn.connection) | ||
return df |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
""" | ||
Functions to support the documents endpoints | ||
old functions (non DFC) are moved to the deprecated_documents.py file. | ||
""" | ||
|
||
from functools import lru_cache | ||
|
||
|
||
@lru_cache()
def get_query_template(filepath: str) -> str:
    """Read a query template from disk and return its text.

    Results are memoised per ``filepath`` by ``lru_cache``, so each distinct
    path is read from disk once and later calls return the cached text even
    if the file has since changed.

    :param str filepath: Path of the query template file to read.
    :return str: The full contents of the file.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale of the host the service happens to run on.
    with open(filepath, "r", encoding="utf-8") as file:
        return file.read()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
-- Resolve a slug name to its (family_document_import_id, family_import_id)
-- pair, keeping only rows whose corpus import_id is in the allowed list.
-- {slug_name} and {allowed_corpora_ids} are placeholders; presumably the
-- caller substitutes them as text before execution — confirm.
-- Because the WHERE clause filters on corpus.import_id, rows for which the
-- LEFT JOINs found no corpus are dropped.
--
-- First branch: reach the family directly through slug.family_import_id.
SELECT | ||
slug.family_document_import_id, slug.family_import_id | ||
FROM slug | ||
LEFT JOIN family ON family.import_id = slug.family_import_id | ||
LEFT JOIN family_corpus ON family_corpus.family_import_id = family.import_id | ||
LEFT JOIN corpus ON corpus.import_id = family_corpus.corpus_import_id | ||
WHERE slug.name = '{slug_name}' | ||
AND corpus.import_id IN ({allowed_corpora_ids}) | ||
|
||
UNION | ||
|
||
-- Second branch: reach the family through family_document, starting from
-- slug.family_document_import_id.
SELECT | ||
slug.family_document_import_id, slug.family_import_id | ||
FROM slug | ||
LEFT JOIN family_document ON family_document.import_id = slug.family_document_import_id | ||
LEFT JOIN family ON family.import_id = family_document.family_import_id | ||
LEFT JOIN family_corpus ON family_corpus.family_import_id = family.import_id | ||
LEFT JOIN corpus ON corpus.import_id = family_corpus.corpus_import_id | ||
WHERE slug.name = '{slug_name}' | ||
AND corpus.import_id IN ({allowed_corpora_ids}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[tool.poetry] | ||
name = "navigator_backend" | ||
version = "1.19.10" | ||
version = "1.19.11" | ||
description = "" | ||
authors = ["CPR-dev-team <[email protected]>"] | ||
packages = [{ include = "app" }, { include = "tests" }] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
24 changes: 24 additions & 0 deletions
24
tests/non_search/routers/documents/setup_doc_fam_lookup.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
from typing import Optional | ||
|
||
from fastapi import status | ||
|
||
DOCUMENTS_ENDPOINT = "/api/v1/documents" | ||
TEST_HOST = "http://localhost:3000/" | ||
|
||
|
||
def _make_doc_fam_lookup_request(
    client,
    token,
    slug: str,
    expected_status_code: int = status.HTTP_200_OK,
    origin: Optional[str] = TEST_HOST,
):
    """GET the documents endpoint for ``slug`` and return the JSON body.

    :param client: Test client exposing ``get(url, headers=...)``.
    :param token: Value sent in the ``app-token`` request header.
    :param str slug: Slug appended to the documents endpoint path.
    :param int expected_status_code: Status code the response must have.
    :param Optional[str] origin: Value for the ``origin`` header; the
        header is left out entirely when this is ``None``.
    :return: The decoded JSON payload of the response.
    """
    # Always send the app token; only add an origin header when one is given.
    request_headers = {"app-token": token}
    if origin is not None:
        request_headers["origin"] = origin

    url = f"{DOCUMENTS_ENDPOINT}/{slug}"
    result = client.get(url, headers=request_headers)

    # Include the response text in the failure message to ease debugging.
    assert result.status_code == expected_status_code, result.text
    return result.json()
Oops, something went wrong.