From be9bd8d0455113639ee80ba551031b463d9ae2a5 Mon Sep 17 00:00:00 2001 From: Katy Baulch <46493669+katybaulch@users.noreply.github.com> Date: Mon, 23 Sep 2024 10:56:28 +0100 Subject: [PATCH] Feature/pdct 1274 Fix logic for validating corpora IDs (#348) * Rename function from decode_configuration_token * Remove space between hyphen and command * Raise custom HTTP exception on decode error & reuse config vars * Remove commented out code * Apply custom app config token to search query * Bump CPR SDK to 1.4.3 * Don't mock vespa search adapter * Revert "Don't mock vespa search adapter" This reverts commit 358d1c28026567078930c31b79cc5e30a64f1c37. * Raise HTTPException in router not service layer * Bump CPR SDK to 1.4.4 * Wrap decode in try except block * Make all vespa search tests use valid_token * Reduce duplication * Use fastAPI status for contextualising HTTP return code * Move corpus verification to search router * Move metadata vespa tests into separate file * Raise 400 error if corpus validation fails * Make sure ruff version is compatible with VScode ext * Consolidate steps as make start includes vespa_setup * Use different secret key for custom app token * Make audience localhost:3000 for test token * Fix validate_corpora_ids * Log actual JWT error to console * Remove validation of corpora from decode tests * Bump to 1.16.1 * Use fastapi status to contextualise http error code * Add tests for vespa search returning 400 when config token invalid * Make sure audience has trailing slash * Rename from SECRET_KEY to differentiate between keys * Bump to 1.16.0 * Fix decode token unit tests * Calculate valid config token for tests on the fly * Update debug message * Validate corpora IDs against db by default * Bump to 1.16.1 * Bump to 1.16.3 * Remove debug --- app/api/api_v1/routers/search.py | 2 +- app/db/crud/helpers/validate.py | 26 ++++++++++++------- pyproject.toml | 2 +- .../test_search_raises_on_token_errors.py | 5 +--- 4 files changed, 20 insertions(+), 15 deletions(-) diff --git a/app/api/api_v1/routers/search.py b/app/api/api_v1/routers/search.py index 87d5a28d..c621f006 100644 --- a/app/api/api_v1/routers/search.py +++ b/app/api/api_v1/routers/search.py @@ -157,7 +157,7 @@ def search_documents( ) if not validate_corpora_ids(db, allowed_corpora_ids): - msg = "One or more of the given corpora do not exist in the database." + msg = "Error validating corpora IDs." raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=msg, diff --git a/app/db/crud/helpers/validate.py b/app/db/crud/helpers/validate.py index 05a158c9..b017bf6b 100644 --- a/app/db/crud/helpers/validate.py +++ b/app/db/crud/helpers/validate.py @@ -1,4 +1,5 @@ import logging +from typing import Optional, cast from db_client.models.dfce.family import Corpus from sqlalchemy import distinct, select @@ -7,16 +8,23 @@ _LOGGER = logging.getLogger(__name__) -def validate_corpora_ids(db: Session, allowed_corpora_ids: list[str]) -> bool: - """Validate whether all given corpus IDs exist in the DB. +def validate_corpora_ids( + db: Session, corpora_ids: list[str], allowed_corpora_ids: Optional[list[str]] = None +) -> bool: + """Validate all given corpus IDs against a list of allowed corpora. :param Session db: The DB session to connect to. - :param list[str] allowed_corpora_ids: The corpus import IDs we want - to validate. - :return bool: Return whether or not all the corpora exist in the DB. + :param list[str] corpora_ids: The corpus import IDs we want to + validate. + :param Optional[list[str]] allowed_corpora_ids: The corpus import + IDs we want to validate against. + :return bool: Return whether or not all the corpora are valid. """ - existing_corpora_in_db = db.scalars(select(distinct(Corpus.import_id))).all() - validate_success = all( - corpus in allowed_corpora_ids for corpus in existing_corpora_in_db - ) + if allowed_corpora_ids is None: + allowed_corpora_ids = cast( + list, db.scalars(select(distinct(Corpus.import_id))).all() + ) + _LOGGER.info(allowed_corpora_ids) # TODO remove in part 2. + + validate_success = all(corpus in allowed_corpora_ids for corpus in corpora_ids) return validate_success diff --git a/pyproject.toml b/pyproject.toml index a3ffe3c9..cc74cc11 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "navigator_backend" -version = "1.16.2" +version = "1.16.3" description = "" authors = ["CPR-dev-team "] packages = [{ include = "app" }, { include = "tests" }] diff --git a/tests/search/vespa/test_search_raises_on_token_errors.py b/tests/search/vespa/test_search_raises_on_token_errors.py index 7bee883d..6c0073d8 100644 --- a/tests/search/vespa/test_search_raises_on_token_errors.py +++ b/tests/search/vespa/test_search_raises_on_token_errors.py @@ -33,10 +33,7 @@ def test_search_with_invalid_corpus_id_in_token( expected_status_code=status.HTTP_400_BAD_REQUEST, ) - assert ( - response["detail"] - == "One or more of the given corpora do not exist in the database." - ) + assert response["detail"] == "Error validating corpora IDs." @pytest.mark.search