Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PDCT-420 Implement reading the user and model defined language for a document #29

Merged
merged 8 commits into from
Oct 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 3 additions & 8 deletions app/model/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,8 @@ class DocumentReadDTO(BaseModel):
cdn_object: Optional[str]
source_url: Optional[str]
content_type: Optional[str]
user_language_name: str
# TODO: Languages for a document
# languages: list[]
user_language_name: Optional[str]
calc_language_name: Optional[str]


class DocumentWriteDTO(BaseModel):
Expand All @@ -38,9 +37,7 @@ class DocumentWriteDTO(BaseModel):
type: Optional[str]
title: str
source_url: Optional[str]
user_language_name: str
# TODO: Languages for a document
# languages: list[]
user_language_name: Optional[str]


class DocumentCreateDTO(BaseModel):
Expand All @@ -56,8 +53,6 @@ class DocumentCreateDTO(BaseModel):
title: str
source_url: str
user_language_name: str
# TODO: Languages for a document
# languages: list[]


class DocumentUploadRequest(BaseModel):
Expand Down
167 changes: 121 additions & 46 deletions app/repository/document.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
from typing import Tuple
import logging
from typing import Optional, cast
from sqlalchemy.orm import Session, Query
from sqlalchemy import Column
from app.clients.db.models.app.counters import CountedEntity
from app.clients.db.models.law_policy.family import (
DocumentStatus,
FamilyDocumentRole,
FamilyDocumentType,
Slug,
Variant,
)
from app.errors import RepositoryError, ValidationError
from app.model.document import DocumentCreateDTO, DocumentReadDTO, DocumentWriteDTO
Expand All @@ -22,55 +17,86 @@
from app.clients.db.models.law_policy import (
FamilyDocument,
)
from sqlalchemy.exc import NoResultFound

from sqlalchemy import or_, update as db_update
from sqlalchemy import Column, or_, and_, update as db_update, insert as db_insert
from sqlalchemy_utils import escape_like
from sqlalchemy.exc import NoResultFound
from sqlalchemy.orm import Session, Query, aliased
from sqlalchemy.sql.functions import concat
from sqlalchemy import func

from app.repository.helpers import generate_import_id, generate_slug
from app.repository import family as family_repo


_LOGGER = logging.getLogger(__name__)

DocumentTuple = Tuple[FamilyDocument, PhysicalDocument, Slug]
CreateObjects = Tuple[PhysicalDocumentLanguage, FamilyDocument, PhysicalDocument]


def _get_query(db: Session) -> Query:
# NOTE: SqlAlchemy will make a complete hash of the query generation
# if columns are used in the query() call. Therefore, entire
# objects are returned.
lang_model = aliased(Language)
lang_user = aliased(Language)
pdl_model = aliased(PhysicalDocumentLanguage)
pdl_user = aliased(PhysicalDocumentLanguage)

sq_slug = (
db.query(
func.string_agg(Slug.name, ",").label("name"),
Slug.family_document_import_id.label("doc_id"),
)
.group_by(Slug.family_document_import_id)
.subquery()
)

# FIXME: TODO: will this work with multiple slugs????
return (
db.query(FamilyDocument, PhysicalDocument, Slug)
db.query(
FamilyDocument.import_id.label("import_id"),
FamilyDocument.family_import_id.label("family_import_id"),
FamilyDocument.variant_name.label("variant_name"),
FamilyDocument.document_status.label("status"),
FamilyDocument.document_role.label("role"),
FamilyDocument.document_type.label("type"),
concat(sq_slug.c.name).label("slug"),
PhysicalDocument.id.label("physical_id"),
PhysicalDocument.title.label("title"),
PhysicalDocument.md5_sum.label("md5_sum"),
PhysicalDocument.cdn_object.label("cdn_object"),
PhysicalDocument.source_url.label("source_url"),
PhysicalDocument.content_type.label("content_type"),
lang_user.name.label("user_language_name"),
lang_model.name.label("calc_language_name"),
)
.select_from(FamilyDocument, PhysicalDocument)
.filter(FamilyDocument.physical_document_id == PhysicalDocument.id)
.join(sq_slug, sq_slug.c.doc_id == FamilyDocument.import_id, isouter=True)
.join(
Slug,
Slug.family_document_import_id == FamilyDocument.import_id,
pdl_user,
and_(
PhysicalDocument.id == pdl_user.document_id,
pdl_user.source == LanguageSource.USER,
),
isouter=True,
)
)


def _document_to_dto(doc_tuple: DocumentTuple) -> DocumentReadDTO:
fd, pd, slug = doc_tuple
return DocumentReadDTO(
import_id=cast(str, fd.import_id),
family_import_id=cast(str, fd.family_import_id),
variant_name=cast(Variant, fd.variant_name),
status=cast(DocumentStatus, fd.document_status),
role=cast(FamilyDocumentRole, fd.document_role),
type=cast(FamilyDocumentType, fd.document_type),
slug=cast(str, slug.name) if slug is not None else "",
physical_id=cast(int, pd.id),
title=cast(str, pd.title),
md5_sum=cast(str, pd.md5_sum),
cdn_object=cast(str, pd.cdn_object),
source_url=cast(str, pd.source_url),
content_type=cast(str, pd.content_type),
user_language_name="TODO",
.join(
lang_user,
lang_user.id == pdl_user.language_id,
isouter=True,
)
.join(
pdl_model,
and_(
PhysicalDocument.id == pdl_model.document_id,
pdl_model.source == LanguageSource.MODEL,
),
isouter=True,
)
.join(
lang_model,
lang_model.id == pdl_model.language_id,
isouter=True,
)
.distinct(FamilyDocument.import_id)
)


Expand Down Expand Up @@ -109,14 +135,12 @@ def all(db: Session) -> list[DocumentReadDTO]:
:param db Session: the database connection
:return Optional[DocumentResponse]: All of things
"""
doc_tuples = _get_query(db).all()
result = _get_query(db).all()

if not doc_tuples:
if not result:
return []

result = [_document_to_dto(d) for d in doc_tuples]

return result
return [DocumentReadDTO(**dict(r)) for r in result]


def get(db: Session, import_id: str) -> Optional[DocumentReadDTO]:
Expand All @@ -128,12 +152,12 @@ def get(db: Session, import_id: str) -> Optional[DocumentReadDTO]:
:return Optional[DocumentResponse]: A single document or nothing
"""
try:
doc_tuple = _get_query(db).filter(FamilyDocument.import_id == import_id).one()
result = _get_query(db).filter(FamilyDocument.import_id == import_id).one()
except NoResultFound as e:
_LOGGER.error(e)
return

return _document_to_dto(doc_tuple)
return DocumentReadDTO(**dict(result))


def search(db: Session, search_term: str) -> list[DocumentReadDTO]:
Expand All @@ -146,9 +170,9 @@ def search(db: Session, search_term: str) -> list[DocumentReadDTO]:
"""
term = f"%{escape_like(search_term)}%"
search = or_(PhysicalDocument.title.ilike(term))
found = _get_query(db).filter(search).all()
result = _get_query(db).filter(search).all()

return [_document_to_dto(d) for d in found]
return [DocumentReadDTO(**dict(r)) for r in result]


def update(db: Session, import_id: str, document: DocumentWriteDTO) -> bool:
Expand All @@ -160,7 +184,6 @@ def update(db: Session, import_id: str, document: DocumentWriteDTO) -> bool:
:param DocumentDTO document: The new values
:return bool: True if new values were set otherwise false.
"""
# TODO: Implement this:

new_values = document.model_dump()

Expand All @@ -186,6 +209,17 @@ def update(db: Session, import_id: str, document: DocumentWriteDTO) -> bool:
)
return False

# User Language changed?
pdl = (
db.query(PhysicalDocumentLanguage)
.filter(
PhysicalDocumentLanguage.document_id == original_fd.physical_document_id
)
.filter(PhysicalDocumentLanguage.source == LanguageSource.USER)
.one_or_none()
)
new_language = _get_new_language(db, new_values, pdl)

update_slug = original_pd.title != new_values["title"]

commands = [
Expand All @@ -204,6 +238,27 @@ def update(db: Session, import_id: str, document: DocumentWriteDTO) -> bool:
),
]

if new_language is not None:
if pdl is not None:
command = (
db_update(PhysicalDocumentLanguage)
.where(
and_(
PhysicalDocumentLanguage.document_id
== original_fd.physical_document_id,
PhysicalDocumentLanguage.source == LanguageSource.USER,
)
)
.values(language_id=new_language.id)
)
else:
command = db_insert(PhysicalDocumentLanguage).values(
document_id=original_fd.physical_document_id,
language_id=new_language.id,
source=LanguageSource.USER,
)
commands.append(command)

for c in commands:
result = db.execute(c)

Expand All @@ -222,6 +277,26 @@ def update(db: Session, import_id: str, document: DocumentWriteDTO) -> bool:
return True


def _get_new_language(
db: Session, new_values: dict, pdl: PhysicalDocumentLanguage
) -> Optional[Language]:
requested_language = new_values["user_language_name"]
if requested_language is None:
return None
else:
new_language = (
db.query(Language)
.filter(Language.name == new_values["user_language_name"])
.one()
)
update_language = (
pdl.language_id != new_language.id if pdl is not None else True
)

if update_language:
return new_language


def create(db: Session, document: DocumentCreateDTO) -> str:
"""
Creates a new document.
Expand Down Expand Up @@ -316,7 +391,7 @@ def count(db: Session) -> Optional[int]:
:return Optional[int]: The number of documents in the repository or none.
"""
try:
n_documents = _get_query(db).count()
n_documents = db.query(FamilyDocument).count()
except NoResultFound as e:
_LOGGER.error(e)
return
Expand Down
Loading
Loading