From ef937bc38081f2d8b7193a7e58cd13daf2d7f733 Mon Sep 17 00:00:00 2001 From: Reto Tschuppert Date: Mon, 3 Jun 2024 08:57:46 +0200 Subject: [PATCH 01/44] Resolve merge conflict --- src/onegov/agency/views/search.py | 13 +- src/onegov/fsi/views/search.py | 21 +++- src/onegov/landsgemeinde/views/search.py | 14 ++- src/onegov/onboarding/app.py | 4 +- src/onegov/org/app.py | 4 +- src/onegov/org/layout.py | 7 ++ src/onegov/org/models/__init__.py | 3 +- src/onegov/org/models/search.py | 154 +++++++++++++++++++++++ src/onegov/org/path.py | 11 ++ src/onegov/org/views/search.py | 47 +++++++ src/onegov/search/__init__.py | 4 +- src/onegov/search/integration.py | 13 +- src/onegov/town6/views/search.py | 14 ++- tests/onegov/search/conftest.py | 4 +- tests/onegov/search/test_integration.py | 22 ++-- 15 files changed, 299 insertions(+), 36 deletions(-) diff --git a/src/onegov/agency/views/search.py b/src/onegov/agency/views/search.py index dfdf8a9a61..39c98bd8a4 100644 --- a/src/onegov/agency/views/search.py +++ b/src/onegov/agency/views/search.py @@ -1,9 +1,8 @@ from onegov.agency import AgencyApp from onegov.agency.layout import AgencySearchLayout from onegov.core.security import Public -from onegov.org.models import Search -from onegov.org.views.search import search as search_view - +from onegov.org.models import Search, SearchPostgres +from onegov.org.views.search import search as search_view, search_postgres from typing import TYPE_CHECKING if TYPE_CHECKING: @@ -23,3 +22,11 @@ def search( if isinstance(data, dict): data['layout'] = AgencySearchLayout(self, request) return data + + +@AgencyApp.html(model=SearchPostgres, template='search.pt', permission=Public) +def agency_search_postgres(self, request): + data = search_postgres(self, request) + if isinstance(data, dict): + data['layout'] = AgencySearchLayout(self, request) + return data diff --git a/src/onegov/fsi/views/search.py b/src/onegov/fsi/views/search.py index c8e6cd549a..1405aa99b6 100644 --- a/src/onegov/fsi/views/search.py +++ b/src/onegov/fsi/views/search.py @@ -1,8 +1,11 @@ from onegov.core.security import Personal from onegov.fsi import FsiApp -from onegov.org.models import Search +from onegov.org.models import Search, SearchPostgres from onegov.org.views.search import search as search_view +from onegov.org.views.search import search_postgres as search_postgres_view from onegov.org.views.search import suggestions as suggestions_view +from onegov.org.views.search import (suggestions_postgres as + suggestions_postgres_view) from typing import TYPE_CHECKING @@ -21,9 +24,25 @@ def search( return search_view(self, request) +@FsiApp.html(model=SearchPostgres, template='search.pt', permission=Personal) +def search( + self: Search['Base'], + request: 'FsiRequest' +) -> 'RenderData | Response': + return search_postgres_view(self, request) + + @FsiApp.json(model=Search, name='suggest', permission=Personal) def suggestions( self: Search['Base'], request: 'FsiRequest' ) -> 'JSON_ro': return suggestions_view(self, request) + + +@FsiApp.json(model=SearchPostgres, name='suggest', permission=Personal) +def suggestions( + self: Search['Base'], + request: 'FsiRequest' +) -> 'JSON_ro': + return suggestions_postgres_view(self, request) diff --git a/src/onegov/landsgemeinde/views/search.py b/src/onegov/landsgemeinde/views/search.py index ded65c259f..c7d8b9c908 100644 --- a/src/onegov/landsgemeinde/views/search.py +++ b/src/onegov/landsgemeinde/views/search.py @@ -1,9 +1,8 @@ from onegov.core.security import Public from onegov.landsgemeinde import LandsgemeindeApp from onegov.landsgemeinde.layouts import DefaultLayout -from onegov.org.models import Search -from onegov.org.views.search import search - +from onegov.org.models import Search, SearchPostgres +from onegov.org.views.search import search, search_postgres from typing import TYPE_CHECKING if TYPE_CHECKING: @@ -19,3 +18,12 @@ def landsgemeinde_search( request: 'LandsgemeindeRequest' ) -> 'RenderData | Response': return search(self, request, DefaultLayout(self, request)) + + +@LandsgemeindeApp.html(model=SearchPostgres, template='search.pt', + permission=Public) +def landsgemeinde_search( + self: Search['Base'], + request: 'LandsgemeindeRequest' +) -> 'RenderData | Response': + return search_postgres(self, request, DefaultLayout(self, request)) diff --git a/src/onegov/onboarding/app.py b/src/onegov/onboarding/app.py index a460033181..ab5864d9b4 100644 --- a/src/onegov/onboarding/app.py +++ b/src/onegov/onboarding/app.py @@ -2,7 +2,7 @@ from onegov.file import DepotApp from onegov.onboarding.theme import OnboardingTheme from onegov.reservation import LibresIntegration -from onegov.search import ElasticsearchApp +from onegov.search import SearchApp from typing import Any, TYPE_CHECKING @@ -10,7 +10,7 @@ from collections.abc import Iterator -class OnboardingApp(Framework, LibresIntegration, DepotApp, ElasticsearchApp): +class OnboardingApp(Framework, LibresIntegration, DepotApp, SearchApp): serve_static_files = True diff --git a/src/onegov/org/app.py b/src/onegov/org/app.py index 47500e8845..0aff8a2359 100644 --- a/src/onegov/org/app.py +++ b/src/onegov/org/app.py @@ -32,7 +32,7 @@ from onegov.page import Page, PageCollection from onegov.pay import PayApp from onegov.reservation import LibresIntegration -from onegov.search import ElasticsearchApp +from onegov.search import SearchApp from onegov.ticket import TicketCollection from onegov.ticket import TicketPermission from onegov.user import UserApp @@ -59,7 +59,7 @@ from webob import Response -class OrgApp(Framework, LibresIntegration, ElasticsearchApp, MapboxApp, +class OrgApp(Framework, LibresIntegration, SearchApp, MapboxApp, DepotApp, PayApp, FormApp, UserApp, WebsocketsApp): serve_static_files = True diff --git a/src/onegov/org/layout.py b/src/onegov/org/layout.py index 3bc551cdbf..d4d089d240 100644 --- a/src/onegov/org/layout.py +++ b/src/onegov/org/layout.py @@ -42,6 +42,7 @@ from onegov.org.models.extensions import PersonLinkExtension from onegov.org.models.external_link import ExternalLinkCollection from onegov.org.models.form import submission_deletable +from onegov.org.models.search import SearchPostgres from onegov.org.open_graph import OpenGraphMixin from onegov.org.theme.org_theme import user_options from onegov.org.utils import IMG_URLS @@ -334,11 +335,17 @@ def homepage_url(self) -> str: @cached_property def search_url(self) -> str: """ Returns the url to the search page. """ + # Allows using postgres search while es search remains default + if 'search_postgres' in self.request.path_info: + return self.request.class_link(SearchPostgres) return self.request.class_link(Search) @cached_property def suggestions_url(self) -> str: """ Returns the url to the suggestions json view. """ + # Allows using postgres search while es search remains default + if 'search_postgres' in self.request.path_info: + return self.request.class_link(SearchPostgres, name='suggest') return self.request.class_link(Search, name='suggest') @cached_property diff --git a/src/onegov/org/models/__init__.py b/src/onegov/org/models/__init__.py index 6f123c9222..d2f6dd2f6b 100644 --- a/src/onegov/org/models/__init__.py +++ b/src/onegov/org/models/__init__.py @@ -46,7 +46,7 @@ from onegov.org.models.recipient import ResourceRecipient from onegov.org.models.recipient import ResourceRecipientCollection from onegov.org.models.resource import DaypassResource -from onegov.org.models.search import Search +from onegov.org.models.search import Search, SearchPostgres from onegov.org.models.sitecollection import SiteCollection from onegov.org.models.swiss_holidays import SwissHolidays from onegov.org.models.tan import TAN @@ -103,6 +103,7 @@ 'ResourceRecipient', 'ResourceRecipientCollection', 'Search', + 'SearchPostgres', 'SiteCollection', 'SubmissionMessage', 'SwissHolidays', diff --git a/src/onegov/org/models/search.py b/src/onegov/org/models/search.py index 32da2574fb..68129e2398 100644 --- a/src/onegov/org/models/search.py +++ b/src/onegov/org/models/search.py @@ -1,14 +1,23 @@ +from operator import attrgetter + from elasticsearch_dsl.function import SF from elasticsearch_dsl.query import FunctionScore from elasticsearch_dsl.query import Match from elasticsearch_dsl.query import MatchPhrase from elasticsearch_dsl.query import MultiMatch from functools import cached_property + +from sqlalchemy import func + from onegov.core.collection import Pagination, _M +from onegov.core.orm import Base from onegov.event.models import Event from typing import TYPE_CHECKING + +from onegov.search.utils import searchable_sqlalchemy_models + if TYPE_CHECKING: from onegov.org.request import OrgRequest from onegov.search.dsl import Hit, Response, Search as ESSearch @@ -161,3 +170,148 @@ def suggestions(self) -> tuple[str, ...]: return tuple(self.request.app.es_suggestions_by_request( self.request, self.query )) + + +def locale_mapping(locale): + mapping = {'de_CH': 'german', 'fr_CH': 'french', 'it_CH': 'italian', + 'rm_CH': 'english'} + return mapping.get(locale, 'english') + + +class SearchPostgres(Pagination): + """ + Implements searching in postgres db based on the gin index + """ + results_per_page = 10 + max_query_length = 100 + + def __init__(self, request, query, page): + self.request = request + self.query = query + self.page = page # page index + + self.nbr_of_docs = 0 + self.nbr_of_results = 0 + + @cached_property + def available_documents(self): + if not self.nbr_of_docs: + self.load_batch_results + return self.nbr_of_docs + + @cached_property + def available_results(self): + if not self.nbr_of_results: + self.load_batch_results + return self.nbr_of_results + + @property + def q(self): + return self.query + + def __eq__(self, other): + return self.page == other.page and self.query == other.query + + def subset(self): + return self.batch + + @property + def page_index(self): + return self.page + + def page_by_index(self, index): + return SearchPostgres(self.request, self.query, index) + + @cached_property + def batch(self): + if not self.query: + return None + + if self.query.startswith('#'): + results = self.hashtag_search() + else: + results = self.generic_search() + + return results[self.offset:self.offset + self.batch_size] + + @cached_property + def load_batch_results(self): + """Load search results and sort events by latest occurrence. + This methods is a wrapper around `batch.load()`, which returns the + actual search results form the query. """ + + batch = self.batch + events = [] + non_events = [] + for search_result in batch: + if isinstance(search_result, Event): + events.append(search_result) + else: + non_events.append(search_result) + if not events: + return batch + sorted_events = sorted(events, key=lambda e: e.latest_occurrence.start) + return sorted_events + non_events + + def generic_search(self): + doc_count = 0 + results = [] + + language = locale_mapping(self.request.locale) + for model in searchable_sqlalchemy_models(Base): + if model.es_public or self.request.is_logged_in: + query = self.request.session.query(model) + doc_count += query.count() + query = query.filter( + model.fts_idx.op('@@')(func.websearch_to_tsquery( + language, self.query)) + ) + query = query.order_by(func.ts_rank_cd( + model.fts_idx, func.websearch_to_tsquery(language, + self.query))) + results.extend(query.all()) + + self.nbr_of_docs = doc_count + self.nbr_of_results = len(results) + results.sort(key=attrgetter('search_score'), reverse=False) + return results + + def hashtag_search(self): + q = self.query.lstrip('#') + results = [] + + for model in searchable_sqlalchemy_models(Base): + # skip certain tables for hashtag search for better performance + if model.__tablename__ not in ['attendees', 'files', 'people', + 'tickets', 'users']: + if model.es_public or self.request.is_logged_in: + for doc in self.request.session.query(model).all(): + if doc.es_tags and q in doc.es_tags: + results.append(doc) + + self.nbr_of_results = len(results) + results.sort(key=attrgetter('search_score'), reverse=False) + return results + + def feeling_lucky(self): + if self.batch: + first_entry = self.batch[0].load() + + # XXX the default view to the event should be doing the redirect + if first_entry.__tablename__ == 'events': + return self.request.link(first_entry, 'latest') + else: + return self.request.link(first_entry) + + @cached_property + def subset_count(self): + return self.available_results + + def suggestions(self): + suggestions = list() + + for element in self.generic_search(): + suggest = getattr(element, 'es_suggestion', []) + suggestions.append(suggest) + + return tuple(suggestions[:15]) diff --git a/src/onegov/org/path.py b/src/onegov/org/path.py index e1ac9428b3..0edfebb52b 100644 --- a/src/onegov/org/path.py +++ b/src/onegov/org/path.py @@ -67,6 +67,7 @@ from onegov.org.models.external_link import ( ExternalLinkCollection, ExternalLink) from onegov.org.models.resource import FindYourSpotCollection +from onegov.org.models.search import SearchPostgres from onegov.page import PageCollection from onegov.pay import PaymentProvider, Payment, PaymentCollection from onegov.pay import PaymentProviderCollection @@ -690,6 +691,16 @@ def get_search( return Search(request, q, page) +@OrgApp.path(model=SearchPostgres, path='/search_postgres', + converters={'page': int}) +def get_search_postgres( + request: 'OrgRequest', + q: str = '', + page: int = 0 +) -> Search[Any]: + return SearchPostgres(request, q, page) + + @OrgApp.path(model=AtoZPages, path='/a-z') def get_a_to_z(request: 'OrgRequest') -> AtoZPages: return AtoZPages(request) diff --git a/src/onegov/org/views/search.py b/src/onegov/org/views/search.py index 7b714ed902..617ac899e3 100644 --- a/src/onegov/org/views/search.py +++ b/src/onegov/org/views/search.py @@ -5,6 +5,7 @@ from onegov.org.elements import Link from onegov.org.layout import DefaultLayout from onegov.org.models import Search +from onegov.org.models.search import SearchPostgres from onegov.search import SearchOfflineError from webob import exc @@ -59,9 +60,55 @@ def search( } +@OrgApp.html(model=SearchPostgres, template='search', permission=Public) +def search_postgres(self, request, layout=None): + layout = layout or DefaultLayout(self, request) + layout.breadcrumbs.append(Link(_("Search"), '#')) + + try: + searchlabel = _("Search through ${count} indexed documents", mapping={ + 'count': self.available_documents + }) + resultslabel = _("${count} Results", mapping={ + 'count': self.available_results + }) + except SearchOfflineError: + return { + 'title': _("Search Unavailable"), + 'layout': layout, + 'connection': False + } + + if 'lucky' in request.GET: + url = self.feeling_lucky() + + if url: + return morepath.redirect(url) + + return { + # TODO switch back to 'Search' once es is gone + 'title': _("Org Search Postgres"), + 'model': self, + 'layout': layout, + 'hide_search_header': True, + 'searchlabel': searchlabel, + 'resultslabel': resultslabel, + 'connection': True + } + + @OrgApp.json(model=Search, name='suggest', permission=Public) def suggestions(self: Search['Base'], request: 'OrgRequest') -> 'JSON_ro': try: return self.suggestions() except SearchOfflineError as exception: raise exc.HTTPNotFound() from exception + + +@OrgApp.json(model=SearchPostgres, name='suggest', permission=Public) +def suggestions_postgres(self: Search['Base'], request: 'OrgRequest') \ + -> 'JSON_ro': + try: + return self.suggestions() + except SearchOfflineError as exception: + raise exc.HTTPNotFound() from exception diff --git a/src/onegov/search/__init__.py b/src/onegov/search/__init__.py index d2d5df7288..b1da536e8a 100644 --- a/src/onegov/search/__init__.py +++ b/src/onegov/search/__init__.py @@ -7,11 +7,11 @@ from onegov.search.mixins import Searchable, ORMSearchable, SearchableContent from onegov.search.dsl import Search -from onegov.search.integration import ElasticsearchApp +from onegov.search.integration import SearchApp from onegov.search.errors import SearchOfflineError __all__ = [ - 'ElasticsearchApp', + 'SearchApp', 'ORMSearchable', 'Search', 'Searchable', diff --git a/src/onegov/search/integration.py b/src/onegov/search/integration.py index 0a9c3f31da..4ddb72aba0 100644 --- a/src/onegov/search/integration.py +++ b/src/onegov/search/integration.py @@ -108,9 +108,8 @@ def is_5xx_error(error: TransportError) -> bool: return False -# TODO rename to SearchApp -class ElasticsearchApp(morepath.App): - """ Provides elasticsearch integration for +class SearchApp(morepath.App): + """ Provides elasticsearch and postgres integration for :class:`onegov.core.framework.Framework` based applications. The application must be connected to a database. @@ -177,6 +176,8 @@ def configure_search(self, **cfg: Any) -> None: - fr """ + # TODO: set default to False once fully switched to psql (or remove + # es stuff entirely) if not cfg.get('enable_elasticsearch', True): self.es_client = None return @@ -483,14 +484,14 @@ def reindex_model(model: type['Base']) -> None: self.psql_indexer.bulk_process() -@ElasticsearchApp.tween_factory(over=transaction_tween_factory) +@SearchApp.tween_factory(over=transaction_tween_factory) def process_indexer_tween_factory( - app: ElasticsearchApp, + app: SearchApp, handler: 'Callable[[CoreRequest], Response]' ) -> 'Callable[[CoreRequest], Response]': def process_indexer_tween(request: 'CoreRequest') -> 'Response': - app: ElasticsearchApp = request.app # type:ignore[assignment] + app: SearchApp = request.app # type:ignore[assignment] if not app.es_client: return handler(request) diff --git a/src/onegov/town6/views/search.py b/src/onegov/town6/views/search.py index db31ec7a48..0914d8e0a8 100644 --- a/src/onegov/town6/views/search.py +++ b/src/onegov/town6/views/search.py @@ -1,10 +1,10 @@ from onegov.core.security import Public -from onegov.org.views.search import search -from onegov.town6 import TownApp from onegov.org.models import Search +from onegov.org.models.search import SearchPostgres +from onegov.org.views.search import search, search_postgres +from onegov.town6 import TownApp from onegov.town6.layout import DefaultLayout - from typing import TYPE_CHECKING if TYPE_CHECKING: from onegov.core.orm import Base @@ -19,3 +19,11 @@ def town_search( request: 'TownRequest' ) -> 'RenderData | Response': return search(self, request, DefaultLayout(self, request)) + + +@TownApp.html(model=SearchPostgres, template='search.pt', permission=Public) +def town_search_postgres( + self: Search['Base'], + request: 'TownRequest' +) -> 'RenderData | Response': + return search_postgres(self, request, DefaultLayout(self, request)) diff --git a/tests/onegov/search/conftest.py b/tests/onegov/search/conftest.py index da452eb3f7..f970b20325 100644 --- a/tests/onegov/search/conftest.py +++ b/tests/onegov/search/conftest.py @@ -3,7 +3,7 @@ from yaml import dump from onegov.core import Framework -from onegov.search import ElasticsearchApp +from onegov.search import SearchApp from tests.shared.utils import create_app @@ -46,7 +46,7 @@ def cfg_path(postgres_dsn, session_manager, temporary_directory, redis_url): return cfg_path -class TestApp(Framework, ElasticsearchApp): +class TestApp(Framework, SearchApp): pass diff --git a/tests/onegov/search/test_integration.py b/tests/onegov/search/test_integration.py index a93e13f60d..581818c5e9 100644 --- a/tests/onegov/search/test_integration.py +++ b/tests/onegov/search/test_integration.py @@ -8,7 +8,7 @@ from onegov.core import Framework from onegov.core.orm.mixins import TimestampMixin from onegov.core.utils import scan_morepath_modules -from onegov.search import ElasticsearchApp, ORMSearchable +from onegov.search import SearchApp, ORMSearchable from sqlalchemy import Boolean, Column, Integer, Text from sqlalchemy.ext.declarative import declarative_base from webtest import TestApp as Client @@ -17,7 +17,7 @@ def test_app_integration(es_url): - class App(Framework, ElasticsearchApp): + class App(Framework, SearchApp): pass app = App() @@ -33,7 +33,7 @@ class App(Framework, ElasticsearchApp): def test_search_query(es_url, postgres_dsn): - class App(Framework, ElasticsearchApp): + class App(Framework, SearchApp): pass Base = declarative_base() @@ -153,7 +153,7 @@ def es_language(self): def test_orm_integration(es_url, postgres_dsn, redis_url): - class App(Framework, ElasticsearchApp): + class App(Framework, SearchApp): pass Base = declarative_base() @@ -298,7 +298,7 @@ def view_delete_document(self, request): def test_alternate_id_property(es_url, postgres_dsn): - class App(Framework, ElasticsearchApp): + class App(Framework, SearchApp): pass Base = declarative_base() @@ -362,7 +362,7 @@ def es_suggestion(self): def test_orm_polymorphic(es_url, postgres_dsn): - class App(Framework, ElasticsearchApp): + class App(Framework, SearchApp): pass Base = declarative_base() @@ -450,7 +450,7 @@ def update(): def test_orm_polymorphic_sublcass_only(es_url, postgres_dsn): - class App(Framework, ElasticsearchApp): + class App(Framework, SearchApp): pass Base = declarative_base() @@ -508,7 +508,7 @@ def es_suggestion(self): def test_suggestions(es_url, postgres_dsn): - class App(Framework, ElasticsearchApp): + class App(Framework, SearchApp): pass Base = declarative_base() @@ -629,7 +629,7 @@ def es_suggestion(self): def test_language_detection(es_url, postgres_dsn): - class App(Framework, ElasticsearchApp): + class App(Framework, SearchApp): pass Base = declarative_base() @@ -685,7 +685,7 @@ class Document(Base, ORMSearchable): def test_language_update(es_url, postgres_dsn): - class App(Framework, ElasticsearchApp): + class App(Framework, SearchApp): pass Base = declarative_base() @@ -741,7 +741,7 @@ class Document(Base, ORMSearchable): def test_date_decay(es_url, postgres_dsn): - class App(Framework, ElasticsearchApp): + class App(Framework, SearchApp): pass Base = declarative_base() From 2fdd50e4cbe34f643513a83c98924c710042dc06 Mon Sep 17 00:00:00 2001 From: Reto Tschuppert Date: Tue, 4 Jun 2024 11:01:21 +0200 Subject: [PATCH 02/44] Adds view and templates for search views --- src/onegov/org/models/search.py | 74 +++++++++++-------- src/onegov/org/path.py | 2 +- src/onegov/org/templates/search_postgres.pt | 68 +++++++++++++++++ src/onegov/org/views/search.py | 3 +- src/onegov/search/cli.py | 2 +- src/onegov/search/integration.py | 13 ++-- src/onegov/search/mixins.py | 5 ++ src/onegov/town6/templates/search_postgres.pt | 68 +++++++++++++++++ src/onegov/town6/views/search.py | 7 +- 9 files changed, 199 insertions(+), 43 deletions(-) create mode 100644 src/onegov/org/templates/search_postgres.pt create mode 100644 src/onegov/town6/templates/search_postgres.pt diff --git a/src/onegov/org/models/search.py b/src/onegov/org/models/search.py index 68129e2398..9ff250e1fa 100644 --- a/src/onegov/org/models/search.py +++ b/src/onegov/org/models/search.py @@ -8,13 +8,14 @@ from functools import cached_property from sqlalchemy import func +from sqlalchemy.orm import Query from onegov.core.collection import Pagination, _M from onegov.core.orm import Base from onegov.event.models import Event -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from onegov.search.utils import searchable_sqlalchemy_models @@ -172,20 +173,20 @@ def suggestions(self) -> tuple[str, ...]: )) -def locale_mapping(locale): +def locale_mapping(locale: str) -> str: mapping = {'de_CH': 'german', 'fr_CH': 'french', 'it_CH': 'italian', 'rm_CH': 'english'} return mapping.get(locale, 'english') -class SearchPostgres(Pagination): +class SearchPostgres(Pagination[_M]): """ Implements searching in postgres db based on the gin index """ results_per_page = 10 max_query_length = 100 - def __init__(self, request, query, page): + def __init__(self, request: 'OrgRequest', query: str, page: int): self.request = request self.query = query self.page = page # page index @@ -194,32 +195,34 @@ def __init__(self, request, query, page): self.nbr_of_results = 0 @cached_property - def available_documents(self): + def available_documents(self) -> int: if not self.nbr_of_docs: self.load_batch_results return self.nbr_of_docs @cached_property - def available_results(self): + def available_results(self) -> int: if not self.nbr_of_results: self.load_batch_results return self.nbr_of_results @property - def q(self): + def q(self) -> str: return self.query - def __eq__(self, other): + def __eq__(self, other: object) -> bool: + if not isinstance(other, SearchPostgres): + return NotImplemented return self.page == other.page and self.query == other.query def subset(self): return self.batch @property - def page_index(self): + def page_index(self) -> int: return self.page - def page_by_index(self, index): + def page_by_index(self, index: int): return SearchPostgres(self.request, self.query, index) @cached_property @@ -235,7 +238,7 @@ def batch(self): return results[self.offset:self.offset + self.batch_size] @cached_property - def load_batch_results(self): + def load_batch_results(self) -> list[Query[Any]]: """Load search results and sort events by latest occurrence. This methods is a wrapper around `batch.load()`, which returns the actual search results form the query. """ @@ -253,30 +256,38 @@ def load_batch_results(self): sorted_events = sorted(events, key=lambda e: e.latest_occurrence.start) return sorted_events + non_events - def generic_search(self): + def generic_search(self) -> list[Query[Any]]: doc_count = 0 results = [] language = locale_mapping(self.request.locale) - for model in searchable_sqlalchemy_models(Base): - if model.es_public or self.request.is_logged_in: - query = self.request.session.query(model) - doc_count += query.count() - query = query.filter( - model.fts_idx.op('@@')(func.websearch_to_tsquery( - language, self.query)) - ) - query = query.order_by(func.ts_rank_cd( - model.fts_idx, func.websearch_to_tsquery(language, - self.query))) - results.extend(query.all()) + for base in self.request.app.session_manager.bases: + for model in searchable_sqlalchemy_models(base): + if model.es_public or self.request.is_logged_in: + query = self.request.session.query(model) + + if query.count(): + doc_count += query.count() + query = query.filter( + model.fts_idx.op('@@')(func.websearch_to_tsquery( + language, self.query)) + ) + query = query.order_by( + func.ts_rank_cd( + model.fts_idx, + func.websearch_to_tsquery( + language, + self.query) + ) + ) + results.extend(query.all()) self.nbr_of_docs = doc_count self.nbr_of_results = len(results) - results.sort(key=attrgetter('search_score'), reverse=False) + results.sort(key=attrgetter('ts_score'), reverse=False) return results - def hashtag_search(self): + def hashtag_search(self) -> list[Query[Any]]: q = self.query.lstrip('#') results = [] @@ -290,10 +301,10 @@ def hashtag_search(self): results.append(doc) self.nbr_of_results = len(results) - results.sort(key=attrgetter('search_score'), reverse=False) + results.sort(key=attrgetter('ts_score'), reverse=False) return results - def feeling_lucky(self): + def feeling_lucky(self) -> str | None: if self.batch: first_entry = self.batch[0].load() @@ -302,13 +313,14 @@ def feeling_lucky(self): return self.request.link(first_entry, 'latest') else: return self.request.link(first_entry) + return None @cached_property - def subset_count(self): + def subset_count(self) -> int: return self.available_results - def suggestions(self): - suggestions = list() + def suggestions(self) -> tuple[str, ...]: + suggestions = [] for element in self.generic_search(): suggest = getattr(element, 'es_suggestion', []) diff --git a/src/onegov/org/path.py b/src/onegov/org/path.py index 0edfebb52b..9d928e8c34 100644 --- a/src/onegov/org/path.py +++ b/src/onegov/org/path.py @@ -691,7 +691,7 @@ def get_search( return Search(request, q, page) -@OrgApp.path(model=SearchPostgres, path='/search_postgres', +@OrgApp.path(model=SearchPostgres, path='/search-postgres', converters={'page': int}) def get_search_postgres( request: 'OrgRequest', diff --git a/src/onegov/org/templates/search_postgres.pt b/src/onegov/org/templates/search_postgres.pt new file mode 100644 index 0000000000..eee07c1e56 --- /dev/null +++ b/src/onegov/org/templates/search_postgres.pt @@ -0,0 +1,68 @@ +
+ + ${title} + + +
+
+ Postgres Searching is currently unavailable due to technical + difficulties. + Please excuse the inconvenience and try again later. +
+
+ +
+
+ +
+
+ + +

${resultslabel}

+
+
+ +

Your postgres search returned no results.

+ +
    + +
  • + + + +
      +
    • Score: ${result.explanation['score']}
    • + +
    • + ${title}: ${result.explanation[key]['value']} +
    • +
      +
    +
    +
  • +
    +
+ +
+
+
+ + +
\ No newline at end of file diff --git a/src/onegov/org/views/search.py b/src/onegov/org/views/search.py index 617ac899e3..ddb719d51a 100644 --- a/src/onegov/org/views/search.py +++ b/src/onegov/org/views/search.py @@ -60,7 +60,8 @@ def search( } -@OrgApp.html(model=SearchPostgres, template='search', permission=Public) +@OrgApp.html(model=SearchPostgres, template='search_postgres.pt', + permission=Public) def search_postgres(self, request, layout=None): layout = layout or DefaultLayout(self, request) layout.breadcrumbs.append(Link(_("Search"), '#')) diff --git a/src/onegov/search/cli.py b/src/onegov/search/cli.py index 3257e929a3..6be69e260f 100644 --- a/src/onegov/search/cli.py +++ b/src/onegov/search/cli.py @@ -83,7 +83,7 @@ def run_reindex(request: 'CoreRequest', app: 'Framework') -> None: click.secho(title, underline=True) start = utcnow() - request.app.es_perform_reindex(fail) # type:ignore[attr-defined] + request.app.perform_reindex(fail) # type:ignore[attr-defined] click.secho(f"took {utcnow() - start}") diff --git a/src/onegov/search/integration.py b/src/onegov/search/integration.py index 4ddb72aba0..1ece380f30 100644 --- a/src/onegov/search/integration.py +++ b/src/onegov/search/integration.py @@ -419,7 +419,7 @@ def get_searchable_models(self) -> list[type['Searchable']]: for model in searchable_sqlalchemy_models(base) ] - def es_perform_reindex(self, fail: bool = False) -> None: + def perform_reindex(self, fail: bool = False) -> None: """ Re-indexes all content. This is a heavy operation and should be run with consideration. @@ -428,7 +428,7 @@ def es_perform_reindex(self, fail: bool = False) -> None: """ # prevent tables get re-indexed twice - index_done = [] + index_done = [str] schema = self.schema index_log.info(f'Indexing schema {schema}..') @@ -470,12 +470,13 @@ def reindex_model(model: type['Base']) -> None: session.invalidate() session.bind.dispose() - models = self.get_searchable_models() - index_log.info(f'Number of models to be indexed: {len(models)}') - with ThreadPoolExecutor() as executor: results = executor.map( - reindex_model, (model for model in models) + reindex_model, ( + model + for base in self.session_manager.bases + for model in searchable_sqlalchemy_models(base) + ) ) if fail: print(tuple(results)) diff --git a/src/onegov/search/mixins.py b/src/onegov/search/mixins.py index 02e64668cd..96a25cc7ef 100644 --- a/src/onegov/search/mixins.py +++ b/src/onegov/search/mixins.py @@ -175,6 +175,11 @@ def es_tags(self) -> list[str] | None: """ Returns a list of tags associated with this content. """ return None + @property + def ts_score(self) -> int: + """ Returns the score of the full text search. """ + return 10 + class ORMSearchable(Searchable): """ Extends the default :class:`Searchable` class with sensible defaults diff --git a/src/onegov/town6/templates/search_postgres.pt b/src/onegov/town6/templates/search_postgres.pt new file mode 100644 index 0000000000..eee07c1e56 --- /dev/null +++ b/src/onegov/town6/templates/search_postgres.pt @@ -0,0 +1,68 @@ +
+ + ${title} + + +
+
+ Postgres Searching is currently unavailable due to technical + difficulties. + Please excuse the inconvenience and try again later. +
+
+ +
+
+ +
+
+ + +

${resultslabel}

+
+
+ +

Your postgres search returned no results.

+ +
    + +
  • + + + +
      +
    • Score: ${result.explanation['score']}
    • + +
    • + ${title}: ${result.explanation[key]['value']} +
    • +
      +
    +
    +
  • +
    +
+ +
+
+
+ + +
\ No newline at end of file diff --git a/src/onegov/town6/views/search.py b/src/onegov/town6/views/search.py index 0914d8e0a8..066e513ae5 100644 --- a/src/onegov/town6/views/search.py +++ b/src/onegov/town6/views/search.py @@ -21,9 +21,10 @@ def town_search( return search(self, request, DefaultLayout(self, request)) -@TownApp.html(model=SearchPostgres, template='search.pt', permission=Public) +@TownApp.html(model=SearchPostgres, template='search_postgres.pt', + permission=Public) def town_search_postgres( - self: Search['Base'], - request: 'TownRequest' + self: Search['Base'], + request: 'TownRequest' ) -> 'RenderData | Response': return search_postgres(self, request, DefaultLayout(self, request)) From d90e85aa4fe6a4c108165835c4b50bd67ab2eace Mon Sep 17 00:00:00 2001 From: Reto Tschuppert Date: Wed, 5 Jun 2024 10:06:51 +0200 Subject: [PATCH 03/44] Revert type wrong type annotations --- src/onegov/org/models/search.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/onegov/org/models/search.py b/src/onegov/org/models/search.py index 9ff250e1fa..e8d6285e86 100644 --- a/src/onegov/org/models/search.py +++ b/src/onegov/org/models/search.py @@ -8,14 +8,13 @@ from functools import cached_property from sqlalchemy import func -from sqlalchemy.orm import Query from onegov.core.collection import Pagination, _M from onegov.core.orm import Base from onegov.event.models import Event -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING from onegov.search.utils import searchable_sqlalchemy_models @@ -179,7 +178,7 @@ def locale_mapping(locale: str) -> str: return mapping.get(locale, 'english') -class SearchPostgres(Pagination[_M]): +class SearchPostgres(Pagination): """ Implements searching in postgres db based on the gin index """ @@ -238,7 +237,7 @@ def batch(self): return results[self.offset:self.offset + self.batch_size] @cached_property - def load_batch_results(self) -> list[Query[Any]]: + def load_batch_results(self): """Load search results and sort events by latest occurrence. This methods is a wrapper around `batch.load()`, which returns the actual search results form the query. """ @@ -256,7 +255,7 @@ def load_batch_results(self) -> list[Query[Any]]: sorted_events = sorted(events, key=lambda e: e.latest_occurrence.start) return sorted_events + non_events - def generic_search(self) -> list[Query[Any]]: + def generic_search(self): doc_count = 0 results = [] @@ -287,7 +286,7 @@ def generic_search(self) -> list[Query[Any]]: results.sort(key=attrgetter('ts_score'), reverse=False) return results - def hashtag_search(self) -> list[Query[Any]]: + def hashtag_search(self): q = self.query.lstrip('#') results = [] From 22932f0dd8c6c30c8102cb664998e7d0d966e8a9 Mon Sep 17 00:00:00 2001 From: Reto Tschuppert Date: Wed, 5 Jun 2024 11:15:51 +0200 Subject: [PATCH 04/44] Sort search results after score, timestamp --- src/onegov/org/models/search.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/onegov/org/models/search.py b/src/onegov/org/models/search.py index e8d6285e86..36769325e1 100644 --- a/src/onegov/org/models/search.py +++ b/src/onegov/org/models/search.py @@ -283,7 +283,8 @@ def generic_search(self): self.nbr_of_docs = doc_count self.nbr_of_results = len(results) - results.sort(key=attrgetter('ts_score'), reverse=False) + results.sort(key=attrgetter('ts_score', 'modified', 'created'), + reverse=False) return results def hashtag_search(self): From 0507b46bde969a06432b66a0f30aa7d3c855fab3 Mon Sep 17 00:00:00 2001 From: Reto Tschuppert Date: Wed, 5 Jun 2024 14:14:01 +0200 Subject: [PATCH 05/44] Adds a simple ranking --- src/onegov/activity/models/attendee.py | 4 ++++ src/onegov/directory/models/directory.py | 4 ++++ .../directory/models/directory_entry.py | 4 ++++ src/onegov/event/models/event.py | 4 ++++ src/onegov/feriennet/models/activity.py | 4 ++++ src/onegov/file/models/file.py | 4 ++++ src/onegov/form/models/definition.py | 4 ++++ src/onegov/fsi/models/course.py | 4 ++++ src/onegov/newsletter/models.py | 4 ++++ src/onegov/org/models/external_link.py | 4 ++++ src/onegov/org/models/search.py | 21 ++++++++++++++++--- src/onegov/page/model.py | 4 ++++ src/onegov/people/models/membership.py | 4 ++++ src/onegov/people/models/person.py | 4 ++++ src/onegov/search/mixins.py | 8 +++++-- src/onegov/ticket/model.py | 4 ++++ .../translator_directory/models/translator.py | 4 ++++ src/onegov/user/models/user.py | 4 ++++ 18 files changed, 88 insertions(+), 5 deletions(-) diff --git a/src/onegov/activity/models/attendee.py b/src/onegov/activity/models/attendee.py index 163a62bc25..852f25ba69 100644 --- a/src/onegov/activity/models/attendee.py +++ b/src/onegov/activity/models/attendee.py @@ -67,6 +67,10 @@ class Attendee(Base, TimestampMixin, ORMSearchable): } es_public = False + @property + def ts_score(self) -> int: + return 3 + @property def es_suggestion(self) -> str: return self.name diff --git a/src/onegov/directory/models/directory.py b/src/onegov/directory/models/directory.py index 7196aa9aa8..0377154ce9 100644 --- a/src/onegov/directory/models/directory.py +++ b/src/onegov/directory/models/directory.py @@ -109,6 +109,10 @@ class Directory(Base, ContentMixin, TimestampMixin, 'lead': {'type': 'localized'} } + @property + def ts_score(self) -> int: + return 7 + @property def es_public(self) -> bool: return False # to be overridden downstream diff --git a/src/onegov/directory/models/directory_entry.py b/src/onegov/directory/models/directory_entry.py index b477d04b2d..b5946abc52 100644 --- a/src/onegov/directory/models/directory_entry.py +++ b/src/onegov/directory/models/directory_entry.py @@ -41,6 +41,10 @@ class DirectoryEntry(Base, ContentMixin, CoordinatesMixin, TimestampMixin, 'text': {'type': 'localized_html'} } + @property + def ts_score(self) -> int: + return 4 + @property def es_public(self) -> bool: return False # to be overridden downstream diff --git a/src/onegov/event/models/event.py b/src/onegov/event/models/event.py index 09814c5bbc..3ecb36e3ff 100644 --- a/src/onegov/event/models/event.py +++ b/src/onegov/event/models/event.py @@ -137,6 +137,10 @@ class Event(Base, OccurrenceMixin, TimestampMixin, SearchableContent, EventFile, 'pdf', 'one-to-one', uselist=False, backref_suffix='pdf' ) + @property + def ts_score(self) -> int: + return 1 + def set_image( self, content: bytes | IO[bytes] | None, diff --git a/src/onegov/feriennet/models/activity.py b/src/onegov/feriennet/models/activity.py index cc47c0a4b4..8277619bdc 100644 --- a/src/onegov/feriennet/models/activity.py +++ b/src/onegov/feriennet/models/activity.py @@ -31,6 +31,10 @@ class VacationActivity(Activity, CoordinatesExtension, SearchableContent): 'organiser': {'type': 'text'} } + @property + def ts_score(self) -> int: + return 1 + @property def es_public(self) -> bool: return self.state == 'accepted' diff --git a/src/onegov/file/models/file.py b/src/onegov/file/models/file.py index dd2f073fcc..5a3a030270 100644 --- a/src/onegov/file/models/file.py +++ b/src/onegov/file/models/file.py @@ -109,6 +109,10 @@ class SearchableFile(ORMSearchable): name: Column[str] published: Column[bool] + @property + def ts_score(self) -> int: + return 10 + @property def es_suggestion(self) -> str: return self.name diff --git a/src/onegov/form/models/definition.py b/src/onegov/form/models/definition.py index 07ed59e749..7a500d0427 100644 --- a/src/onegov/form/models/definition.py +++ b/src/onegov/form/models/definition.py @@ -147,6 +147,10 @@ def id(self) -> str: 'polymorphic_identity': 'generic' } + @property + def ts_score(self) -> int: + return 7 + @property def form_class(self) -> Type['Form']: """ Parses the form definition and returns a form class. """ diff --git a/src/onegov/fsi/models/course.py b/src/onegov/fsi/models/course.py index dc44990e62..2c7ae1c50a 100644 --- a/src/onegov/fsi/models/course.py +++ b/src/onegov/fsi/models/course.py @@ -52,6 +52,10 @@ class Course(Base, ORMSearchable): default=False ) + @property + def ts_score(self) -> int: + return 2 + if TYPE_CHECKING: # FIXME: use explicit backref events: relationship[AppenderQuery[CourseEvent]] diff --git a/src/onegov/newsletter/models.py b/src/onegov/newsletter/models.py index 266a9f916e..779a953566 100644 --- a/src/onegov/newsletter/models.py +++ b/src/onegov/newsletter/models.py @@ -95,6 +95,10 @@ def validate_name(self, key: str, name: str) -> str: secondary=newsletter_recipients, back_populates='newsletters') + @property + def ts_score(self) -> int: + return 6 + @property def open_recipients(self) -> tuple['Recipient', ...]: received = select([newsletter_recipients.c.recipient_id]).where( diff --git a/src/onegov/org/models/external_link.py b/src/onegov/org/models/external_link.py index 25ea34581f..5c68fef630 100644 --- a/src/onegov/org/models/external_link.py +++ b/src/onegov/org/models/external_link.py @@ -59,6 +59,10 @@ class ExternalLink(Base, ContentMixin, TimestampMixin, AccessExtension, lead: dict_property[str | None] = meta_property() + @property + def ts_score(self) -> int: + return 8 + @observes('title') def title_observer(self, title: str) -> None: self.order = normalize_for_url(title) diff --git a/src/onegov/org/models/search.py b/src/onegov/org/models/search.py index 36769325e1..776441916d 100644 --- a/src/onegov/org/models/search.py +++ b/src/onegov/org/models/search.py @@ -1,3 +1,4 @@ +from datetime import datetime from operator import attrgetter from elasticsearch_dsl.function import SF @@ -7,6 +8,7 @@ from elasticsearch_dsl.query import MultiMatch from functools import cached_property +from pytz import utc from sqlalchemy import func from onegov.core.collection import Pagination, _M @@ -283,9 +285,22 @@ def generic_search(self): self.nbr_of_docs = doc_count self.nbr_of_results = len(results) - results.sort(key=attrgetter('ts_score', 'modified', 'created'), - reverse=False) - return results + + # remove duplicates + results = list(set(results)) + + # sort items after ts_score, modified and created. If no timestamp + # is available, use default time + default_time = utc.localize( + datetime.datetime(1970, 1, 1)) + return sorted( + results, + key=lambda k: ( + k.get('ts_score', 10), + k.get('modified') or default_time, + k.get('created') or default_time, + ), + reverse=False) def hashtag_search(self): q = self.query.lstrip('#') diff --git a/src/onegov/page/model.py b/src/onegov/page/model.py index 659a2fa59b..718a88ff53 100644 --- a/src/onegov/page/model.py +++ b/src/onegov/page/model.py @@ -27,6 +27,10 @@ class Page(AdjacencyList, ContentMixin, TimestampMixin, __tablename__ = 'pages' + @property + def ts_score(self) -> int: + return 2 + if TYPE_CHECKING: # we override these relationships to be more specific parent: relationship['Page | None'] diff --git a/src/onegov/people/models/membership.py b/src/onegov/people/models/membership.py index a046c94f8b..98cbb8657c 100644 --- a/src/onegov/people/models/membership.py +++ b/src/onegov/people/models/membership.py @@ -104,6 +104,10 @@ class AgencyMembership(Base, ContentMixin, TimestampMixin, ORMSearchable, #: when the membership started since: 'Column[str | None]' = Column(Text, nullable=True) + @property + def ts_score(self) -> int: + return 3 + @property def siblings_by_agency(self) -> 'Query[Self]': """ Returns a query that includes all siblings by agency, including diff --git a/src/onegov/people/models/person.py b/src/onegov/people/models/person.py index 0785dc24b1..9fbce4dafb 100644 --- a/src/onegov/people/models/person.py +++ b/src/onegov/people/models/person.py @@ -148,6 +148,10 @@ def spoken_title(self) -> str: #: some remarks about the person notes: 'Column[str | None]' = Column(Text, nullable=True) + @property + def ts_score(self) -> int: + return 3 + if TYPE_CHECKING: # FIXME: Replace with explicit backref with back_populates memberships: relationship[AppenderQuery[AgencyMembership]] diff --git a/src/onegov/search/mixins.py b/src/onegov/search/mixins.py index 96a25cc7ef..e9c3d8f62d 100644 --- a/src/onegov/search/mixins.py +++ b/src/onegov/search/mixins.py @@ -177,8 +177,12 @@ def es_tags(self) -> list[str] | None: @property def ts_score(self) -> int: - """ Returns the score of the full text search. """ - return 10 + """ Returns the score for the full text search. The lower the scroe + the higher the object will be ranked in the search results. + """ + + raise NotImplementedError('The ts_score property must be implemented ' + 'in {}'.format(self.__class__.__name__)) class ORMSearchable(Searchable): diff --git a/src/onegov/ticket/model.py b/src/onegov/ticket/model.py index 538f3e9b79..eaebc570bf 100644 --- a/src/onegov/ticket/model.py +++ b/src/onegov/ticket/model.py @@ -105,6 +105,10 @@ class Ticket(Base, TimestampMixin, ORMSearchable): #: true if the notifications for this ticket should be muted muted: 'Column[bool]' = Column(Boolean, nullable=False, default=False) + @property + def ts_score(self) -> int: + return 6 + if TYPE_CHECKING: created: Column[datetime] else: diff --git a/src/onegov/translator_directory/models/translator.py b/src/onegov/translator_directory/models/translator.py index 67091273db..108a545fce 100644 --- a/src/onegov/translator_directory/models/translator.py +++ b/src/onegov/translator_directory/models/translator.py @@ -220,6 +220,10 @@ class Translator(Base, TimestampMixin, AssociatedFiles, ContentMixin, expertise_professional_guilds_other: 'dict_property[Sequence[str]]' expertise_professional_guilds_other = meta_property(default=tuple) + @property + def ts_score(self) -> int: + return 4 + @property def expertise_professional_guilds_all(self) -> 'Sequence[str]': return ( diff --git a/src/onegov/user/models/user.py b/src/onegov/user/models/user.py index 423b9a0fc9..f2d8d5537a 100644 --- a/src/onegov/user/models/user.py +++ b/src/onegov/user/models/user.py @@ -152,6 +152,10 @@ def userprofile(self) -> list[str]: signup_token: 'Column[str | None]' = Column( Text, nullable=True, default=None) + @property + def ts_score(self) -> int: + return 5 + __table_args__ = ( Index('lowercase_username', func.lower(username), unique=True), UniqueConstraint('source', 'source_id', name='unique_source_id'), From 3beab01071d4671f828d272e73311109b23101b0 Mon Sep 17 00:00:00 2001 From: Reto Tschuppert Date: Tue, 16 Jul 2024 15:41:09 +0200 Subject: [PATCH 06/44] Person title and user title and userprofile are now hybrid properties --- src/onegov/people/models/person.py | 5 ++++- src/onegov/user/models/user.py | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/onegov/people/models/person.py b/src/onegov/people/models/person.py index 0aef7c8975..844e11d776 100644 --- a/src/onegov/people/models/person.py +++ b/src/onegov/people/models/person.py @@ -1,3 +1,5 @@ +from sqlalchemy.ext.hybrid import hybrid_property + from onegov.core.orm import Base from onegov.core.orm.mixins import ContentMixin from onegov.core.orm.mixins import TimestampMixin @@ -55,7 +57,8 @@ class Person(Base, ContentMixin, TimestampMixin, ORMSearchable, def es_suggestion(self) -> tuple[str, ...]: return (self.title, f'{self.first_name} {self.last_name}') - @property + # @property + @hybrid_property def title(self) -> str: """ Returns the Eastern-ordered name. """ diff --git a/src/onegov/user/models/user.py b/src/onegov/user/models/user.py index cde37d3719..f1debd58d3 100644 --- a/src/onegov/user/models/user.py +++ b/src/onegov/user/models/user.py @@ -59,7 +59,8 @@ class User(Base, TimestampMixin, ORMSearchable): def es_suggestion(self) -> tuple[str, str]: return (self.realname or self.username, self.username) - @property + # @property + @hybrid_property def userprofile(self) -> list[str]: if not self.data: return [] From da09357cbd0424950e16d70affe8f830c57b5f01 Mon Sep 17 00:00:00 2001 From: Reto Tschuppert Date: Tue, 16 Jul 2024 15:42:45 +0200 Subject: [PATCH 07/44] Rework ranking --- src/onegov/org/models/search.py | 94 ++++++++++--------- src/onegov/people/models/person.py | 1 - src/onegov/town6/templates/search.pt | 4 +- src/onegov/town6/templates/search_postgres.pt | 4 +- src/onegov/user/models/user.py | 1 - 5 files changed, 55 insertions(+), 49 deletions(-) diff --git a/src/onegov/org/models/search.py b/src/onegov/org/models/search.py index 776441916d..3e08ebd33a 100644 --- a/src/onegov/org/models/search.py +++ b/src/onegov/org/models/search.py @@ -1,4 +1,3 @@ -from datetime import datetime from operator import attrgetter from elasticsearch_dsl.function import SF @@ -7,8 +6,6 @@ from elasticsearch_dsl.query import MatchPhrase from elasticsearch_dsl.query import MultiMatch from functools import cached_property - -from pytz import utc from sqlalchemy import func from onegov.core.collection import Pagination, _M @@ -33,7 +30,7 @@ class Search(Pagination[_M]): def __init__(self, request: 'OrgRequest', query: str, page: int) -> None: super().__init__(page) self.request = request - self.query = query + self.web_search = query @cached_property def available_documents(self) -> int: @@ -46,13 +43,13 @@ def explain(self) -> bool: @property def q(self) -> str: - return self.query + return self.web_search def __eq__(self, other: object) -> bool: return ( isinstance(other, self.__class__) and self.page == other.page - and self.query == other.query + and self.web_search == other.web_search ) if TYPE_CHECKING: @@ -67,11 +64,11 @@ def page_index(self) -> int: return self.page def page_by_index(self, index: int) -> 'Search[_M]': - return Search(self.request, self.query, index) + return Search(self.request, self.web_search, index) @cached_property def batch(self) -> 'Response | None': # type:ignore[override] - if not self.query: + if not self.web_search: return None search = self.request.app.es_search_by_request( @@ -81,7 +78,7 @@ def batch(self) -> 'Response | None': # type:ignore[override] # queries need to be cut at some point to make sure we're not # pushing the elasticsearch cluster to the brink - query = self.query[:self.max_query_length] + query = self.web_search[:self.max_query_length] if query.startswith('#'): search = self.hashtag_search(search, query) @@ -170,7 +167,7 @@ def subset_count(self) -> int: def suggestions(self) -> tuple[str, ...]: return tuple(self.request.app.es_suggestions_by_request( - self.request, self.query + self.request, self.web_search )) @@ -189,7 +186,7 @@ class SearchPostgres(Pagination): def __init__(self, request: 'OrgRequest', query: str, page: int): self.request = request - self.query = query + self.web_search = query self.page = page # page index self.nbr_of_docs = 0 @@ -209,12 +206,12 @@ def available_results(self) -> int: @property def q(self) -> str: - return self.query + return self.web_search def __eq__(self, other: object) -> bool: if not isinstance(other, SearchPostgres): return NotImplemented - return self.page == other.page and self.query == other.query + return self.page == other.page and self.web_search == other.web_search def subset(self): return self.batch @@ -224,14 +221,14 @@ def page_index(self) -> int: return self.page def page_by_index(self, index: int): - return SearchPostgres(self.request, self.query, index) + return SearchPostgres(self.request, self.web_search, index) @cached_property def batch(self): - if not self.query: + if not self.web_search: return None - if self.query.startswith('#'): + if self.web_search.startswith('#'): results = self.hashtag_search() else: results = self.generic_search() @@ -257,11 +254,36 @@ def load_batch_results(self): sorted_events = sorted(events, key=lambda e: e.latest_occurrence.start) return sorted_events + non_events + def _create_weighted_vector(self, model, language='simple'): + # for now weight the first field with 'A', the rest with 'B' + weighted_vector = [ + func.setweight( + func.to_tsvector( + language, + getattr(model, field, '') + ), + weight + ) + for field, weight in zip(model.es_properties.keys(), 'ABBBBBBBBBB') + if not field.startswith('es_') # TODO: rename to fts_ + ] + + # combine all weighted vectors + if weighted_vector: + combined_vector = weighted_vector[0] + for vector in weighted_vector[1:]: + combined_vector = combined_vector.op('||')(vector) + else: + combined_vector = func.to_tsvector(language, '') + + return combined_vector + def generic_search(self): doc_count = 0 results = [] - language = locale_mapping(self.request.locale) + ts_query = func.websearch_to_tsquery(language, self.web_search) + for base in self.request.app.session_manager.bases: for model in searchable_sqlalchemy_models(base): if model.es_public or self.request.is_logged_in: @@ -269,41 +291,27 @@ def generic_search(self): if query.count(): doc_count += query.count() - query = query.filter( - model.fts_idx.op('@@')(func.websearch_to_tsquery( - language, self.query)) + vector = self._create_weighted_vector(model, language) + rank_expression = func.ts_rank( + vector, + ts_query, + 0 # normalization, ignore document length ) - query = query.order_by( - func.ts_rank_cd( - model.fts_idx, - func.websearch_to_tsquery( - language, - self.query) - ) + query = query.filter( + model.fts_idx.op('@@')(ts_query) ) - results.extend(query.all()) + query = query.order_by(rank_expression.desc()) + res = query.all() + results.extend(res) self.nbr_of_docs = doc_count self.nbr_of_results = len(results) # remove duplicates - results = list(set(results)) - - # sort items after ts_score, modified and created. If no timestamp - # is available, use default time - default_time = utc.localize( - datetime.datetime(1970, 1, 1)) - return sorted( - results, - key=lambda k: ( - k.get('ts_score', 10), - k.get('modified') or default_time, - k.get('created') or default_time, - ), - reverse=False) + return tuple(set(results)) def hashtag_search(self): - q = self.query.lstrip('#') + q = self.web_search.lstrip('#') results = [] for model in searchable_sqlalchemy_models(Base): diff --git a/src/onegov/people/models/person.py b/src/onegov/people/models/person.py index 844e11d776..d4ee90962d 100644 --- a/src/onegov/people/models/person.py +++ b/src/onegov/people/models/person.py @@ -57,7 +57,6 @@ class Person(Base, ContentMixin, TimestampMixin, ORMSearchable, def es_suggestion(self) -> tuple[str, ...]: return (self.title, f'{self.first_name} {self.last_name}') - # @property @hybrid_property def title(self) -> str: """ Returns the Eastern-ordered name. """ diff --git a/src/onegov/town6/templates/search.pt b/src/onegov/town6/templates/search.pt index d1b5291270..f74f9ec927 100644 --- a/src/onegov/town6/templates/search.pt +++ b/src/onegov/town6/templates/search.pt @@ -15,7 +15,7 @@