Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ogc 508 replace elastic search by postgres v3 #1559

Open
wants to merge 42 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
6ff58c9
Introduce postgres search views and integration (start over from v2 n…
Tschuppi81 Oct 24, 2024
18658fb
Introduce postgres search views and integration (start over from v2 n…
Tschuppi81 Oct 24, 2024
d0aad65
Put only upcoming events to the top of search results (instead of all…
Tschuppi81 Oct 25, 2024
0dd6e57
Revert several changes
Tschuppi81 Oct 25, 2024
0e165e3
store static search property data in separate column. Determine publi…
Tschuppi81 Oct 25, 2024
a3d2a4b
Fix linter issues
Tschuppi81 Oct 25, 2024
54d9860
Fix fsi search views and add tests
Tschuppi81 Oct 25, 2024
2e3816d
fix tests
Tschuppi81 Oct 25, 2024
f4b87d0
Adds to index data and filter for it during search
Tschuppi81 Oct 28, 2024
333f57a
Exclude members from seeing non es_public documents
Tschuppi81 Oct 29, 2024
7e652e4
Ensure member finds documents with access level 'member'
Tschuppi81 Oct 29, 2024
b021ef4
Resolve mypy issues
Tschuppi81 Oct 29, 2024
f75fccf
Fix statement for members
Tschuppi81 Oct 30, 2024
e2f9e00
Removed unused import
Tschuppi81 Oct 30, 2024
037d6ce
Fix member / manager filters
Tschuppi81 Oct 31, 2024
41b2714
Ignore mypy truthy-function and unreachable
Tschuppi81 Nov 4, 2024
0eef94f
Extend search tests
Tschuppi81 Nov 4, 2024
8e50ff5
Revert renaming
Tschuppi81 Nov 11, 2024
e11f317
Test for instance to identify events
Tschuppi81 Nov 11, 2024
1ed1d1d
Renaming variables
Tschuppi81 Nov 11, 2024
b168a85
Update src/onegov/org/models/search.py
Tschuppi81 Nov 12, 2024
39313d0
Update src/onegov/org/models/search.py
Tschuppi81 Nov 12, 2024
14bf8f2
Performance: use jsonb instead of json
Tschuppi81 Nov 14, 2024
c0f95ed
Rework search suggestions
Tschuppi81 Nov 18, 2024
2bdd14c
Fix static number of priorities for properties
Tschuppi81 Nov 25, 2024
8ab1dee
Merge master
Tschuppi81 Dec 2, 2024
d7f0958
mend
Tschuppi81 Dec 2, 2024
7c82279
Filter search models removing base classes
Tschuppi81 Dec 3, 2024
b13f212
Cleanup
Tschuppi81 Dec 3, 2024
d34584f
Also compare table name
Tschuppi81 Dec 3, 2024
2db81c5
Index only searchable base classes removing duplicates in search results
Tschuppi81 Dec 13, 2024
324ebbe
Update src/onegov/search/integration.py
Tschuppi81 Dec 16, 2024
e449cd7
Update src/onegov/search/utils.py
Tschuppi81 Dec 16, 2024
11e93ff
Fix syntax
Tschuppi81 Dec 16, 2024
5191975
Revert filter for base models for search
Tschuppi81 Dec 17, 2024
50ac000
Rework test
Tschuppi81 Dec 17, 2024
cad0655
Merge master
Tschuppi81 Dec 17, 2024
1f142d9
Move weighted tsvector to indexing step
Tschuppi81 Dec 23, 2024
d3912d3
Adjust search
Tschuppi81 Dec 24, 2024
127bc26
Adds a time decay function to search results
Tschuppi81 Dec 24, 2024
a68d9fd
Merge branch 'master' into ogc-508-replace-elastic-search-by-postgres-v3
Tschuppi81 Dec 24, 2024
775c48e
Fix test
Tschuppi81 Dec 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions src/onegov/org/models/search.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
import itertools

from elasticsearch_dsl.function import SF # type:ignore
from elasticsearch_dsl.query import FunctionScore # type:ignore
from elasticsearch_dsl.query import Match
from elasticsearch_dsl.query import MatchPhrase
from elasticsearch_dsl.query import MultiMatch
from functools import cached_property
from itertools import chain, repeat
from sedate import utcnow
from sqlalchemy import func
from typing import TYPE_CHECKING, Any

from onegov.core.collection import Pagination, _M
from onegov.event.models import Event
from onegov.search.utils import (searchable_sqlalchemy_models,
filter_non_base_models)
filter_for_base_models)

if TYPE_CHECKING:
from onegov.org.request import OrgRequest
Expand Down Expand Up @@ -156,7 +155,7 @@
first_entry = self.batch[0].load()

# XXX the default view to the event should be doing the redirect
if first_entry.es_type_name == 'events':

Check warning on line 158 in src/onegov/org/models/search.py

View check run for this annotation

Codecov / codecov/patch

src/onegov/org/models/search.py#L158

Added line #L158 was not covered by tests
Tschuppi81 marked this conversation as resolved.
Show resolved Hide resolved
return self.request.link(first_entry, 'latest')
else:
return self.request.link(first_entry)
Expand Down Expand Up @@ -193,9 +192,10 @@
self.number_of_docs = 0
self.number_of_results = 0

models = (model for base in self.request.app.session_manager.bases
for model in searchable_sqlalchemy_models(base))
self.search_models = filter_non_base_models(set(models))
self.search_models = {
model for base in self.request.app.session_manager.bases
for model in searchable_sqlalchemy_models(base)}
self.search_models = filter_for_base_models(self.search_models)

@cached_property
def available_documents(self) -> int:
Expand All @@ -218,24 +218,24 @@
return self.query

def __eq__(self, other: object) -> bool:
if not isinstance(other, SearchPostgres):
return NotImplemented
return self.page == other.page and self.query == other.query

Check warning on line 223 in src/onegov/org/models/search.py

View check run for this annotation

Codecov / codecov/patch

src/onegov/org/models/search.py#L221-L223

Added lines #L221 - L223 were not covered by tests

def subset(self) -> 'list[Searchable] | None': # type:ignore[override]
return self.batch

Check warning on line 226 in src/onegov/org/models/search.py

View check run for this annotation

Codecov / codecov/patch

src/onegov/org/models/search.py#L226

Added line #L226 was not covered by tests

@property
def page_index(self) -> int:
return self.page

Check warning on line 230 in src/onegov/org/models/search.py

View check run for this annotation

Codecov / codecov/patch

src/onegov/org/models/search.py#L230

Added line #L230 was not covered by tests

def page_by_index(self, index: int) -> 'SearchPostgres[_M]':
return SearchPostgres(self.request, self.query, index)

Check warning on line 233 in src/onegov/org/models/search.py

View check run for this annotation

Codecov / codecov/patch

src/onegov/org/models/search.py#L233

Added line #L233 was not covered by tests

@cached_property
def batch(self) -> 'list[Searchable]': # type:ignore[override]
if not self.query:
return []

Check warning on line 238 in src/onegov/org/models/search.py

View check run for this annotation

Codecov / codecov/patch

src/onegov/org/models/search.py#L238

Added line #L238 was not covered by tests

if self.query.startswith('#'):
results = self.hashtag_search()
Expand Down Expand Up @@ -289,7 +289,7 @@
)
for field, weight in zip(
model.es_properties.keys(),
itertools.chain('A', itertools.repeat('B')))
chain('A', repeat('B')))
if not field.startswith('es_') # TODO: rename to fts_
]

Expand All @@ -299,7 +299,7 @@
for vector in weighted_vectors[1:]:
combined_vector = combined_vector.op('||')(vector)
else:
combined_vector = func.to_tsvector(language, '')

Check warning on line 302 in src/onegov/org/models/search.py

View check run for this annotation

Codecov / codecov/patch

src/onegov/org/models/search.py#L302

Added line #L302 was not covered by tests

return combined_vector

Expand Down Expand Up @@ -376,15 +376,15 @@
return results

def feeling_lucky(self) -> str | None:
if self.batch:
first_entry = self.batch[0]

Check warning on line 380 in src/onegov/org/models/search.py

View check run for this annotation

Codecov / codecov/patch

src/onegov/org/models/search.py#L379-L380

Added lines #L379 - L380 were not covered by tests

# XXX the default view to the event should be doing the redirect
if isinstance(first_entry, Event):
return self.request.link(first_entry, 'latest')

Check warning on line 384 in src/onegov/org/models/search.py

View check run for this annotation

Codecov / codecov/patch

src/onegov/org/models/search.py#L383-L384

Added lines #L383 - L384 were not covered by tests
else:
return self.request.link(first_entry)
return None

Check warning on line 387 in src/onegov/org/models/search.py

View check run for this annotation

Codecov / codecov/patch

src/onegov/org/models/search.py#L386-L387

Added lines #L386 - L387 were not covered by tests

@cached_property
def subset_count(self) -> int:
Expand All @@ -393,37 +393,37 @@
@cached_property
def get_all_hashtags(self) -> list[str]:
""" Returns all hashtags from the database in alphabetical order. """
all_tags: set[str] = set()

Check warning on line 396 in src/onegov/org/models/search.py

View check run for this annotation

Codecov / codecov/patch

src/onegov/org/models/search.py#L396

Added line #L396 was not covered by tests

for base in self.request.app.session_manager.bases:
for model in searchable_sqlalchemy_models(base):
query = self.request.session.query(

Check warning on line 400 in src/onegov/org/models/search.py

View check run for this annotation

Codecov / codecov/patch

src/onegov/org/models/search.py#L398-L400

Added lines #L398 - L400 were not covered by tests
model.fts_idx_data['es_tags'].distinct())
for tag_list in query.all():
all_tags.update(tag_list[0]) if tag_list[0] else None

Check warning on line 403 in src/onegov/org/models/search.py

View check run for this annotation

Codecov / codecov/patch

src/onegov/org/models/search.py#L402-L403

Added lines #L402 - L403 were not covered by tests

# mark tags as hashtags; it also helps to stay in the hashtag
# search (url) when clicking on a suggestion
all_tags = {f'#{tag}' for tag in all_tags}
return sorted(all_tags)

Check warning on line 408 in src/onegov/org/models/search.py

View check run for this annotation

Codecov / codecov/patch

src/onegov/org/models/search.py#L407-L408

Added lines #L407 - L408 were not covered by tests

def suggestions(self) -> tuple[str, ...]:
suggestions = []
number_of_suggestions = 15

if self.query.startswith('#'): # hashtag search
q = self.query.lstrip('#').lower()
tags = self.get_all_hashtags

Check warning on line 416 in src/onegov/org/models/search.py

View check run for this annotation

Codecov / codecov/patch

src/onegov/org/models/search.py#L415-L416

Added lines #L415 - L416 were not covered by tests

if len(q) == 0:
return tuple(tags[:number_of_suggestions])

Check warning on line 419 in src/onegov/org/models/search.py

View check run for this annotation

Codecov / codecov/patch

src/onegov/org/models/search.py#L418-L419

Added lines #L418 - L419 were not covered by tests

suggestions = [tag for tag in tags if q in tag]

Check warning on line 421 in src/onegov/org/models/search.py

View check run for this annotation

Codecov / codecov/patch

src/onegov/org/models/search.py#L421

Added line #L421 was not covered by tests

else:
for element in self.generic_search():
if element.es_type_name == 'files':
continue

Check warning on line 426 in src/onegov/org/models/search.py

View check run for this annotation

Codecov / codecov/patch

src/onegov/org/models/search.py#L426

Added line #L426 was not covered by tests
suggest = getattr(element, 'es_suggestion', '')
if isinstance(suggest, tuple):
suggest = suggest[0]
Expand Down
27 changes: 11 additions & 16 deletions src/onegov/search/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,9 @@
from onegov.search.indexer import ORMEventTranslator
from onegov.search.indexer import TypeMappingRegistry
from onegov.search.utils import (searchable_sqlalchemy_models,
filter_non_base_models)
filter_for_base_models)
from sortedcontainers import SortedSet
from sedate import utcnow
from sqlalchemy import inspect
from sqlalchemy.orm import undefer
from urllib3.exceptions import HTTPError

Expand Down Expand Up @@ -444,28 +443,24 @@
""" Load all database objects and index them. """
session = self.session()
try:
q = session.query(model).options(undefer('*'))
i = inspect(model)

if i.polymorphic_on is not None:
q = q.filter(i.polymorphic_on == i.polymorphic_identity)

for obj in q:
for obj in session.query(model).options(undefer('*')):
Tschuppi81 marked this conversation as resolved.
Show resolved Hide resolved
self.es_orm_events.index(schema, obj)

except Exception as e:
print(f"Error psql indexing model '{model}': {e}")
print(f"Error psql indexing model '{model.__name__}': {e}")

Check warning on line 450 in src/onegov/search/integration.py

View check run for this annotation

Codecov / codecov/patch

src/onegov/search/integration.py#L450

Added line #L450 was not covered by tests
finally:
session.invalidate()
session.bind.dispose()

models = (model for base in self.session_manager.bases
for model in searchable_sqlalchemy_models(base))
models = {
model
for base in self.session_manager.bases
for model in searchable_sqlalchemy_models(base)
}
base_models = filter_for_base_models(models)

with ThreadPoolExecutor() as executor:
results = executor.map(
reindex_model, (
model for model in filter_non_base_models(set(models)))
)
results = executor.map(reindex_model, base_models)
if fail:
print(tuple(results))

Expand Down
48 changes: 28 additions & 20 deletions src/onegov/search/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,17 @@
import os
import re

from sqlalchemy import inspect

from onegov.core.custom import json
from langdetect import DetectorFactory, PROFILES_DIRECTORY
from langdetect.utils.lang_profile import LangProfile
from onegov.core.orm import find_models

from typing import Any, Generic, TypeVar, TYPE_CHECKING

if TYPE_CHECKING:
from collections.abc import Callable, Iterator, Sequence
from collections.abc import Callable, Iterable, Iterator, Sequence
from langdetect.detector import Detector
from langdetect.language import Language
from onegov.search.mixins import Searchable
Expand Down Expand Up @@ -42,29 +45,34 @@ def searchable_sqlalchemy_models(
)


def filter_non_base_models(
models: 'set[type[T]]'
) -> 'set[type[T]]':
""" Remove model classes that are base classes of other models in the set.
Args: models: set of model classes to filter
Returns: set: Model classes that are not base classes of any other model
in the set.

def filter_for_base_models(
models: 'Iterable[type[Searchable]]'
) -> 'set[type[Searchable]]':
"""
non_base_models = set()
Filter out models that are polymorphic subclasses of other
models in order to save on queries.

for model in models:
is_base = False
for other_model in models:
if (model is not other_model and issubclass(other_model, model)
and model.__tablename__ == other_model.__tablename__): # type:ignore[attr-defined]
is_base = True
break
"""
from onegov.search.mixins import Searchable

if not is_base:
non_base_models.add(model)
new_models = set()

return non_base_models
for model in models:
i = inspect(model)
base_classes = {
e.base_mapper.class_ for e in
i.base_mapper.self_and_descendants
if issubclass(e.base_mapper.class_, Searchable)
if e.polymorphic_identity is not None
if e.base_mapper.class_ != model
}
if base_classes:
for base in base_classes:
new_models.add(base)
else:
new_models.add(model)
Tschuppi81 marked this conversation as resolved.
Show resolved Hide resolved

return new_models


_invalid_index_characters = re.compile(r'[\\/?"<>|\s,A-Z:]+')
Expand Down
44 changes: 34 additions & 10 deletions tests/onegov/search/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from onegov.search import ORMSearchable, Searchable
from onegov.search import ORMSearchable, Searchable, SearchableContent
from onegov.search import utils
from sqlalchemy import Column, Integer, Text, ForeignKey
from sqlalchemy.ext.declarative import declarative_base
Expand Down Expand Up @@ -50,42 +50,66 @@ class B(Base):
]


def test_filter_non_base_models():
def test_filter_for_base_models():
Base = declarative_base()

class Page(Base, Searchable):
class Ticket(Base, ORMSearchable):
__tablename__ = 'tickets'
id = Column(Integer, primary_key=True)
__tablename__ = 'pages'

class Topic(Page):
pass
class XTicket(Ticket):
__mapper_args__ = {'polymorphic_identity': 'X'} # type:ignore

class News(Page):
pass
class YTicket(Ticket):
__mapper_args__ = {'polymorphic_identity': 'Y'} # type:ignore

assert utils.filter_non_base_models({Page, Topic, News}) == {Topic, News}
assert utils.filter_for_base_models({XTicket, YTicket, Ticket}) == {Ticket}

class A(Base, Searchable):
__mapper_args__ = {'polymorphic_identity': 'a'} # type:ignore
id = Column(Integer, primary_key=True)
__tablename__ = 'a'

class AA(A):
__mapper_args__ = {'polymorphic_identity': 'aa'} # type:ignore
pass

class B(Base, Searchable):
__mapper_args__ = {'polymorphic_identity': 'b'} # type:ignore
id = Column(Integer, primary_key=True)
__tablename__ = 'b'

class C(Base, Searchable):
__mapper_args__ = {'polymorphic_identity': 'c'} # type:ignore
id = Column(Integer, primary_key=True)
__tablename__ = 'c'

class CC(C):
__mapper_args__ = {'polymorphic_identity': 'cc'} # type:ignore
id_2 = Column(Integer, primary_key=True)
c_id = Column(Integer, ForeignKey('c.id'))
__tablename__ = 'cc'

assert utils.filter_non_base_models({A, AA, B, C, CC}) == {AA, B, C, CC}
assert utils.filter_for_base_models({A, AA, B, C, CC}) == {A, B, C}

class AdjacencyList(Base):
__abstract__ = True
__mapper_args__ = {'polymorphic_identity': 'generic'}

class Page(AdjacencyList):
__tablename__ = 'pages'
id = Column(Integer, primary_key=True)

class Topic(Page, SearchableContent):
__mapper_args__ = {'polymorphic_identity': 'topic'}

class News(Page, SearchableContent):
__mapper_args__ = {'polymorphic_identity': 'news'}

searchable_models = {
m for m in utils.searchable_sqlalchemy_models(Base)}
assert utils.filter_for_base_models(searchable_models) == {
Topic, News, Ticket, A, B, C}


def test_related_types():
Expand Down
Loading