From 6e555965490124c1ac9a208b93f9c01f0b89c6de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20Sede=C3=B1o?= Date: Tue, 15 Jun 2021 20:17:01 -0400 Subject: [PATCH 1/2] Give SegmentWriter a cachable Searcher When repeatedly calling `update_document()`, each call instantiates a `Searcher`, which is expensive. The `SegmentWriter` already has a write lock on the index, so if the conditions are right, let's cache a `Searcher`, override its closing method to keep it open, and close it on `_finish()` if it's around. --- src/whoosh/writing.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/whoosh/writing.py b/src/whoosh/writing.py index 8d466391..d23bfea2 100644 --- a/src/whoosh/writing.py +++ b/src/whoosh/writing.py @@ -545,6 +545,7 @@ def __init__(self, ix, poolclass=None, timeout=0.0, delay=0.1, _lk=True, self.merge = True self.optimize = False self.mergetype = None + self._searcher = None def __repr__(self): # Author: Ronald Evers @@ -809,6 +810,18 @@ def per_document_reader(self): raise Exception("Per-doc writer is still open") return self.codec.per_document_reader(self.storage, self.get_segment()) + def searcher(self, **kwargs): + # If possible, cache a Searcher that doesn't close until we want it to. + # We have a write lock, nothing is changing. Only cache if kwargs is emtpy. + if kwargs: + return super(SegmentWriter, self).searcher(**kwargs) + + if self._searcher is None: + s = self._searcher = super(SegmentWriter, self).searcher() + s._orig_close = s.close # called in _finish() + s.close = lambda: None + return self._searcher + # The following methods break out the commit functionality into smaller # pieces to allow MpWriter to call them individually @@ -890,6 +903,10 @@ def _commit_toc(self, segments): clean_files(self.storage, self.indexname, self.generation, segments) def _finish(self): + if self._searcher is not None: + # Close the cached Searcher if we have one. 
+ self._searcher._orig_close() + self._searcher = None self._tempstorage.destroy() if self.writelock: self.writelock.release() From 5870705621dcd40bf48edaa6bd50b392dba5f99a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alejandro=20Sede=C3=B1o?= Date: Wed, 16 Jun 2021 09:30:54 -0400 Subject: [PATCH 2/2] more safeguards Also don't cache a `Searcher` if the `SegmentWriter` is closed. --- src/whoosh/writing.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/whoosh/writing.py b/src/whoosh/writing.py index d23bfea2..838258d2 100644 --- a/src/whoosh/writing.py +++ b/src/whoosh/writing.py @@ -812,12 +812,14 @@ def per_document_reader(self): def searcher(self, **kwargs): # If possible, cache a Searcher that doesn't close until we want it to. - # We have a write lock, nothing is changing. Only cache if kwargs is emtpy. - if kwargs: + # We have a write lock, nothing is changing. Only cache if kwargs is empty + # and the SegmentWriter is still open. + if kwargs or self.is_closed: return super(SegmentWriter, self).searcher(**kwargs) if self._searcher is None: - s = self._searcher = super(SegmentWriter, self).searcher() + s = super(SegmentWriter, self).searcher() + self._searcher = s s._orig_close = s.close # called in _finish() s.close = lambda: None return self._searcher