From 67cc87309e804ebd0519ba4be332933483dd3ac9 Mon Sep 17 00:00:00 2001 From: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> Date: Thu, 4 Apr 2024 22:52:41 +0200 Subject: [PATCH 1/9] gather searchables independentyl and sum them up Signed-off-by: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> --- .../conquery/apiv1/FilterTemplate.java | 2 +- .../com/bakdata/conquery/apiv1/LabelMap.java | 70 ++++++++++++++++++ .../conquery/models/datasets/Column.java | 2 +- .../models/datasets/concepts/Searchable.java | 19 +---- .../filters/specific/SelectFilter.java | 72 ++++--------------- .../models/jobs/UpdateFilterSearchJob.java | 13 ++-- .../conquery/models/query/FilterSearch.java | 38 +++++----- .../resources/api/ConceptsProcessor.java | 30 ++++---- 8 files changed, 125 insertions(+), 121 deletions(-) create mode 100644 backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/FilterTemplate.java b/backend/src/main/java/com/bakdata/conquery/apiv1/FilterTemplate.java index 03cda498ba..df4925e718 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/FilterTemplate.java +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/FilterTemplate.java @@ -40,7 +40,7 @@ @ToString @Slf4j @CPSType(id = "CSV_TEMPLATE", base = SearchIndex.class) -public class FilterTemplate extends IdentifiableImpl implements Searchable, SearchIndex { +public class FilterTemplate extends IdentifiableImpl implements Searchable, SearchIndex { private static final long serialVersionUID = 1L; diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java b/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java new file mode 100644 index 0000000000..45361026af --- /dev/null +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java @@ -0,0 +1,70 @@ +package com.bakdata.conquery.apiv1; + +import java.util.List; +import java.util.stream.Collectors; + +import com.bakdata.conquery.apiv1.frontend.FrontendValue; +import com.bakdata.conquery.io.storage.NamespaceStorage; +import com.bakdata.conquery.models.config.IndexConfig; +import com.bakdata.conquery.models.datasets.concepts.Searchable; +import com.bakdata.conquery.models.identifiable.ids.specific.FilterId; +import com.bakdata.conquery.models.query.FilterSearch; +import com.bakdata.conquery.util.search.TrieSearch; +import com.google.common.collect.BiMap; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.experimental.Delegate; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.time.StopWatch; + +@Getter +@RequiredArgsConstructor +@Slf4j +public class LabelMap implements Searchable { + + private final FilterId id; + @Delegate + private final BiMap delegate; + private final int minSuffixLength; + private final boolean generateSearchSuffixes; + + @Override + public TrieSearch createTrieSearch(IndexConfig config, NamespaceStorage storage) { + + final TrieSearch search = config.createTrieSearch(true); + + final List collected = delegate.entrySet().stream() + .map(entry -> new FrontendValue(entry.getKey(), entry.getValue())) + .collect(Collectors.toList()); + + if (log.isTraceEnabled()) { + log.trace("Labels for {}: `{}`", getId(), collected.stream().map(FrontendValue::toString).collect(Collectors.toList())); + } + + StopWatch timer = StopWatch.createStarted(); + log.trace("START-SELECT ADDING_ITEMS for {}", getId()); + + collected.forEach(feValue -> search.addItem(feValue, FilterSearch.extractKeywords(feValue))); + + log.trace("DONE-SELECT ADDING_ITEMS for {} in {}", getId(), timer); + + timer.reset(); + log.trace("START-SELECT SHRINKING for {}", getId()); + + search.shrinkToFit(); + + log.trace("DONE-SELECT SHRINKING for {} in {}", getId(), timer); + + return search; + } + + @Override + public boolean isGenerateSuffixes() { + return generateSearchSuffixes; + } + + @Override + public boolean isSearchDisabled() { + return false; + } +} diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java index 6859976095..b0a167ce76 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java @@ -26,7 +26,7 @@ @Setter @NoArgsConstructor @Slf4j -public class Column extends Labeled implements NamespacedIdentifiable, Searchable { +public class Column extends Labeled implements NamespacedIdentifiable, Searchable { public static final int UNKNOWN_POSITION = -1; diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java index ee8c13d5ab..190d9047a2 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java @@ -7,38 +7,21 @@ import com.bakdata.conquery.apiv1.frontend.FrontendValue; import com.bakdata.conquery.io.storage.NamespaceStorage; import com.bakdata.conquery.models.config.IndexConfig; -import com.bakdata.conquery.models.datasets.Dataset; -import com.bakdata.conquery.models.identifiable.Identifiable; -import com.bakdata.conquery.models.identifiable.ids.Id; import com.bakdata.conquery.models.query.FilterSearch; import com.bakdata.conquery.util.search.TrieSearch; -import com.fasterxml.jackson.annotation.JsonIgnore; /** * @implNote This class is tightly coupled with {@link FilterSearch} and {@link com.bakdata.conquery.models.datasets.concepts.filters.specific.SelectFilter}. *

* Searchable classes describe how a search should be constructed, and provide the values with getSearchValues. */ -public interface Searchable>> extends Identifiable { - - public Dataset getDataset(); +public interface Searchable { /** * All available {@link FrontendValue}s for searching in a {@link TrieSearch}. */ TrieSearch createTrieSearch(IndexConfig config, NamespaceStorage storage); - /** - * The actual Searchables to use, if there is potential for deduplication/pooling. - * - * @implSpec The order of objects returned is used to also sort search results from different sources. - */ - @JsonIgnore - default List> getSearchReferences() { - //Hopefully the only candidate will be Column - return List.of(this); - } - /** * Parameter used in the construction of {@link com.bakdata.conquery.util.search.TrieSearch}, defining the shortest suffix to create. * Ignored if isGenerateSuffixes is true. diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java index a3aab711a1..3e5a155abe 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java @@ -7,20 +7,16 @@ import java.util.stream.Collectors; import com.bakdata.conquery.apiv1.FilterTemplate; +import com.bakdata.conquery.apiv1.LabelMap; import com.bakdata.conquery.apiv1.frontend.FrontendFilterConfiguration; import com.bakdata.conquery.apiv1.frontend.FrontendValue; import com.bakdata.conquery.io.jackson.View; import com.bakdata.conquery.io.jackson.serializer.NsIdRef; -import com.bakdata.conquery.io.storage.NamespaceStorage; import com.bakdata.conquery.models.config.ConqueryConfig; -import com.bakdata.conquery.models.config.IndexConfig; import com.bakdata.conquery.models.datasets.concepts.Searchable; import com.bakdata.conquery.models.datasets.concepts.filters.SingleColumnFilter; import com.bakdata.conquery.models.events.MajorTypeId; import com.bakdata.conquery.models.exceptions.ConceptConfigurationException; -import com.bakdata.conquery.models.identifiable.ids.specific.FilterId; -import com.bakdata.conquery.models.query.FilterSearch; -import com.bakdata.conquery.util.search.TrieSearch; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.google.common.collect.BiMap; @@ -30,7 +26,6 @@ import lombok.NoArgsConstructor; import lombok.Setter; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.time.StopWatch; import org.jetbrains.annotations.NotNull; @Setter @@ -38,7 +33,7 @@ @NoArgsConstructor @Slf4j @JsonIgnoreProperties({"searchType"}) -public abstract class SelectFilter extends SingleColumnFilter implements Searchable { +public abstract class SelectFilter extends SingleColumnFilter { /** * user given mapping from the values in the columns to shown labels @@ -71,19 +66,25 @@ public void configureFrontend(FrontendFilterConfiguration.Top f, ConqueryConfig @JsonIgnore public abstract String getFilterType(); - @Override - public List> getSearchReferences() { - final List> out = new ArrayList<>(); + + /** + * The actual Searchables to use, if there is potential for deduplication/pooling. + * + * @implSpec The order of objects returned is used to also sort search results from different sources. + */ + @JsonIgnore + public List getSearchReferences() { + final List out = new ArrayList<>(); if (getTemplate() != null) { out.add(getTemplate()); } if (!labels.isEmpty()) { - out.add(this); + out.add(new LabelMap(getId(), labels, searchMinSuffixLength, generateSearchSuffixes)); } - out.addAll(getColumn().getSearchReferences()); + out.add(getColumn()); return out; } @@ -105,51 +106,4 @@ public boolean isNotUsingTemplateAndLabels() { return (getTemplate() == null) != labels.isEmpty(); } - - @Override - @JsonIgnore - public boolean isGenerateSuffixes() { - return generateSearchSuffixes; - } - - @Override - @JsonIgnore - public int getMinSuffixLength() { - return searchMinSuffixLength; - } - - /** - * Does not make sense to distinguish at Filter level since it's only referenced when labels are set. - */ - @Override - @JsonIgnore - public boolean isSearchDisabled() { - return false; - } - - @Override - public TrieSearch createTrieSearch(IndexConfig config, NamespaceStorage storage) { - - final TrieSearch search = config.createTrieSearch(true); - - if(log.isTraceEnabled()) { - log.trace("Labels for {}: `{}`", getId(), collectLabels().stream().map(FrontendValue::toString).collect(Collectors.toList())); - } - - StopWatch timer = StopWatch.createStarted(); - log.trace("START-SELECT ADDING_ITEMS for {}", getId()); - - collectLabels().forEach(feValue -> search.addItem(feValue, FilterSearch.extractKeywords(feValue))); - - log.trace("DONE-SELECT ADDING_ITEMS for {} in {}", getId(), timer); - - timer.reset(); - log.trace("START-SELECT SHRINKING for {}", getId()); - - search.shrinkToFit(); - - log.trace("DONE-SELECT SHRINKING for {} in {}", getId(), timer); - - return search; - } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java b/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java index c23d065b9b..9ec67f02eb 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java +++ b/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java @@ -1,6 +1,5 @@ package com.bakdata.conquery.models.jobs; -import java.time.Duration; import java.util.Arrays; import java.util.Collection; import java.util.Collections; @@ -71,7 +70,7 @@ public void execute() throws Exception { // Unfortunately the is no ClassToInstanceMultimap yet - final Map, Set>> collectedSearchables = + final Map, Set> collectedSearchables = allSelectFilters.stream() .map(SelectFilter::getSearchReferences) .flatMap(Collection::stream) @@ -85,12 +84,12 @@ public void execute() throws Exception { // Most computations are cheap but data intensive: we fork here to use as many cores as possible. final ExecutorService service = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() - 1); - final HashMap, TrieSearch> searchCache = new HashMap<>(); - final Map, TrieSearch> synchronizedResult = Collections.synchronizedMap(searchCache); + final HashMap> searchCache = new HashMap<>(); + final Map> synchronizedResult = Collections.synchronizedMap(searchCache); log.debug("Found {} searchable Objects.", collectedSearchables.values().stream().mapToLong(Set::size).sum()); - for (Searchable searchable : collectedSearchables.getOrDefault(Searchable.class, Collections.emptySet())) { + for (Searchable searchable : collectedSearchables.getOrDefault(Searchable.class, Collections.emptySet())) { if (searchable instanceof Column column) { throw new IllegalStateException("Columns should have been grouped out previously"); } @@ -99,7 +98,7 @@ public void execute() throws Exception { final StopWatch watch = StopWatch.createStarted(); - log.info("BEGIN collecting entries for `{}`", searchable.getId()); + log.info("BEGIN collecting entries for `{}`", searchable); try { final TrieSearch search = searchable.createTrieSearch(indexConfig, storage); @@ -109,7 +108,7 @@ public void execute() throws Exception { log.debug( "DONE collecting {} entries for `{}`, within {}", search.calculateSize(), - searchable.getId(), + searchable, watch ); } diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java b/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java index 8c6522eb3c..378403fb1b 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java @@ -40,8 +40,8 @@ public class FilterSearch { * In the code below, the keys of this map will usually be called "reference". */ @JsonIgnore - private Map, TrieSearch> searchCache = new HashMap<>(); - private Object2LongMap> totals = Object2LongMaps.emptyMap(); + private Map> searchCache = new HashMap<>(); + private Object2LongMap totals = Object2LongMaps.emptyMap(); /** * From a given {@link FrontendValue} extract all relevant keywords. @@ -62,11 +62,11 @@ public static List extractKeywords(FrontendValue value) { /** * For a {@link SelectFilter} collect all relevant {@link TrieSearch}. */ - public final List> getSearchesFor(Searchable searchable) { - final List> references = searchable.getSearchReferences(); + public final List> getSearchesFor(SelectFilter searchable) { + final List references = searchable.getSearchReferences(); if (log.isTraceEnabled()) { - log.trace("Got {} as searchables for {}", references.stream().map(Searchable::getId).collect(Collectors.toList()), searchable.getId()); + log.trace("Got {} as searchables for {}", references.stream().map(Searchable::toString).collect(Collectors.toList()), searchable.getId()); } return references.stream() @@ -75,15 +75,15 @@ public final List> getSearchesFor(Searchable search .collect(Collectors.toList()); } - public long getTotal(Searchable searchable) { - return totals.getOrDefault(searchable, 0); + public long getTotal(SelectFilter searchable) { + return searchable.getSearchReferences().stream().mapToLong(totals::getLong).sum(); } /** * Add ready searches to the cache. This assumes that the search already has been shrunken. */ - public synchronized void addSearches(Map, TrieSearch> searchCache) { + public synchronized void addSearches(Map> searchCache) { this.searchCache.putAll(searchCache); log.debug("BEGIN counting Search totals."); @@ -97,7 +97,7 @@ public synchronized void addSearches(Map, TrieSearch searchable, Collection values) { + public void registerValues(Searchable searchable, Collection values) { TrieSearch search = searchCache.computeIfAbsent(searchable, (ignored) -> searchable.createTrieSearch(indexConfig, storage)); synchronized (search) { @@ -110,8 +110,8 @@ public void registerValues(Searchable searchable, Collection values) } } - private void updateTotals(Map, TrieSearch> searchCache) { - final Object2LongOpenHashMap> partialTotals = new Object2LongOpenHashMap<>(searchCache.keySet().stream() + private void updateTotals(Map> searchCache) { + final Object2LongOpenHashMap partialTotals = new Object2LongOpenHashMap<>(searchCache.keySet().stream() .collect(Collectors.toMap( Functions.identity(), this::getTotalFor @@ -119,21 +119,17 @@ private void updateTotals(Map, TrieSearch> searchCa totals.putAll(partialTotals); } - private long getTotalFor(Searchable searchable) { - return searchable.getSearchReferences() - .stream() - .map(searchCache::get) - .filter(Objects::nonNull) // Failed or disabled searches are null - .flatMap(TrieSearch::stream) - .mapToInt(FrontendValue::hashCode) - .distinct() - .count(); + private long getTotalFor(Searchable searchable) { + return searchCache.get(searchable).stream() + .mapToInt(FrontendValue::hashCode) // TODO Use distinct directly? + .distinct() + .count(); } /** * Shrink the memory footprint of a search. After this action, no values can be registered anymore to a search. */ - public void shrinkSearch(Searchable searchable) { + public void shrinkSearch(Searchable searchable) { final TrieSearch search = getSearchCache().get(searchable); if (search == null) { diff --git a/backend/src/main/java/com/bakdata/conquery/resources/api/ConceptsProcessor.java b/backend/src/main/java/com/bakdata/conquery/resources/api/ConceptsProcessor.java index a1a16e1696..02b13d50e2 100644 --- a/backend/src/main/java/com/bakdata/conquery/resources/api/ConceptsProcessor.java +++ b/backend/src/main/java/com/bakdata/conquery/resources/api/ConceptsProcessor.java @@ -30,8 +30,6 @@ import com.bakdata.conquery.models.datasets.PreviewConfig; import com.bakdata.conquery.models.datasets.concepts.Concept; import com.bakdata.conquery.models.datasets.concepts.FrontEndConceptBuilder; -import com.bakdata.conquery.models.datasets.concepts.Searchable; -import com.bakdata.conquery.models.datasets.concepts.filters.Filter; import com.bakdata.conquery.models.datasets.concepts.filters.specific.SelectFilter; import com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeChild; import com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept; @@ -79,14 +77,14 @@ public FrontendList load(Concept concept) { /** * Cache of all search results on SelectFilters. */ - private final LoadingCache, String>, List> + private final LoadingCache, String>, List> searchResults = CacheBuilder.newBuilder().softValues().build(new CacheLoader<>() { @Override - public List load(Pair, String> filterAndSearch) { + public List load(Pair, String> filterAndSearch) { final String searchTerm = filterAndSearch.getValue(); - final Searchable searchable = filterAndSearch.getKey(); + final SelectFilter searchable = filterAndSearch.getKey(); log.trace("Calculating a new search cache for the term \"{}\" on Searchable[{}]", searchTerm, searchable.getId()); @@ -98,9 +96,9 @@ public List load(Pair, String> filterAndSearch) { * Cache of raw listing of values on a filter. * We use Cursor here to reduce strain on memory and increase response time. */ - private final LoadingCache, CursorAndLength> listResults = CacheBuilder.newBuilder().softValues().build(new CacheLoader<>() { + private final LoadingCache, CursorAndLength> listResults = CacheBuilder.newBuilder().softValues().build(new CacheLoader<>() { @Override - public CursorAndLength load(Searchable searchable) { + public CursorAndLength load(SelectFilter searchable) { log.trace("Creating cursor for `{}`", searchable.getId()); return new CursorAndLength(listAllValues(searchable), countAllValues(searchable)); } @@ -160,7 +158,7 @@ public FrontendPreviewConfig getEntityPreviewFrontendConfig(Dataset dataset) { * Search for all search terms at once, with stricter scoring. * The user will upload a file and expect only well-corresponding resolutions. */ - public ResolvedFilterValues resolveFilterValues(Searchable searchable, List searchTerms) { + public ResolvedFilterValues resolveFilterValues(SelectFilter searchable, List searchTerms) { // search in the full text engine final Set openSearchTerms = new HashSet<>(searchTerms); @@ -184,13 +182,17 @@ public ResolvedFilterValues resolveFilterValues(Searchable searchable, List searchable, Optional maybeText, OptionalInt pageNumberOpt, OptionalInt itemsPerPageOpt) { + public AutoCompleteResult autocompleteTextFilter( + SelectFilter searchable, + Optional maybeText, + OptionalInt pageNumberOpt, + OptionalInt itemsPerPageOpt + ) { final int pageNumber = pageNumberOpt.orElse(0); final int itemsPerPage = itemsPerPageOpt.orElse(50); @@ -226,7 +228,7 @@ public AutoCompleteResult autocompleteTextFilter(Searchable searchable, Optio } } - private Cursor listAllValues(Searchable searchable) { + private Cursor listAllValues(SelectFilter searchable) { final Namespace namespace = namespaces.get(searchable.getDataset().getId()); /* Don't worry, I am as confused as you are! @@ -251,7 +253,7 @@ private Cursor listAllValues(Searchable searchable) { return new Cursor<>(Iterators.filter(iterators, seen::add)); } - private long countAllValues(Searchable searchable) { + private long countAllValues(SelectFilter searchable) { final Namespace namespace = namespaces.get(searchable.getDataset().getId()); return namespace.getFilterSearch().getTotal(searchable); @@ -261,7 +263,7 @@ private long countAllValues(Searchable searchable) { * Autocompletion for search terms. For values of {@link SelectFilter }. * Is used by the serach cache to load missing items */ - private List autocompleteTextFilter(Searchable searchable, String text) { + private List autocompleteTextFilter(SelectFilter searchable, String text) { final Namespace namespace = namespaces.get(searchable.getDataset().getId()); // Note that FEValues is equals/hashcode only on value: From bfd703a15322f6072bbeba3895ebdb5f836cef20 Mon Sep 17 00:00:00 2001 From: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> Date: Mon, 8 Apr 2024 08:46:59 +0200 Subject: [PATCH 2/9] removes unecessary members from FilterSearch Signed-off-by: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> --- .../java/com/bakdata/conquery/apiv1/FilterTemplate.java | 3 +-- .../main/java/com/bakdata/conquery/apiv1/LabelMap.java | 3 +-- .../java/com/bakdata/conquery/mode/NamespaceHandler.java | 2 +- .../java/com/bakdata/conquery/models/datasets/Column.java | 3 +-- .../conquery/models/datasets/concepts/Searchable.java | 3 +-- .../conquery/models/jobs/UpdateFilterSearchJob.java | 6 +++--- .../com/bakdata/conquery/models/query/FilterSearch.java | 8 +------- 7 files changed, 9 insertions(+), 19 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/FilterTemplate.java b/backend/src/main/java/com/bakdata/conquery/apiv1/FilterTemplate.java index df4925e718..955b7cb42c 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/FilterTemplate.java +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/FilterTemplate.java @@ -8,7 +8,6 @@ import com.bakdata.conquery.apiv1.frontend.FrontendValue; import com.bakdata.conquery.io.cps.CPSType; import com.bakdata.conquery.io.jackson.serializer.NsIdRef; -import com.bakdata.conquery.io.storage.NamespaceStorage; import com.bakdata.conquery.models.config.IndexConfig; import com.bakdata.conquery.models.datasets.Dataset; import com.bakdata.conquery.models.datasets.concepts.Searchable; @@ -90,7 +89,7 @@ public boolean isSearchDisabled() { return false; } - public TrieSearch createTrieSearch(IndexConfig config, NamespaceStorage storage) { + public TrieSearch createTrieSearch(IndexConfig config) { final URI resolvedURI = FileUtil.getResolvedUri(config.getBaseUrl(), getFilePath()); log.trace("Resolved filter template reference url for search '{}': {}", this.getId(), resolvedURI); diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java b/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java index 45361026af..44dff45a8c 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java @@ -4,7 +4,6 @@ import java.util.stream.Collectors; import com.bakdata.conquery.apiv1.frontend.FrontendValue; -import com.bakdata.conquery.io.storage.NamespaceStorage; import com.bakdata.conquery.models.config.IndexConfig; import com.bakdata.conquery.models.datasets.concepts.Searchable; import com.bakdata.conquery.models.identifiable.ids.specific.FilterId; @@ -29,7 +28,7 @@ public class LabelMap implements Searchable { private final boolean generateSearchSuffixes; @Override - public TrieSearch createTrieSearch(IndexConfig config, NamespaceStorage storage) { + public TrieSearch createTrieSearch(IndexConfig config) { final TrieSearch search = config.createTrieSearch(true); diff --git a/backend/src/main/java/com/bakdata/conquery/mode/NamespaceHandler.java b/backend/src/main/java/com/bakdata/conquery/mode/NamespaceHandler.java index 2d5740e4d1..8f3e4eb1e4 100644 --- a/backend/src/main/java/com/bakdata/conquery/mode/NamespaceHandler.java +++ b/backend/src/main/java/com/bakdata/conquery/mode/NamespaceHandler.java @@ -46,7 +46,7 @@ static NamespaceSetupData createNamespaceSetup(NamespaceStorage storage, final C JobManager jobManager = new JobManager(storage.getDataset().getName(), config.isFailOnError()); - FilterSearch filterSearch = new FilterSearch(storage, jobManager, config.getCsv(), config.getIndex()); + FilterSearch filterSearch = new FilterSearch(config.getIndex()); return new NamespaceSetupData(injectables, indexService, communicationMapper, preprocessMapper, jobManager, filterSearch); } diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java index b0a167ce76..5a7180976f 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java @@ -5,7 +5,6 @@ import com.bakdata.conquery.apiv1.frontend.FrontendValue; import com.bakdata.conquery.io.jackson.serializer.NsIdRef; -import com.bakdata.conquery.io.storage.NamespaceStorage; import com.bakdata.conquery.models.config.IndexConfig; import com.bakdata.conquery.models.datasets.concepts.Searchable; import com.bakdata.conquery.models.events.MajorTypeId; @@ -75,7 +74,7 @@ public Dataset getDataset() { * We create only an empty search here, because the content is provided through {@link com.bakdata.conquery.models.messages.namespaces.specific.RegisterColumnValues} and filled by the caller. */ @Override - public TrieSearch createTrieSearch(IndexConfig config, NamespaceStorage storage) { + public TrieSearch createTrieSearch(IndexConfig config) { return config.createTrieSearch(isGenerateSuffixes()); } diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java index 190d9047a2..931655fd94 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java @@ -5,7 +5,6 @@ import javax.validation.constraints.Min; import com.bakdata.conquery.apiv1.frontend.FrontendValue; -import com.bakdata.conquery.io.storage.NamespaceStorage; import com.bakdata.conquery.models.config.IndexConfig; import com.bakdata.conquery.models.query.FilterSearch; import com.bakdata.conquery.util.search.TrieSearch; @@ -20,7 +19,7 @@ public interface Searchable { /** * All available {@link FrontendValue}s for searching in a {@link TrieSearch}. */ - TrieSearch createTrieSearch(IndexConfig config, NamespaceStorage storage); + TrieSearch createTrieSearch(IndexConfig config); /** * Parameter used in the construction of {@link com.bakdata.conquery.util.search.TrieSearch}, defining the shortest suffix to create. diff --git a/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java b/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java index 9ec67f02eb..8313b4de2d 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java +++ b/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java @@ -90,7 +90,7 @@ public void execute() throws Exception { log.debug("Found {} searchable Objects.", collectedSearchables.values().stream().mapToLong(Set::size).sum()); for (Searchable searchable : collectedSearchables.getOrDefault(Searchable.class, Collections.emptySet())) { - if (searchable instanceof Column column) { + if (searchable instanceof Column) { throw new IllegalStateException("Columns should have been grouped out previously"); } @@ -101,7 +101,7 @@ public void execute() throws Exception { log.info("BEGIN collecting entries for `{}`", searchable); try { - final TrieSearch search = searchable.createTrieSearch(indexConfig, storage); + final TrieSearch search = searchable.createTrieSearch(indexConfig); synchronizedResult.put(searchable, search); @@ -119,7 +119,7 @@ public void execute() throws Exception { }); } - // The following cast is save + // The following cast is safe final Set searchableColumns = (Set) collectedSearchables.getOrDefault(Column.class, Collections.emptySet()); log.debug("Start collecting column values: {}", Arrays.toString(searchableColumns.toArray())); registerColumnValuesInSearch.accept(searchableColumns); diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java b/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java index 378403fb1b..89a8311e7e 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java @@ -9,12 +9,9 @@ import java.util.stream.Collectors; import com.bakdata.conquery.apiv1.frontend.FrontendValue; -import com.bakdata.conquery.io.storage.NamespaceStorage; -import com.bakdata.conquery.models.config.CSVConfig; import com.bakdata.conquery.models.config.IndexConfig; import com.bakdata.conquery.models.datasets.concepts.Searchable; import com.bakdata.conquery.models.datasets.concepts.filters.specific.SelectFilter; -import com.bakdata.conquery.models.jobs.JobManager; import com.bakdata.conquery.util.search.TrieSearch; import com.fasterxml.jackson.annotation.JsonIgnore; import com.google.common.base.Functions; @@ -29,9 +26,6 @@ @Data public class FilterSearch { - private final NamespaceStorage storage; - private final JobManager jobManager; - private final CSVConfig parserConfig; private final IndexConfig indexConfig; /** @@ -98,7 +92,7 @@ public synchronized void addSearches(Map> * prevents from adding new values. */ public void registerValues(Searchable searchable, Collection values) { - TrieSearch search = searchCache.computeIfAbsent(searchable, (ignored) -> searchable.createTrieSearch(indexConfig, storage)); + TrieSearch search = searchCache.computeIfAbsent(searchable, (ignored) -> searchable.createTrieSearch(indexConfig)); synchronized (search) { values.stream() From fff0bbab87d70be083094c51293e03046ae686ae Mon Sep 17 00:00:00 2001 From: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> Date: Mon, 8 Apr 2024 11:01:03 +0200 Subject: [PATCH 3/9] generate hashcode for LabelMap Signed-off-by: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> --- backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java b/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java index 44dff45a8c..51663d3dfd 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java @@ -10,6 +10,7 @@ import com.bakdata.conquery.models.query.FilterSearch; import com.bakdata.conquery.util.search.TrieSearch; import com.google.common.collect.BiMap; +import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.experimental.Delegate; @@ -19,6 +20,7 @@ @Getter @RequiredArgsConstructor @Slf4j +@EqualsAndHashCode public class LabelMap implements Searchable { private final FilterId id; From 85a86e39db7d9e333ce0e67c944142db376cb94c Mon Sep 17 00:00:00 2001 From: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> Date: Mon, 8 Apr 2024 11:03:23 +0200 Subject: [PATCH 4/9] adds FilterSearchTest Signed-off-by: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> --- .../filters/specific/SelectFilter.java | 9 ++- .../models/jobs/UpdateFilterSearchJob.java | 2 +- .../conquery/models/query/FilterSearch.java | 13 ++-- .../conquery/service/FilterSearchTest.java | 69 +++++++++++++++++++ 4 files changed, 85 insertions(+), 8 deletions(-) create mode 100644 backend/src/test/java/com/bakdata/conquery/service/FilterSearchTest.java diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java index 3e5a155abe..6e192b13b1 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java @@ -47,6 +47,9 @@ public abstract class SelectFilter extends SingleColumnFilter private int searchMinSuffixLength = 3; private boolean generateSearchSuffixes = true; + @JsonIgnore + private transient LabelMap searchableLabels; + @Override public EnumSet getAcceptedColumnTypes() { return EnumSet.of(MajorTypeId.STRING); @@ -81,7 +84,11 @@ public List getSearchReferences() { } if (!labels.isEmpty()) { - out.add(new LabelMap(getId(), labels, searchMinSuffixLength, generateSearchSuffixes)); + if (searchableLabels == null) { + // No synchronization here should be fine LabelMap and its delegate BiMap implement hash code + searchableLabels = new LabelMap(getId(), labels, searchMinSuffixLength, generateSearchSuffixes); + } + out.add(searchableLabels); } out.add(getColumn()); diff --git a/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java b/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java index 8313b4de2d..ef893d86ab 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java +++ b/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java @@ -59,7 +59,7 @@ public void execute() throws Exception { log.info("BEGIN loading SourceSearch"); - // collect all SelectFilters to the create searches for them + // collect all SelectFilters to create searches for them final List> allSelectFilters = storage.getAllConcepts().stream() .flatMap(c -> c.getConnectors().stream()) diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java b/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java index 89a8311e7e..64fc5a77fb 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java @@ -16,16 +16,17 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.google.common.base.Functions; import it.unimi.dsi.fastutil.objects.Object2LongMap; -import it.unimi.dsi.fastutil.objects.Object2LongMaps; import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap; -import lombok.Data; +import lombok.Getter; +import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @Slf4j -@Data +@RequiredArgsConstructor public class FilterSearch { + @Getter private final IndexConfig indexConfig; /** @@ -35,7 +36,7 @@ public class FilterSearch { */ @JsonIgnore private Map> searchCache = new HashMap<>(); - private Object2LongMap totals = Object2LongMaps.emptyMap(); + private Object2LongMap totals = new Object2LongOpenHashMap<>(); /** * From a given {@link FrontendValue} extract all relevant keywords. @@ -124,7 +125,7 @@ private long getTotalFor(Searchable searchable) { * Shrink the memory footprint of a search. After this action, no values can be registered anymore to a search. */ public void shrinkSearch(Searchable searchable) { - final TrieSearch search = getSearchCache().get(searchable); + final TrieSearch search = searchCache.get(searchable); if (search == null) { log.warn("Searchable has no search associated: {}", searchable); @@ -135,6 +136,6 @@ public void shrinkSearch(Searchable searchable) { public synchronized void clearSearch() { totals = new Object2LongOpenHashMap<>(); - searchCache.clear(); + searchCache = new HashMap<>(); } } diff --git a/backend/src/test/java/com/bakdata/conquery/service/FilterSearchTest.java b/backend/src/test/java/com/bakdata/conquery/service/FilterSearchTest.java new file mode 100644 index 0000000000..cf58a8305a --- /dev/null +++ b/backend/src/test/java/com/bakdata/conquery/service/FilterSearchTest.java @@ -0,0 +1,69 @@ +package com.bakdata.conquery.service; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.List; +import java.util.Map; + +import com.bakdata.conquery.models.config.IndexConfig; +import com.bakdata.conquery.models.datasets.Column; +import com.bakdata.conquery.models.datasets.Dataset; +import com.bakdata.conquery.models.datasets.Table; +import com.bakdata.conquery.models.datasets.concepts.filters.specific.SelectFilter; +import com.bakdata.conquery.models.datasets.concepts.filters.specific.SingleSelectFilter; +import com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeConnector; +import com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept; +import com.bakdata.conquery.models.query.FilterSearch; +import com.google.common.collect.ImmutableBiMap; +import org.junit.jupiter.api.Test; + +public class FilterSearchTest { + + @Test + public void totals() { + final IndexConfig indexConfig = new IndexConfig(); + FilterSearch search = new FilterSearch(indexConfig); + + // Column Searchable + SelectFilter filter = new SingleSelectFilter(); + ConceptTreeConnector connector = new ConceptTreeConnector(); + TreeConcept concept = new TreeConcept(); + Column column = new Column(); + Table table = new Table(); + Dataset dataset = new Dataset("test_dataset"); + + table.setName("test_table"); + table.setDataset(dataset); + concept.setDataset(dataset); + concept.setName("test_concept"); + concept.setConnectors(List.of(connector)); + connector.setName("test_connector"); + connector.setFilters(List.of(filter)); + connector.setConcept(concept); + column.setTable(table); + column.setName("test_column"); + filter.setColumn(column); + filter.setConnector(connector); + + + // Map Searchable + filter.setLabels(ImmutableBiMap.of( + "mm", "MM", + "nn", "NN" + )); + + // Register + filter.getSearchReferences().forEach(searchable -> { + search.addSearches(Map.of(searchable, searchable.createTrieSearch(indexConfig))); + }); + + search.registerValues(column, List.of( + "a", + "bb", + "cc" + )); + search.shrinkSearch(column); + + assertThat(search.getTotal(filter)).isEqualTo(5); + } +} From 8d4c0271055b2d087865b8f00c1c009b31169d49 Mon Sep 17 00:00:00 2001 From: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> Date: Mon, 8 Apr 2024 11:33:52 +0200 Subject: [PATCH 5/9] request all search totals after UpdateFilterSearchJob Signed-off-by: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> --- .../models/jobs/UpdateFilterSearchJob.java | 18 ++++++---- .../specific/CollectColumnValuesJob.java | 6 ++++ .../specific/FinalizeReactionMessage.java | 7 +++- .../conquery/models/query/FilterSearch.java | 33 ++++--------------- 4 files changed, 31 insertions(+), 33 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java b/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java index ef893d86ab..e980c21b1f 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java +++ b/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java @@ -27,6 +27,7 @@ import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.time.StopWatch; +import org.jetbrains.annotations.NotNull; /** * Job that initializes the filter search for the frontend. @@ -61,12 +62,7 @@ public void execute() throws Exception { // collect all SelectFilters to create searches for them final List> allSelectFilters = - storage.getAllConcepts().stream() - .flatMap(c -> c.getConnectors().stream()) - .flatMap(co -> co.collectAllFilters().stream()) - .filter(SelectFilter.class::isInstance) - .map(f -> ((SelectFilter) f)) - .collect(Collectors.toList()); + getAllSelectFilters(storage); // Unfortunately the is no ClassToInstanceMultimap yet @@ -145,6 +141,16 @@ public void execute() throws Exception { } + @NotNull + public static List> getAllSelectFilters(NamespaceStorage storage) { + return storage.getAllConcepts().stream() + .flatMap(c -> c.getConnectors().stream()) + .flatMap(co -> co.collectAllFilters().stream()) + .filter(SelectFilter.class::isInstance) + .map(f -> ((SelectFilter) f)) + .collect(Collectors.toList()); + } + @Override public String getLabel() { return "UpdateFilterSearchJob"; diff --git a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java index 58058b92f9..84a9bdef53 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java +++ b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java @@ -16,6 +16,7 @@ import com.bakdata.conquery.models.datasets.Table; import com.bakdata.conquery.models.events.Bucket; import com.bakdata.conquery.models.events.stores.root.StringStore; +import com.bakdata.conquery.models.jobs.UpdateFilterSearchJob; import com.bakdata.conquery.models.messages.namespaces.ActionReactionMessage; import com.bakdata.conquery.models.messages.namespaces.NamespacedMessage; import com.bakdata.conquery.models.messages.namespaces.WorkerMessage; @@ -103,5 +104,10 @@ public void afterAllReaction() { log.debug("{} shrinking searches", this); final FilterSearch filterSearch = namespace.getFilterSearch(); columns.forEach(filterSearch::shrinkSearch); + + + log.info("BEGIN counting Search totals."); + UpdateFilterSearchJob.getAllSelectFilters(namespace.getStorage()).forEach(namespace.getFilterSearch()::getTotal); + log.debug("FINISHED counting Search totals."); } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/FinalizeReactionMessage.java b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/FinalizeReactionMessage.java index fb383ccacc..e047a56963 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/FinalizeReactionMessage.java +++ b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/FinalizeReactionMessage.java @@ -5,6 +5,7 @@ import com.bakdata.conquery.io.cps.CPSType; import com.bakdata.conquery.models.identifiable.ids.specific.WorkerId; import com.bakdata.conquery.models.messages.ReactionMessage; +import com.bakdata.conquery.models.messages.namespaces.ActionReactionMessage; import com.bakdata.conquery.models.messages.namespaces.NamespaceMessage; import com.bakdata.conquery.models.messages.namespaces.NamespacedMessage; import com.bakdata.conquery.models.worker.DistributedNamespace; @@ -14,12 +15,16 @@ import lombok.ToString; import lombok.extern.slf4j.Slf4j; + +/** + * Use {@link ActionReactionMessage#afterAllReaction()} to processing on initiator side after all reactions where collected. + */ @CPSType(id = "FINALIZE_REACTION_MESSAGE", base = NamespacedMessage.class) @AllArgsConstructor(onConstructor_ = @JsonCreator) @Getter @Slf4j @ToString -public class FinalizeReactionMessage extends NamespaceMessage implements ReactionMessage { +public final class FinalizeReactionMessage extends NamespaceMessage implements ReactionMessage { private UUID callerId; diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java b/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java index 64fc5a77fb..46273c979f 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java @@ -14,7 +14,6 @@ import com.bakdata.conquery.models.datasets.concepts.filters.specific.SelectFilter; import com.bakdata.conquery.util.search.TrieSearch; import com.fasterxml.jackson.annotation.JsonIgnore; -import com.google.common.base.Functions; import it.unimi.dsi.fastutil.objects.Object2LongMap; import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap; import lombok.Getter; @@ -36,7 +35,7 @@ public class FilterSearch { */ @JsonIgnore private Map> searchCache = new HashMap<>(); - private Object2LongMap totals = new Object2LongOpenHashMap<>(); + private Object2LongMap> totals = new Object2LongOpenHashMap<>(); /** * From a given {@link FrontendValue} extract all relevant keywords. @@ -70,8 +69,12 @@ public final List> getSearchesFor(SelectFilter sear .collect(Collectors.toList()); } - public long getTotal(SelectFilter searchable) { - return searchable.getSearchReferences().stream().mapToLong(totals::getLong).sum(); + public long getTotal(SelectFilter filter) { + return totals.computeIfAbsent(filter, (f) -> filter.getSearchReferences().stream() + .map(searchCache::get) + .flatMap(TrieSearch::stream) + .distinct() + .count()); } @@ -81,9 +84,6 @@ public long getTotal(SelectFilter searchable) { public synchronized void addSearches(Map> searchCache) { this.searchCache.putAll(searchCache); - log.debug("BEGIN counting Search totals."); - // Precompute totals as that can be slow when doing it on-demand. - updateTotals(searchCache); } @@ -99,28 +99,9 @@ public void registerValues(Searchable searchable, Collection values) { values.stream() .map(value -> new FrontendValue(value, value)) .forEach(value -> search.addItem(value, extractKeywords(value))); - - // Update totals for search - totals.put(searchable, getTotalFor(searchable)); } } - private void updateTotals(Map> searchCache) { - final Object2LongOpenHashMap partialTotals = new Object2LongOpenHashMap<>(searchCache.keySet().stream() - .collect(Collectors.toMap( - Functions.identity(), - this::getTotalFor - ))); - totals.putAll(partialTotals); - } - - private long getTotalFor(Searchable searchable) { - return searchCache.get(searchable).stream() - .mapToInt(FrontendValue::hashCode) // TODO Use distinct directly? - .distinct() - .count(); - } - /** * Shrink the memory footprint of a search. After this action, no values can be registered anymore to a search. */ From 1ec91d8d36ad5fec484f45255a63b8335ca92e5e Mon Sep 17 00:00:00 2001 From: awildturtok <1553491+awildturtok@users.noreply.github.com> Date: Mon, 8 Apr 2024 11:35:13 +0200 Subject: [PATCH 6/9] Update backend/src/test/java/com/bakdata/conquery/service/FilterSearchTest.java --- .../java/com/bakdata/conquery/service/FilterSearchTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/src/test/java/com/bakdata/conquery/service/FilterSearchTest.java b/backend/src/test/java/com/bakdata/conquery/service/FilterSearchTest.java index cf58a8305a..825c13df7c 100644 --- a/backend/src/test/java/com/bakdata/conquery/service/FilterSearchTest.java +++ b/backend/src/test/java/com/bakdata/conquery/service/FilterSearchTest.java @@ -60,7 +60,8 @@ public void totals() { search.registerValues(column, List.of( "a", "bb", - "cc" + "cc", + "mm" )); search.shrinkSearch(column); From ece2ced46d0215feaff8c129e51a6e71e3686430 Mon Sep 17 00:00:00 2001 From: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> Date: Mon, 8 Apr 2024 11:40:32 +0200 Subject: [PATCH 7/9] don't cache label map in SelectFilter Signed-off-by: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> --- .../datasets/concepts/filters/specific/SelectFilter.java | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java index 6e192b13b1..3e5a155abe 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java @@ -47,9 +47,6 @@ public abstract class SelectFilter extends SingleColumnFilter private int searchMinSuffixLength = 3; private boolean generateSearchSuffixes = true; - @JsonIgnore - private transient LabelMap searchableLabels; - @Override public EnumSet getAcceptedColumnTypes() { return EnumSet.of(MajorTypeId.STRING); @@ -84,11 +81,7 @@ public List getSearchReferences() { } if (!labels.isEmpty()) { - if (searchableLabels == null) { - // No synchronization here should be fine LabelMap and its delegate BiMap implement hash code - searchableLabels = new LabelMap(getId(), labels, searchMinSuffixLength, generateSearchSuffixes); - } - out.add(searchableLabels); + out.add(new LabelMap(getId(), labels, searchMinSuffixLength, generateSearchSuffixes)); } out.add(getColumn()); From 78de7950492e63b20ac9f350d2723d87f91f67d5 Mon Sep 17 00:00:00 2001 From: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> Date: Mon, 8 Apr 2024 11:57:40 +0200 Subject: [PATCH 8/9] optimize imports Signed-off-by: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> --- .../bakdata/conquery/models/datasets/concepts/Searchable.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java index 931655fd94..b6c53a1544 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java @@ -1,7 +1,5 @@ package com.bakdata.conquery.models.datasets.concepts; -import java.util.List; - import javax.validation.constraints.Min; import com.bakdata.conquery.apiv1.frontend.FrontendValue; From 2eb9ec60f34e81a196fb2017d0234f017bd302c9 Mon Sep 17 00:00:00 2001 From: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> Date: Mon, 8 Apr 2024 12:43:45 +0200 Subject: [PATCH 9/9] puts search finalizer in job Signed-off-by: Max Thonagel <12283268+thoniTUB@users.noreply.github.com> --- .../specific/CollectColumnValuesJob.java | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java index 84a9bdef53..32f1f03b97 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java +++ b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java @@ -16,6 +16,7 @@ import com.bakdata.conquery.models.datasets.Table; import com.bakdata.conquery.models.events.Bucket; import com.bakdata.conquery.models.events.stores.root.StringStore; +import com.bakdata.conquery.models.jobs.SimpleJob; import com.bakdata.conquery.models.jobs.UpdateFilterSearchJob; import com.bakdata.conquery.models.messages.namespaces.ActionReactionMessage; import com.bakdata.conquery.models.messages.namespaces.NamespacedMessage; @@ -94,20 +95,29 @@ public void react(Worker context) throws Exception { log.debug("Still waiting for jobs: {} of {} done", done.get(), futures.size()); } } - + log.info("Finished collecting values from these columns: {}", Arrays.toString(columns.toArray())); context.send(new FinalizeReactionMessage(getMessageId(), context.getInfo().getId())); } @Override public void afterAllReaction() { - log.debug("{} shrinking searches", this); - final FilterSearch filterSearch = namespace.getFilterSearch(); - columns.forEach(filterSearch::shrinkSearch); - - log.info("BEGIN counting Search totals."); - UpdateFilterSearchJob.getAllSelectFilters(namespace.getStorage()).forEach(namespace.getFilterSearch()::getTotal); - log.debug("FINISHED counting Search totals."); + // Run this in a job, so it is definitely processed after UpdateFilterSearchJob + namespace.getJobManager().addSlowJob( + new SimpleJob( + "Finalize Search update", + () -> { + log.debug("{} shrinking searches", this); + final FilterSearch filterSearch = namespace.getFilterSearch(); + columns.forEach(filterSearch::shrinkSearch); + + + log.info("BEGIN counting Search totals."); + UpdateFilterSearchJob.getAllSelectFilters(namespace.getStorage()).forEach(namespace.getFilterSearch()::getTotal); + log.debug("FINISHED counting Search totals."); + } + ) + ); } }