From 5b23af934c89b855e7aafbc4e2debfd1da0a6154 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5vard=20Pettersen?= Date: Fri, 25 Oct 2024 11:51:50 +0000 Subject: [PATCH] allow more hits per group with very few documents --- .../src/vespa/searchcore/proton/matching/match_master.cpp | 2 +- .../src/vespa/searchcore/proton/matching/match_params.cpp | 1 + .../src/vespa/searchcore/proton/matching/match_params.h | 1 + .../src/vespa/searchcore/proton/matching/match_tools.cpp | 6 +++--- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp index 152ba978cd10..13252e414d0a 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_master.cpp @@ -90,7 +90,7 @@ MatchMaster::match(search::engine::Trace & trace, * We need a non-const first phase rank lookup since it will be populated * later on when selecting documents for second phase ranking. */ - MatchLoopCommunicator communicator(threadBundle.size(), params.heapSize, mtf.createDiversifier(params.heapSize), + MatchLoopCommunicator communicator(threadBundle.size(), params.heapSize, mtf.createDiversifier(params.diversity_want_hits), mtf.get_first_phase_rank_lookup(), [&mtf]() noexcept { mtf.query().set_matching_phase(MatchingPhase::SECOND_PHASE); }); TimedMatchLoopCommunicator timedCommunicator(communicator); diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_params.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_params.cpp index 316ef003a28f..bdd83ae9462c 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_params.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_params.cpp @@ -32,6 +32,7 @@ MatchParams::MatchParams(uint32_t numDocs_in, : 0), offset(std::min(numDocs_in, offset_in)), hits(std::min(numDocs_in - offset, hits_in)), + diversity_want_hits(heapSize_in), first_phase_rank_score_drop_limit(first_phase_rank_score_drop_limit_in), second_phase_rank_score_drop_limit(second_phase_rank_score_drop_limit_in) { } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_params.h b/searchcore/src/vespa/searchcore/proton/matching/match_params.h index 19abcd8e4498..a308e94be3de 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_params.h +++ b/searchcore/src/vespa/searchcore/proton/matching/match_params.h @@ -18,6 +18,7 @@ struct MatchParams { const uint32_t arraySize; const uint32_t offset; const uint32_t hits; + const uint32_t diversity_want_hits; const std::optional first_phase_rank_score_drop_limit; const std::optional second_phase_rank_score_drop_limit; diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp index c7fc684aa427..8a0bc33a2169 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_tools.cpp @@ -257,7 +257,7 @@ MatchToolsFactory::createMatchTools() const } std::unique_ptr -MatchToolsFactory::createDiversifier(uint32_t heapSize) const +MatchToolsFactory::createDiversifier(uint32_t want_hits) const { if ( !_diversityParams.enabled() ) { return {}; @@ -267,8 +267,8 @@ MatchToolsFactory::createDiversifier(uint32_t heapSize) const Issue::report("Skipping diversity due to no %s attribute.", _diversityParams.attribute.c_str()); return {}; } - size_t max_per_group = std::max(size_t(1), size_t(heapSize / _diversityParams.min_groups)); - return DiversityFilter::create(*attr, heapSize, max_per_group, _diversityParams.min_groups, + size_t max_per_group = std::max(size_t(1), size_t(want_hits / _diversityParams.min_groups)); + return DiversityFilter::create(*attr, want_hits, max_per_group, _diversityParams.min_groups, _diversityParams.cutoff_strategy == DiversityParams::CutoffStrategy::STRICT); }