Skip to content

Commit

Permalink
Merge pull request #32665 from vespa-engine/havardpe/better-diversify-with-few-documents
Browse files Browse the repository at this point in the history

allow more hits per group with very few documents
  • Loading branch information
hmusum authored Oct 25, 2024
2 parents e4eea63 + 5b23af9 commit b8d2453
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ MatchMaster::match(search::engine::Trace & trace,
* We need a non-const first phase rank lookup since it will be populated
* later on when selecting documents for second phase ranking.
*/
MatchLoopCommunicator communicator(threadBundle.size(), params.heapSize, mtf.createDiversifier(params.heapSize),
MatchLoopCommunicator communicator(threadBundle.size(), params.heapSize, mtf.createDiversifier(params.diversity_want_hits),
mtf.get_first_phase_rank_lookup(),
[&mtf]() noexcept { mtf.query().set_matching_phase(MatchingPhase::SECOND_PHASE); });
TimedMatchLoopCommunicator timedCommunicator(communicator);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ MatchParams::MatchParams(uint32_t numDocs_in,
: 0),
offset(std::min(numDocs_in, offset_in)),
hits(std::min(numDocs_in - offset, hits_in)),
diversity_want_hits(heapSize_in),
first_phase_rank_score_drop_limit(first_phase_rank_score_drop_limit_in),
second_phase_rank_score_drop_limit(second_phase_rank_score_drop_limit_in)
{ }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ struct MatchParams {
const uint32_t arraySize;
const uint32_t offset;
const uint32_t hits;
const uint32_t diversity_want_hits;
const std::optional<search::feature_t> first_phase_rank_score_drop_limit;
const std::optional<search::feature_t> second_phase_rank_score_drop_limit;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ MatchToolsFactory::createMatchTools() const
}

std::unique_ptr<IDiversifier>
MatchToolsFactory::createDiversifier(uint32_t heapSize) const
MatchToolsFactory::createDiversifier(uint32_t want_hits) const
{
if ( !_diversityParams.enabled() ) {
return {};
Expand All @@ -267,8 +267,8 @@ MatchToolsFactory::createDiversifier(uint32_t heapSize) const
Issue::report("Skipping diversity due to no %s attribute.", _diversityParams.attribute.c_str());
return {};
}
size_t max_per_group = std::max(size_t(1), size_t(heapSize / _diversityParams.min_groups));
return DiversityFilter::create(*attr, heapSize, max_per_group, _diversityParams.min_groups,
size_t max_per_group = std::max(size_t(1), size_t(want_hits / _diversityParams.min_groups));
return DiversityFilter::create(*attr, want_hits, max_per_group, _diversityParams.min_groups,
_diversityParams.cutoff_strategy == DiversityParams::CutoffStrategy::STRICT);
}

Expand Down

0 comments on commit b8d2453

Please sign in to comment.