Skip to content

Commit

Permalink
Remove idf range from WeakAndBlueprint and Bm25TermFrequencyScorer.
Browse files Browse the repository at this point in the history
  • Loading branch information
toregge committed Nov 29, 2024
1 parent 2c40d77 commit 1149b6a
Show file tree
Hide file tree
Showing 6 changed files with 11 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ class BlueprintBuilderVisitor :

void buildWeakAnd(ProtonWeakAnd &n) {
auto *wand = new WeakAndBlueprint(n.getTargetNumHits(),
1.0 /* weakand_range */,
_requestContext.get_create_blueprint_params().weakand_stop_word_strategy,
is_search_multi_threaded());
Blueprint::UP result(wand);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -796,10 +796,10 @@ struct make {
static make ONEAR(uint32_t window) { return make(std::make_unique<ONearBlueprint>(window)); }
static make WEAKAND(uint32_t n) { return make(std::make_unique<WeakAndBlueprint>(n)); }
static make WEAKAND_ADJUST(double limit) {
return make(std::make_unique<WeakAndBlueprint>(100, 0.0, wand::StopWordStrategy(-limit, 1.0, 0), true));
return make(std::make_unique<WeakAndBlueprint>(100, wand::StopWordStrategy(-limit, 1.0, 0), true));
}
static make WEAKAND_DROP(double limit) {
return make(std::make_unique<WeakAndBlueprint>(100, 0.0, wand::StopWordStrategy(1.0, -limit, 0), true));
return make(std::make_unique<WeakAndBlueprint>(100, wand::StopWordStrategy(1.0, -limit, 0), true));
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ TEST("require that DotProductScorer calculates term score")

TEST("test bm25 idf scorer for wand")
{
wand::Bm25TermFrequencyScorer scorer(1000000, 1.0);
wand::Bm25TermFrequencyScorer scorer(1000000);
EXPECT_EQUAL(13410046, scorer.calculateMaxScore(1, 1));
EXPECT_EQUAL(11464136, scorer.calculateMaxScore(10, 1));
EXPECT_EQUAL(6907256, scorer.calculateMaxScore(1000, 1));
Expand All @@ -76,14 +76,4 @@ TEST("test bm25 idf scorer for wand")
EXPECT_EQUAL(10050, scorer.calculateMaxScore(990000, 1));
}

TEST("test limited range of bm25 idf scorer for wand")
{
wand::Bm25TermFrequencyScorer scorer08(1000000, 0.8);
wand::Bm25TermFrequencyScorer scorer10(1000000, 1.0);
EXPECT_EQUAL(8207814, scorer08.calculateMaxScore(1000, 1));
EXPECT_EQUAL(2690049, scorer08.calculateMaxScore(990000, 1));
EXPECT_EQUAL(6907256, scorer10.calculateMaxScore(1000, 1));
EXPECT_EQUAL(10050, scorer10.calculateMaxScore(990000, 1));
}

TEST_MAIN() { TEST_RUN_ALL(); }
Original file line number Diff line number Diff line change
Expand Up @@ -419,10 +419,9 @@ WeakAndBlueprint::my_flow(InFlow in_flow) const
return AnyFlow::create<OrFlow>(in_flow);
}

WeakAndBlueprint::WeakAndBlueprint(uint32_t n, float idf_range, wand::StopWordStrategy stop_word_strategy, bool thread_safe)
WeakAndBlueprint::WeakAndBlueprint(uint32_t n, wand::StopWordStrategy stop_word_strategy, bool thread_safe)
: _scores(WeakAndPriorityQueue::createHeap(n, thread_safe)),
_n(n),
_idf_range(idf_range),
_stop_word_strategy(stop_word_strategy),
_weights(),
_matching_phase(MatchingPhase::FIRST_PHASE)
Expand Down Expand Up @@ -520,11 +519,8 @@ WeakAndBlueprint::createIntermediateSearch(MultiSearch::Children sub_searches,
}
bool readonly_scores_heap = (_matching_phase != MatchingPhase::FIRST_PHASE);
wand::MatchParams innerParams{*_scores, _stop_word_strategy, wand::DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY, get_docid_limit()};
return (_idf_range == 0.0)
? WeakAndSearch::create(terms, innerParams, wand::TermFrequencyScorer(), _n, strict(),
readonly_scores_heap)
: WeakAndSearch::create(terms, innerParams, wand::Bm25TermFrequencyScorer(get_docid_limit(), _idf_range), _n, strict(),
readonly_scores_heap);
return WeakAndSearch::create(terms, innerParams, wand::Bm25TermFrequencyScorer(get_docid_limit()), _n, strict(),
readonly_scores_heap);
}

SearchIterator::UP
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ class WeakAndBlueprint : public IntermediateBlueprint
private:
std::unique_ptr<WeakAndPriorityQueue> _scores;
uint32_t _n;
float _idf_range;
wand::StopWordStrategy _stop_word_strategy;
std::vector<uint32_t> _weights;
MatchingPhase _matching_phase;
Expand All @@ -112,8 +111,8 @@ class WeakAndBlueprint : public IntermediateBlueprint
fef::MatchData &md) const override;
SearchIterator::UP createFilterSearch(FilterConstraint constraint) const override;

explicit WeakAndBlueprint(uint32_t n) : WeakAndBlueprint(n, 0.0, wand::StopWordStrategy::none(), true) {}
WeakAndBlueprint(uint32_t n, float idf_range, wand::StopWordStrategy stop_word_strategy, bool thread_safe);
explicit WeakAndBlueprint(uint32_t n) : WeakAndBlueprint(n, wand::StopWordStrategy::none(), true) {}
WeakAndBlueprint(uint32_t n, wand::StopWordStrategy stop_word_strategy, bool thread_safe);
~WeakAndBlueprint() override;
void addTerm(Blueprint::UP bp, uint32_t weight) {
addChild(std::move(bp));
Expand Down
13 changes: 3 additions & 10 deletions searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h
Original file line number Diff line number Diff line change
Expand Up @@ -480,18 +480,13 @@ class Bm25TermFrequencyScorer
{
public:
using Bm25Executor = features::Bm25Executor;
Bm25TermFrequencyScorer(uint32_t num_docs, float range) noexcept
: _num_docs(num_docs),
_range(range),
_max_idf(Bm25Executor::calculate_inverse_document_frequency({1, _num_docs}))
Bm25TermFrequencyScorer(uint32_t num_docs) noexcept
: _num_docs(num_docs)
{ }
double apply_range(double idf) const noexcept {
return (1.0 - _range)*_max_idf + _range * idf;
}
// weight * scaled_bm25_idf, scaled to fixedpoint
score_t calculateMaxScore(double estHits, double weight) const noexcept {
return score_t(TermFrequencyScorer_TERM_SCORE_FACTOR * weight *
apply_range(Bm25Executor::calculate_inverse_document_frequency({static_cast<uint64_t>(estHits), _num_docs})));
Bm25Executor::calculate_inverse_document_frequency({static_cast<uint64_t>(estHits), _num_docs}));
}

score_t calculateMaxScore(const Term &term) const noexcept {
Expand All @@ -504,8 +499,6 @@ class Bm25TermFrequencyScorer
}
private:
uint32_t _num_docs;
float _range;
double _max_idf;
};

//-----------------------------------------------------------------------------
Expand Down

0 comments on commit 1149b6a

Please sign in to comment.