From 1149b6a3bff8e286e36f7bfd73314906fc301b02 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Fri, 29 Nov 2024 15:17:42 +0100 Subject: [PATCH] Remove idf range from weak and blueprint. --- .../searchcore/proton/matching/blueprintbuilder.cpp | 1 - .../blueprint/intermediate_blueprints_test.cpp | 4 ++-- .../weak_and_scorers/weak_and_scorers_test.cpp | 12 +----------- .../searchlib/queryeval/intermediate_blueprints.cpp | 10 +++------- .../searchlib/queryeval/intermediate_blueprints.h | 5 ++--- .../src/vespa/searchlib/queryeval/wand/wand_parts.h | 13 +++---------- 6 files changed, 11 insertions(+), 34 deletions(-) diff --git a/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp b/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp index 1f671ac58ae5..0b5e5e6c3458 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/blueprintbuilder.cpp @@ -71,7 +71,6 @@ class BlueprintBuilderVisitor : void buildWeakAnd(ProtonWeakAnd &n) { auto *wand = new WeakAndBlueprint(n.getTargetNumHits(), - 1.0 /* weakand_range */, _requestContext.get_create_blueprint_params().weakand_stop_word_strategy, is_search_multi_threaded()); Blueprint::UP result(wand); diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index 304a1e56738a..7c0ff58e7c95 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -796,10 +796,10 @@ struct make { static make ONEAR(uint32_t window) { return make(std::make_unique(window)); } static make WEAKAND(uint32_t n) { return make(std::make_unique(n)); } static make WEAKAND_ADJUST(double limit) { - return make(std::make_unique(100, 0.0, wand::StopWordStrategy(-limit, 1.0, 0), true)); + return make(std::make_unique(100, wand::StopWordStrategy(-limit, 1.0, 0), true)); } static make WEAKAND_DROP(double limit) { - return make(std::make_unique(100, 0.0, wand::StopWordStrategy(1.0, -limit, 0), true)); + return make(std::make_unique(100, wand::StopWordStrategy(1.0, -limit, 0), true)); } }; diff --git a/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp b/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp index 8a0bc28f4ddc..5999c21391ae 100644 --- a/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp +++ b/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp @@ -65,7 +65,7 @@ TEST("require that DotProductScorer calculates term score") TEST("test bm25 idf scorer for wand") { - wand::Bm25TermFrequencyScorer scorer(1000000, 1.0); + wand::Bm25TermFrequencyScorer scorer(1000000); EXPECT_EQUAL(13410046, scorer.calculateMaxScore(1, 1)); EXPECT_EQUAL(11464136, scorer.calculateMaxScore(10, 1)); EXPECT_EQUAL(6907256, scorer.calculateMaxScore(1000, 1)); @@ -76,14 +76,4 @@ TEST("test bm25 idf scorer for wand") EXPECT_EQUAL(10050, scorer.calculateMaxScore(990000, 1)); } -TEST("test limited range of bm25 idf scorer for wand") -{ - wand::Bm25TermFrequencyScorer scorer08(1000000, 0.8); - wand::Bm25TermFrequencyScorer scorer10(1000000, 1.0); - EXPECT_EQUAL(8207814, scorer08.calculateMaxScore(1000, 1)); - EXPECT_EQUAL(2690049, scorer08.calculateMaxScore(990000, 1)); - EXPECT_EQUAL(6907256, scorer10.calculateMaxScore(1000, 1)); - EXPECT_EQUAL(10050, scorer10.calculateMaxScore(990000, 1)); -} - TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp index f89cae857858..9739d935d0a4 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp @@ -419,10 +419,9 @@ WeakAndBlueprint::my_flow(InFlow in_flow) const return AnyFlow::create(in_flow); } -WeakAndBlueprint::WeakAndBlueprint(uint32_t n, float idf_range, wand::StopWordStrategy stop_word_strategy, bool thread_safe) +WeakAndBlueprint::WeakAndBlueprint(uint32_t n, wand::StopWordStrategy stop_word_strategy, bool thread_safe) : _scores(WeakAndPriorityQueue::createHeap(n, thread_safe)), _n(n), - _idf_range(idf_range), _stop_word_strategy(stop_word_strategy), _weights(), _matching_phase(MatchingPhase::FIRST_PHASE) @@ -520,11 +519,8 @@ WeakAndBlueprint::createIntermediateSearch(MultiSearch::Children sub_searches, } bool readonly_scores_heap = (_matching_phase != MatchingPhase::FIRST_PHASE); wand::MatchParams innerParams{*_scores, _stop_word_strategy, wand::DEFAULT_PARALLEL_WAND_SCORES_ADJUST_FREQUENCY, get_docid_limit()}; - return (_idf_range == 0.0) - ? WeakAndSearch::create(terms, innerParams, wand::TermFrequencyScorer(), _n, strict(), - readonly_scores_heap) - : WeakAndSearch::create(terms, innerParams, wand::Bm25TermFrequencyScorer(get_docid_limit(), _idf_range), _n, strict(), - readonly_scores_heap); + return WeakAndSearch::create(terms, innerParams, wand::Bm25TermFrequencyScorer(get_docid_limit()), _n, strict(), + readonly_scores_heap); } SearchIterator::UP diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h index c4a973fcff62..34665259fd8a 100644 --- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h +++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h @@ -92,7 +92,6 @@ class WeakAndBlueprint : public IntermediateBlueprint private: std::unique_ptr _scores; uint32_t _n; - float _idf_range; wand::StopWordStrategy _stop_word_strategy; std::vector _weights; MatchingPhase _matching_phase; @@ -112,8 +111,8 @@ class WeakAndBlueprint : public IntermediateBlueprint fef::MatchData &md) const override; SearchIterator::UP createFilterSearch(FilterConstraint constraint) const override; - explicit WeakAndBlueprint(uint32_t n) : WeakAndBlueprint(n, 0.0, wand::StopWordStrategy::none(), true) {} - WeakAndBlueprint(uint32_t n, float idf_range, wand::StopWordStrategy stop_word_strategy, bool thread_safe); + explicit WeakAndBlueprint(uint32_t n) : WeakAndBlueprint(n, wand::StopWordStrategy::none(), true) {} + WeakAndBlueprint(uint32_t n, wand::StopWordStrategy stop_word_strategy, bool thread_safe); ~WeakAndBlueprint() override; void addTerm(Blueprint::UP bp, uint32_t weight) { addChild(std::move(bp)); diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h index c62d92fc7386..f48b9578ee1a 100644 --- a/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h +++ b/searchlib/src/vespa/searchlib/queryeval/wand/wand_parts.h @@ -480,18 +480,13 @@ class Bm25TermFrequencyScorer { public: using Bm25Executor = features::Bm25Executor; - Bm25TermFrequencyScorer(uint32_t num_docs, float range) noexcept - : _num_docs(num_docs), - _range(range), - _max_idf(Bm25Executor::calculate_inverse_document_frequency({1, _num_docs})) + Bm25TermFrequencyScorer(uint32_t num_docs) noexcept + : _num_docs(num_docs) { } - double apply_range(double idf) const noexcept { - return (1.0 - _range)*_max_idf + _range * idf; - } // weight * scaled_bm25_idf, scaled to fixedpoint score_t calculateMaxScore(double estHits, double weight) const noexcept { return score_t(TermFrequencyScorer_TERM_SCORE_FACTOR * weight * - apply_range(Bm25Executor::calculate_inverse_document_frequency({static_cast(estHits), _num_docs}))); + Bm25Executor::calculate_inverse_document_frequency({static_cast(estHits), _num_docs})); } score_t calculateMaxScore(const Term &term) const noexcept { @@ -504,8 +499,6 @@ class Bm25TermFrequencyScorer } private: uint32_t _num_docs; - float _range; - double _max_idf; }; //-----------------------------------------------------------------------------