From 6643f031721f2125ec12984bf951add27d5f48e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5vard=20Pettersen?= Date: Fri, 5 Jan 2024 15:28:37 +0000 Subject: [PATCH 1/2] take strictness into account for flow/cost/sorting use common code with adapters make cost calculation const (sort index, not children) empty AND/ANDNOT estimate now equals input flow --- .../queryeval/blueprint/blueprint_test.cpp | 4 +- .../intermediate_blueprints_test.cpp | 3 +- .../queryeval/flow/queryeval_flow_test.cpp | 256 ++++++++++++++---- .../src/vespa/searchlib/queryeval/blueprint.h | 30 -- .../src/vespa/searchlib/queryeval/flow.h | 227 +++++++++++++++- .../queryeval/intermediate_blueprints.cpp | 40 +-- 6 files changed, 442 insertions(+), 118 deletions(-) diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp index f800e124bdcd..bbd2744119ae 100644 --- a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp @@ -24,10 +24,10 @@ class MyOr : public IntermediateBlueprint private: public: double calculate_cost() const final { - return cost_of(get_children(), OrFlow()); + return OrFlow::cost_of(get_children()); } double calculate_relative_estimate() const final { - return estimate_of(get_children(), OrFlow()); + return OrFlow::estimate_of(get_children()); } HitEstimate combine(const std::vector &data) const override { return max(data); diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index ab1c004c721c..ed2bfdeca6fd 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -1289,7 +1289,6 @@ TEST("require that OR blueprint use saturated sum as estimate") { } void verify_relative_estimate(make &&mk, double expect) { - EXPECT_EQUAL(mk.making->estimate(), 0.0); Blueprint::UP bp = std::move(mk).leafs({200,300,950}); bp->setDocIdLimit(1000); EXPECT_EQUAL(bp->estimate(), expect); @@ -1380,7 +1379,7 @@ TEST("cost for ONEAR") { } TEST("cost for WEAKAND") { - verify_cost(make::WEAKAND(1000), calc_cost({{1.1, 0.8},{1.2, 0.7},{1.3, 0.5}})); + verify_cost(make::WEAKAND(1000), calc_cost({{1.3, 0.5},{1.2, 0.7},{1.1, 0.8}})); } TEST_MAIN() { TEST_DEBUG("lhs.out", "rhs.out"); TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp index ceda30f169a2..c92f85357b5b 100644 --- a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp +++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp @@ -5,44 +5,46 @@ #include #include -using search::queryeval::AndFlow; -using search::queryeval::OrFlow; +constexpr size_t loop_cnt = 64; + +using namespace search::queryeval; + +struct ItemAdapter { + double estimate(const auto &child) const noexcept { return child.rel_est; } + double cost(const auto &child) const noexcept { return child.cost; } + double strict_cost(const auto &child) const noexcept { return child.strict_cost; } +}; struct Item { double rel_est; double cost; - Item(double rel_est_in, double cost_in) noexcept - : rel_est(rel_est_in), cost(cost_in) {} - static void sort_for_and(std::vector &data) { - std::sort(data.begin(), data.end(), [](const Item &a, const Item &b) noexcept { - return (1.0 - a.rel_est) / a.cost > (1.0 - b.rel_est) / b.cost; - }); + double strict_cost; + Item(double rel_est_in, double cost_in, double strict_cost_in) noexcept + : rel_est(rel_est_in), cost(cost_in), strict_cost(strict_cost_in) {} + template static double estimate_of(std::vector &data) { + return FLOW::estimate_of(ItemAdapter(), data); } - static void sort_for_or(std::vector &data) { - std::sort(data.begin(), data.end(), [](const Item &a, const Item &b) noexcept { - return a.rel_est / a.cost > b.rel_est / b.cost; - }); + template static void sort(std::vector &data, bool strict) { + FLOW::sort(ItemAdapter(), data, strict); } - static double cost_of(const std::vector &data, auto flow) { - double cost = 0.0; - for (const Item &item: data) { - cost += flow.flow() * item.cost; - flow.add(item.rel_est); - } - return cost; + template static double cost_of(const std::vector &data, bool strict) { + return FLOW::cost_of(ItemAdapter(), data, strict); + } + template static double ordered_cost_of(const std::vector &data, bool strict) { + return flow::ordered_cost_of(ItemAdapter(), data, FLOW(1.0, strict)); } - static double cost_of_and(const std::vector &data) { return cost_of(data, AndFlow()); } - static double cost_of_or(const std::vector &data) { return cost_of(data, OrFlow()); } + auto operator <=>(const Item &rhs) const noexcept = default; }; std::vector gen_data(size_t size) { static std::mt19937 gen; - static std::uniform_real_distribution rel_est(0.1, 0.9); - static std::uniform_real_distribution cost(1.0, 10.0); + static std::uniform_real_distribution rel_est(0.1, 0.9); + static std::uniform_real_distribution cost(1.0, 10.0); + static std::uniform_real_distribution strict_cost(0.1, 5.0); std::vector result; result.reserve(size); for (size_t i = 0; i < size; ++i) { - result.emplace_back(rel_est(gen), cost(gen)); + result.emplace_back(rel_est(gen), cost(gen), strict_cost(gen)); } return result; } @@ -80,37 +82,191 @@ TEST(FlowTest, perm_test) { EXPECT_EQ(seen.size(), 120); } +template