Skip to content

Commit

Permalink
Merge pull request #29485 from vespa-engine/havardpe/andnot-and-andno…
Browse files Browse the repository at this point in the history
…t-collapsing

collapse co-nested and/andnot in first optimize pass
  • Loading branch information
baldersheim authored Nov 28, 2023
2 parents d4f030f + 86e70f1 commit 6359834
Show file tree
Hide file tree
Showing 5 changed files with 157 additions and 54 deletions.
170 changes: 130 additions & 40 deletions searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,48 +114,52 @@ TEST("test AndNot Blueprint") {
// createSearch tested by iterator unit test
}

template <typename BP>
void optimize(std::unique_ptr<BP> &ref) {
auto optimized = Blueprint::optimize(std::move(ref));
ref.reset(dynamic_cast<BP*>(optimized.get()));
ASSERT_TRUE(ref);
optimized.release();
}

TEST("test And propagates updated histestimate") {
AndBlueprint bp;
bp.setSourceId(2);
bp.addChild(ap(MyLeafSpec(20).create<RememberExecuteInfo>()->setSourceId(2)));
bp.addChild(ap(MyLeafSpec(200).create<RememberExecuteInfo>()->setSourceId(2)));
bp.addChild(ap(MyLeafSpec(2000).create<RememberExecuteInfo>()->setSourceId(2)));
bp.optimize_self(Blueprint::OptimizePass::FIRST);
bp.optimize_self(Blueprint::OptimizePass::SECOND);
bp.optimize_self(Blueprint::OptimizePass::LAST);
bp.setDocIdLimit(5000);
bp.fetchPostings(ExecuteInfo::TRUE);
EXPECT_EQUAL(3u, bp.childCnt());
for (uint32_t i = 0; i < bp.childCnt(); i++) {
const auto & child = dynamic_cast<const RememberExecuteInfo &>(bp.getChild(i));
auto bp = std::make_unique<AndBlueprint>();
bp->setSourceId(2);
bp->addChild(ap(MyLeafSpec(20).create<RememberExecuteInfo>()->setSourceId(2)));
bp->addChild(ap(MyLeafSpec(200).create<RememberExecuteInfo>()->setSourceId(2)));
bp->addChild(ap(MyLeafSpec(2000).create<RememberExecuteInfo>()->setSourceId(2)));
optimize(bp);
bp->setDocIdLimit(5000);
bp->fetchPostings(ExecuteInfo::TRUE);
EXPECT_EQUAL(3u, bp->childCnt());
for (uint32_t i = 0; i < bp->childCnt(); i++) {
const auto & child = dynamic_cast<const RememberExecuteInfo &>(bp->getChild(i));
EXPECT_EQUAL((i == 0), child.executeInfo.isStrict());
}
EXPECT_EQUAL(1.0f, dynamic_cast<const RememberExecuteInfo &>(bp.getChild(0)).executeInfo.hitRate());
EXPECT_EQUAL(1.0f/250, dynamic_cast<const RememberExecuteInfo &>(bp.getChild(1)).executeInfo.hitRate());
EXPECT_EQUAL(1.0f/(250*25), dynamic_cast<const RememberExecuteInfo &>(bp.getChild(2)).executeInfo.hitRate());
EXPECT_EQUAL(1.0f, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(0)).executeInfo.hitRate());
EXPECT_EQUAL(1.0f/250, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(1)).executeInfo.hitRate());
EXPECT_EQUAL(1.0f/(250*25), dynamic_cast<const RememberExecuteInfo &>(bp->getChild(2)).executeInfo.hitRate());
}

TEST("test Or propagates updated histestimate") {
OrBlueprint bp;
bp.setSourceId(2);
bp.addChild(ap(MyLeafSpec(5000).create<RememberExecuteInfo>()->setSourceId(2)));
bp.addChild(ap(MyLeafSpec(2000).create<RememberExecuteInfo>()->setSourceId(2)));
bp.addChild(ap(MyLeafSpec(800).create<RememberExecuteInfo>()->setSourceId(2)));
bp.addChild(ap(MyLeafSpec(20).create<RememberExecuteInfo>()->setSourceId(2)));
bp.optimize_self(Blueprint::OptimizePass::FIRST);
bp.optimize_self(Blueprint::OptimizePass::SECOND);
bp.optimize_self(Blueprint::OptimizePass::LAST);
bp.setDocIdLimit(5000);
bp.fetchPostings(ExecuteInfo::TRUE);
EXPECT_EQUAL(4u, bp.childCnt());
for (uint32_t i = 0; i < bp.childCnt(); i++) {
const auto & child = dynamic_cast<const RememberExecuteInfo &>(bp.getChild(i));
auto bp = std::make_unique<OrBlueprint>();
bp->setSourceId(2);
bp->addChild(ap(MyLeafSpec(5000).create<RememberExecuteInfo>()->setSourceId(2)));
bp->addChild(ap(MyLeafSpec(2000).create<RememberExecuteInfo>()->setSourceId(2)));
bp->addChild(ap(MyLeafSpec(800).create<RememberExecuteInfo>()->setSourceId(2)));
bp->addChild(ap(MyLeafSpec(20).create<RememberExecuteInfo>()->setSourceId(2)));
optimize(bp);
bp->setDocIdLimit(5000);
bp->fetchPostings(ExecuteInfo::TRUE);
EXPECT_EQUAL(4u, bp->childCnt());
for (uint32_t i = 0; i < bp->childCnt(); i++) {
const auto & child = dynamic_cast<const RememberExecuteInfo &>(bp->getChild(i));
EXPECT_TRUE(child.executeInfo.isStrict());
}
EXPECT_EQUAL(1.0f, dynamic_cast<const RememberExecuteInfo &>(bp.getChild(0)).executeInfo.hitRate());
EXPECT_EQUAL(1.0f, dynamic_cast<const RememberExecuteInfo &>(bp.getChild(1)).executeInfo.hitRate());
EXPECT_EQUAL(3.0f/5.0f, dynamic_cast<const RememberExecuteInfo &>(bp.getChild(2)).executeInfo.hitRate());
EXPECT_EQUAL(3.0f*42.0f/(5.0f*50.0f), dynamic_cast<const RememberExecuteInfo &>(bp.getChild(3)).executeInfo.hitRate());
EXPECT_EQUAL(1.0f, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(0)).executeInfo.hitRate());
EXPECT_EQUAL(1.0f, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(1)).executeInfo.hitRate());
EXPECT_EQUAL(3.0f/5.0f, dynamic_cast<const RememberExecuteInfo &>(bp->getChild(2)).executeInfo.hitRate());
EXPECT_EQUAL(3.0f*42.0f/(5.0f*50.0f), dynamic_cast<const RememberExecuteInfo &>(bp->getChild(3)).executeInfo.hitRate());
}

TEST("test And Blueprint") {
Expand Down Expand Up @@ -469,6 +473,7 @@ struct SourceBlenderTestFixture {
InvalidSelector selector_1; // the one
InvalidSelector selector_2; // not the one
void addChildrenForSBTest(IntermediateBlueprint & parent);
void addChildrenForSimpleSBTest(IntermediateBlueprint & parent);
};

void
Expand All @@ -488,7 +493,13 @@ void SourceBlenderTestFixture::addChildrenForSBTest(IntermediateBlueprint & pare
parent.addChild(addLeafsWithSourceId(std::make_unique<SourceBlenderBlueprint>(selector_1), {{2000, 2}, {1000, 1}}));
}

TEST_F("test SourceBlender below AND optimization", SourceBlenderTestFixture) {
void SourceBlenderTestFixture::addChildrenForSimpleSBTest(IntermediateBlueprint & parent) {
parent.addChild(addLeafsWithSourceId(std::make_unique<SourceBlenderBlueprint>(selector_1), {{200, 2}, {100, 1}, {300, 3}}));
parent.addChild(addLeafsWithSourceId(std::make_unique<SourceBlenderBlueprint>(selector_1), {{20, 2}, {10, 1}, {30, 3}}));
parent.addChild(addLeafsWithSourceId(std::make_unique<SourceBlenderBlueprint>(selector_1), {{2000, 2}, {1000, 1}}));
}

TEST_F("test SourceBlender below AND partial optimization", SourceBlenderTestFixture) {
auto top = std::make_unique<AndBlueprint>();
f.addChildrenForSBTest(*top);

Expand All @@ -505,7 +516,19 @@ TEST_F("test SourceBlender below AND optimization", SourceBlenderTestFixture) {
optimize_and_compare(std::move(top), std::move(expect));
}

TEST_F("test SourceBlender below OR optimization", SourceBlenderTestFixture) {
TEST_F("test AND replaced by source blender after full optimization", SourceBlenderTestFixture) {
auto top = std::make_unique<AndBlueprint>();
f.addChildrenForSimpleSBTest(*top);

auto expect = std::make_unique<SourceBlenderBlueprint>(f.selector_1);
expect->addChild(addLeafsWithSourceId(3, std::make_unique<AndBlueprint>(), {{30, 3}, {300, 3}}));
expect->addChild(addLeafsWithSourceId(2, std::make_unique<AndBlueprint>(), {{20, 2}, {200, 2}, {2000, 2}}));
expect->addChild(addLeafsWithSourceId(1, std::make_unique<AndBlueprint>(), {{10, 1}, {100, 1}, {1000, 1}}));

optimize_and_compare(std::move(top), std::move(expect));
}

TEST_F("test SourceBlender below OR partial optimization", SourceBlenderTestFixture) {
auto top = std::make_unique<OrBlueprint>();
f.addChildrenForSBTest(*top);
//-------------------------------------------------------------------------
Expand All @@ -521,6 +544,18 @@ TEST_F("test SourceBlender below OR optimization", SourceBlenderTestFixture) {
optimize_and_compare(std::move(top), std::move(expect));
}

TEST_F("test OR replaced by source blender after full optimization", SourceBlenderTestFixture) {
auto top = std::make_unique<OrBlueprint>();
f.addChildrenForSimpleSBTest(*top);

auto expect = std::make_unique<SourceBlenderBlueprint>(f.selector_1);
expect->addChild(addLeafsWithSourceId(3, std::make_unique<OrBlueprint>(), {{300, 3}, {30, 3}}));
expect->addChild(addLeafsWithSourceId(2, std::make_unique<OrBlueprint>(), {{2000, 2}, {200, 2}, {20, 2}}));
expect->addChild(addLeafsWithSourceId(1, std::make_unique<OrBlueprint>(), {{1000, 1}, {100, 1}, {10, 1}}));

optimize_and_compare(std::move(top), std::move(expect));
}

TEST_F("test SourceBlender below AND_NOT optimization", SourceBlenderTestFixture) {
auto top = std::make_unique<AndNotBlueprint>();
top->addChild(addLeafsWithSourceId(std::make_unique<SourceBlenderBlueprint>(f.selector_1), {{42, 1}}));
Expand Down Expand Up @@ -592,14 +627,69 @@ TEST("and with one empty child is optimized away") {
EXPECT_EQUAL(expect_up->asString(), top->asString());
}

TEST("AND AND collapsing") {
auto top = std::make_unique<AndBlueprint>();
addLeafs(*top, {1,3,5});
auto sub_and = std::make_unique<AndBlueprint>();
addLeafs(*sub_and, {2,4});
top->addChild(std::move(sub_and));
auto expect = std::make_unique<AndBlueprint>();
addLeafs(*expect, {1,2,3,4,5});
optimize_and_compare(std::move(top), std::move(expect));
}

TEST("OR OR collapsing") {
auto top = std::make_unique<OrBlueprint>();
addLeafs(*top, {1,3,5});
auto sub_and = std::make_unique<OrBlueprint>();
addLeafs(*sub_and, {2,4});
top->addChild(std::move(sub_and));
auto expect = std::make_unique<OrBlueprint>();
addLeafs(*expect, {5,4,3,2,1});
optimize_and_compare(std::move(top), std::move(expect));
}

TEST("AND_NOT AND_NOT collapsing") {
auto top = std::make_unique<AndNotBlueprint>();
auto sub_and_not = std::make_unique<AndNotBlueprint>();
addLeafs(*sub_and_not, {1,3,5});
top->addChild(std::move(sub_and_not));
addLeafs(*top, {2,4});
auto expect = std::make_unique<AndNotBlueprint>();
addLeafs(*expect, {1,5,4,3,2});
optimize_and_compare(std::move(top), std::move(expect));
}

TEST("AND_NOT AND AND_NOT collapsing") {
auto top = std::make_unique<AndNotBlueprint>();
auto sub_and = std::make_unique<AndBlueprint>();
auto sub_and_not = std::make_unique<AndNotBlueprint>();
addLeafs(*sub_and_not, {1,5,6});
sub_and->addChild(std::move(sub_and_not));
addLeafs(*sub_and, {3,2});
sub_and_not = std::make_unique<AndNotBlueprint>();
addLeafs(*sub_and_not, {4,8,9});
sub_and->addChild(std::move(sub_and_not));
top->addChild(std::move(sub_and));
addLeafs(*top, {7});
//-------------------------------------------------------------------------
auto expect = std::make_unique<AndNotBlueprint>();
auto sub_expect = std::make_unique<AndBlueprint>();
addLeafs(*sub_expect, {1,2,3,4});
expect->addChild(std::move(sub_expect));
addLeafs(*expect, {9,8,7,6,5});
optimize_and_compare(std::move(top), std::move(expect));
}

TEST("test single child optimization") {
InvalidSelector selector;
//-------------------------------------------------------------------------
Blueprint::UP top = ap((new AndNotBlueprint())->
addChild(ap((new AndBlueprint())->
addChild(ap((new OrBlueprint())->
addChild(ap((new SourceBlenderBlueprint(selector))->
addChild(addLeafs(std::make_unique<RankBlueprint>(), {42})))))))));
addChild(ap((new AndBlueprint())->
addChild(ap((new RankBlueprint())->
addChild(ap((new OrBlueprint())->
addChild(ap((new SourceBlenderBlueprint(selector))->
addChild(addLeafs(std::make_unique<RankBlueprint>(), {42})))))))))));
//-------------------------------------------------------------------------
Blueprint::UP expect = addLeafs(std::make_unique<SourceBlenderBlueprint>(selector), {42});
//-------------------------------------------------------------------------
Expand Down
5 changes: 2 additions & 3 deletions searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ void maybe_eliminate_self(Blueprint* &self, Blueprint::UP replacement) {
self->setSourceId(discard->getSourceId());
discard->setParent(nullptr);
}
// replace with empty blueprint if empty
if (self->getState().estimate().empty) {
// replace with empty blueprint if empty, skip if already empty blueprint
if ((self->as_empty() == nullptr) && self->getState().estimate().empty) {
Blueprint::UP discard(self);
self = new EmptyBlueprint(discard->getState().fields());
self->setParent(discard->getParent());
Expand Down Expand Up @@ -130,7 +130,6 @@ Blueprint::UP
Blueprint::optimize(Blueprint::UP bp) {
Blueprint *root = bp.release();
root->optimize(root, OptimizePass::FIRST);
root->optimize(root, OptimizePass::SECOND);
root->optimize(root, OptimizePass::LAST);
return Blueprint::UP(root);
}
Expand Down
7 changes: 6 additions & 1 deletion searchlib/src/vespa/searchlib/queryeval/blueprint.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class SourceBlenderBlueprint;
class AndBlueprint;
class AndNotBlueprint;
class OrBlueprint;
class EmptyBlueprint;

/**
* A Blueprint is an intermediate representation of a search. More
Expand All @@ -50,7 +51,7 @@ class Blueprint
using Children = std::vector<Blueprint::UP>;
using SearchIteratorUP = std::unique_ptr<SearchIterator>;

enum class OptimizePass { FIRST, SECOND, LAST };
enum class OptimizePass { FIRST, LAST };

struct HitEstimate {
uint32_t estHits;
Expand Down Expand Up @@ -270,6 +271,9 @@ class Blueprint
virtual bool isRank() const noexcept { return false; }
virtual const attribute::ISearchContext *get_attribute_search_context() const noexcept { return nullptr; }

// to avoid replacing an empty blueprint with another empty blueprint
virtual EmptyBlueprint *as_empty() noexcept { return nullptr; }

// For document summaries with matched-elements-only set.
virtual std::unique_ptr<MatchingElementsSearch> create_matching_elements_search(const MatchingElementsFields &fields) const;
};
Expand Down Expand Up @@ -347,6 +351,7 @@ class IntermediateBlueprint : public blueprint::StateCache
IntermediateBlueprint & insertChild(size_t n, Blueprint::UP child);
IntermediateBlueprint &addChild(Blueprint::UP child);
Blueprint::UP removeChild(size_t n);
Blueprint::UP removeLastChild() { return removeChild(childCnt() - 1); }
SearchIteratorUP createSearch(fef::MatchData &md, bool strict) const override;

virtual HitEstimate combine(const std::vector<HitEstimate> &data) const = 0;
Expand Down
28 changes: 18 additions & 10 deletions searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ void optimize_source_blenders(IntermediateBlueprint &self, size_t begin_idx) {
source_blenders.pop_back();
SourceBlenderBlueprint * blender = blender_up->asSourceBlender();
while (blender->childCnt() > 0) {
Blueprint::UP child_up = blender->removeChild(blender->childCnt() - 1);
Blueprint::UP child_up = blender->removeLastChild();
size_t source_idx = lookup_create_source(sources, child_up->getSourceId(), self.get_docid_limit());
sources[source_idx]->addChild(std::move(child_up));
}
Expand Down Expand Up @@ -107,14 +107,24 @@ AndNotBlueprint::optimize_self(OptimizePass pass)
return;
}
if (pass == OptimizePass::FIRST) {
AndNotBlueprint * child = getChild(0).asAndNot();
if (child != nullptr) {
if (auto *child = getChild(0).asAndNot()) {
while (child->childCnt() > 1) {
addChild(child->removeChild(1));
addChild(child->removeLastChild());
}
insertChild(1, child->removeChild(0));
removeChild(0);
}
if (auto *child = getChild(0).asAnd()) {
for (size_t i = 0; i < child->childCnt(); ++i) {
if (auto *grand_child = child->getChild(i).asAndNot()) {
while (grand_child->childCnt() > 1) {
addChild(grand_child->removeLastChild());
}
child->addChild(grand_child->removeChild(0));
child->removeChild(i--);
}
}
}
for (size_t i = 1; i < childCnt(); ++i) {
if (getChild(i).getState().estimate().empty) {
removeChild(i--);
Expand Down Expand Up @@ -195,10 +205,9 @@ AndBlueprint::optimize_self(OptimizePass pass)
{
if (pass == OptimizePass::FIRST) {
for (size_t i = 0; i < childCnt(); ++i) {
AndBlueprint * child = getChild(i).asAnd();
if (child != nullptr) {
if (auto *child = getChild(i).asAnd()) {
while (child->childCnt() > 0) {
addChild(child->removeChild(0));
addChild(child->removeLastChild());
}
removeChild(i--);
}
Expand Down Expand Up @@ -297,10 +306,9 @@ OrBlueprint::optimize_self(OptimizePass pass)
{
if (pass == OptimizePass::FIRST) {
for (size_t i = 0; (childCnt() > 1) && (i < childCnt()); ++i) {
OrBlueprint * child = getChild(i).asOr();
if (child != nullptr) {
if (auto *child = getChild(i).asOr()) {
while (child->childCnt() > 0) {
addChild(child->removeChild(0));
addChild(child->removeLastChild());
}
removeChild(i--);
} else if (getChild(i).getState().estimate().empty) {
Expand Down
1 change: 1 addition & 0 deletions searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class EmptyBlueprint : public SimpleLeafBlueprint
EmptyBlueprint(FieldSpecBase field) : SimpleLeafBlueprint(field) {}
EmptyBlueprint() = default;
SearchIterator::UP createFilterSearch(bool strict, FilterConstraint constraint) const override;
EmptyBlueprint *as_empty() noexcept final override { return this; }
};

class AlwaysTrueBlueprint : public SimpleLeafBlueprint
Expand Down

0 comments on commit 6359834

Please sign in to comment.