Skip to content

Commit

Permalink
Merge pull request #24663 from vespa-engine/havardpe/improve-filter-c…
Browse files Browse the repository at this point in the history
…alculation

short-circuit filter evaluation if filter iterator is trivial
  • Loading branch information
baldersheim authored Oct 31, 2022
2 parents 1bfd169 + 7fe19e2 commit e9cefec
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 12 deletions.
18 changes: 18 additions & 0 deletions searchlib/src/tests/queryeval/global_filter/global_filter_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <vespa/vespalib/gtest/gtest.h>
#include <vespa/vespalib/util/require.h>
#include <vespa/vespalib/util/classname.h>
#include <vespa/vespalib/util/simple_thread_bundle.h>
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/searchlib/queryeval/global_filter.h>
Expand Down Expand Up @@ -199,4 +200,21 @@ TEST(GlobalFilterTest, multi_threaded_global_filter_works_with_docid_limit_0) {
verify(*filter, 2, 1);
}

TEST(GlobalFilterTest, global_filter_matching_any_document_becomes_invalid) {
SimpleThreadBundle thread_bundle(7);
AlwaysTrueBlueprint blueprint;
auto filter = GlobalFilter::create(blueprint, 100, thread_bundle);
EXPECT_FALSE(filter->is_active());
}

TEST(GlobalFilterTest, global_filter_not_matching_any_document_becomes_empty) {
SimpleThreadBundle thread_bundle(7);
EmptyBlueprint blueprint;
auto filter = GlobalFilter::create(blueprint, 100, thread_bundle);
auto class_name = vespalib::getClassName(*filter);
fprintf(stderr, "empty global filter class name: %s\n", class_name.c_str());
EXPECT_TRUE(class_name.find("EmptyFilter") < class_name.size());
verify(*filter, 1000, 100);
}

GTEST_MAIN_RUN_ALL_TESTS()
55 changes: 43 additions & 12 deletions searchlib/src/vespa/searchlib/queryeval/global_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

using vespalib::Runnable;
using vespalib::ThreadBundle;
using vespalib::Trinary;

namespace search::queryeval {

Expand All @@ -21,6 +22,15 @@ struct Inactive : GlobalFilter {
bool check(uint32_t) const override { abort(); }
};

struct EmptyFilter : GlobalFilter {
uint32_t docid_limit;
EmptyFilter(uint32_t docid_limit_in) : docid_limit(docid_limit_in) {}
bool is_active() const override { return true; }
uint32_t size() const override { return docid_limit; }
uint32_t count() const override { return 0; }
bool check(uint32_t) const override { return false; }
};

struct BitVectorFilter : public GlobalFilter {
std::unique_ptr<BitVector> vector;
BitVectorFilter(std::unique_ptr<BitVector> vector_in)
Expand Down Expand Up @@ -56,22 +66,38 @@ struct MultiBitVectorFilter : public GlobalFilter {
}
};

std::unique_ptr<BitVector> make_part(Blueprint &blueprint, uint32_t begin, uint32_t end) {
struct PartResult {
Trinary matches_any;
std::unique_ptr<BitVector> bits;
PartResult()
: matches_any(Trinary::False), bits() {}
PartResult(Trinary matches_any_in)
: matches_any(matches_any_in), bits() {}
PartResult(std::unique_ptr<BitVector> &&bits_in)
: matches_any(Trinary::Undefined), bits(std::move(bits_in)) {}
};

PartResult make_part(Blueprint &blueprint, uint32_t begin, uint32_t end) {
bool strict = true;
auto constraint = Blueprint::FilterConstraint::UPPER_BOUND;
auto filter_iterator = blueprint.createFilterSearch(strict, constraint);
filter_iterator->initRange(begin, end);
auto result = filter_iterator->get_hits(begin);
// count bits in parallel and cache the results for later
result->countTrueBits();
return result;
auto filter = blueprint.createFilterSearch(strict, constraint);
auto matches_any = filter->matches_any();
if (matches_any == Trinary::Undefined) {
filter->initRange(begin, end);
auto bits = filter->get_hits(begin);
// count bits in parallel and cache the results for later
bits->countTrueBits();
return PartResult(std::move(bits));
} else {
return PartResult(matches_any);
}
}

struct MakePart : Runnable {
Blueprint &blueprint;
uint32_t begin;
uint32_t end;
std::unique_ptr<BitVector> result;
PartResult result;
MakePart(Blueprint &blueprint_in, uint32_t begin_in, uint32_t end_in) noexcept
: blueprint(blueprint_in), begin(begin_in), end(end_in), result() {}
void run() override { result = make_part(blueprint, begin, end); }
Expand Down Expand Up @@ -146,13 +172,18 @@ GlobalFilter::create(Blueprint &blueprint, uint32_t docid_limit, ThreadBundle &t
assert(parts.size() <= num_threads);
assert((docid == docid_limit) || parts.empty());
thread_bundle.run(parts);
if (parts.size() == 1) {
return create(std::move(parts[0].result));
}
std::vector<std::unique_ptr<BitVector>> vectors;
vectors.reserve(parts.size());
for (MakePart &part: parts) {
vectors.push_back(std::move(part.result));
switch (part.result.matches_any) {
case Trinary::False: return std::make_unique<EmptyFilter>(docid_limit);
case Trinary::True: return create(); // filter not needed after all
case Trinary::Undefined:
vectors.push_back(std::move(part.result.bits));
}
}
if (vectors.size() == 1) {
return create(std::move(vectors[0]));
}
return create(std::move(vectors));
}
Expand Down

0 comments on commit e9cefec

Please sign in to comment.