Skip to content

Commit

Permalink
Merge pull request #32967 from vespa-engine/toregge/add-bitvector-ite…
Browse files Browse the repository at this point in the history
…rator-variant-that-resets-term-field-match-data-on-unpack

Add bitvector iterator variant that resets term field match data on u…
  • Loading branch information
geirst authored Nov 28, 2024
2 parents 557383a + 3b44441 commit e6a50fd
Show file tree
Hide file tree
Showing 6 changed files with 121 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ struct StringKey : public LookupKey {
const std::string field_name = "test";
constexpr uint32_t field_id = 3;
uint32_t doc_id_limit = 500;
const std::string strict_bitvector_iterator_class_name_prefix = "search::BitVectorIteratorTT<search::BitVectorIteratorStrictT";

using Docids = std::vector<uint32_t>;

Expand Down Expand Up @@ -326,7 +327,7 @@ TEST_P(DirectMultiTermBlueprintTest, bitvectors_used_instead_of_btree_iterators_
add_terms({1, 100});
auto itr = create_leaf_search();
expect_or_iterator(*itr, 2);
expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT");
expect_or_child(*itr, 0, strict_bitvector_iterator_class_name_prefix);
expect_or_child(*itr, 1, iterator_unpack_docid);
expect_hits(concat({10}, range(100, 128)), *itr);
}
Expand All @@ -349,8 +350,8 @@ TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_btree_iterators_used_for_fil
add_terms({1, 3, 100, 300});
auto itr = create_leaf_search();
expect_or_iterator(*itr, 3);
expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT");
expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT");
expect_or_child(*itr, 0, strict_bitvector_iterator_class_name_prefix);
expect_or_child(*itr, 1, strict_bitvector_iterator_class_name_prefix);
expect_or_child(*itr, 2, iterator_unpack_docid);
expect_hits(concat({10, 30, 31}, concat(range(100, 128), range(300, 128))), *itr);
}
Expand All @@ -361,8 +362,8 @@ TEST_P(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field)
add_terms({100, 300});
auto itr = create_leaf_search();
expect_or_iterator(*itr, 2);
expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT");
expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT");
expect_or_child(*itr, 0, strict_bitvector_iterator_class_name_prefix);
expect_or_child(*itr, 1, strict_bitvector_iterator_class_name_prefix);
expect_hits(concat(range(100, 128), range(300, 128)), *itr);
}

Expand All @@ -381,8 +382,8 @@ TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_btree_iterators_used_for_fil
add_terms({1, 3, 100, 300});
auto itr = create_leaf_search();
expect_or_iterator(*itr, 3);
expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT");
expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT");
expect_or_child(*itr, 0, strict_bitvector_iterator_class_name_prefix);
expect_or_child(*itr, 1, strict_bitvector_iterator_class_name_prefix);
expect_or_child(*itr, 2, iterator_unpack_none);
expect_hits(concat({10, 30, 31}, concat(range(100, 128), range(300, 128))), *itr);
}
Expand All @@ -393,8 +394,8 @@ TEST_P(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field_when_
add_terms({100, 300});
auto itr = create_leaf_search();
expect_or_iterator(*itr, 2);
expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT");
expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT");
expect_or_child(*itr, 0, strict_bitvector_iterator_class_name_prefix);
expect_or_child(*itr, 1, strict_bitvector_iterator_class_name_prefix);
expect_hits(concat(range(100, 128), range(300, 128)), *itr);
}

Expand Down
24 changes: 24 additions & 0 deletions searchlib/src/tests/common/bitvector/bitvector_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -799,4 +799,28 @@ TEST("Require that parallell OR computes same result as serial") {
}
}

namespace {

bool check_full_term_field_match_data_reset_on_unpack(bool strict, bool full_reset)
{
AllocatedBitVector bv(10);
bv.setBit(5);
fef::TermFieldMatchData tfmd;
auto iterator = BitVectorIterator::create(&bv, bv.size(), tfmd, strict, false, full_reset);
tfmd.setNumOccs(10);
iterator->initRange(1, bv.size());
iterator->unpack(5);
return tfmd.getNumOccs() == 0;
}

}

TEST("reset term field match data on unpack")
{
EXPECT_FALSE(check_full_term_field_match_data_reset_on_unpack(false, false));
EXPECT_FALSE(check_full_term_field_match_data_reset_on_unpack(true, false));
EXPECT_TRUE(check_full_term_field_match_data_reset_on_unpack(false, true));
EXPECT_TRUE(check_full_term_field_match_data_reset_on_unpack(true, true));
}

TEST_MAIN() { TEST_RUN_ALL(); }
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ using vespalib::normalize_class_name;
using Path = std::vector<std::variant<size_t,std::string_view>>;

std::string strict_equiv_name = "search::queryeval::EquivImpl<true, search::queryeval::StrictHeapOrSearch<search::queryeval::NoUnpack, vespalib::LeftArrayHeap, unsigned char> >";
const std::string strict_bitvector_iterator_class_name = "search::BitVectorIteratorTT<search::BitVectorIteratorStrictT<false>, false>";

struct InvalidSelector : ISourceSelector {
InvalidSelector() : ISourceSelector(Source()) {}
Expand Down Expand Up @@ -1196,7 +1197,7 @@ TEST("require that children does not optimize when parents refuse them to") {
EXPECT_EQUAL(strict_equiv_name, normalize_class_name(search->getClassName()));
{
const auto & e = dynamic_cast<const MultiSearch &>(*search);
EXPECT_EQUAL("search::BitVectorIteratorStrictT<false>", e.getChildren()[0]->getClassName());
EXPECT_EQUAL(strict_bitvector_iterator_class_name, e.getChildren()[0]->getClassName());
EXPECT_EQUAL("search::diskindex::ZcRareWordPosOccIterator<true, false>", e.getChildren()[1]->getClassName());
EXPECT_EQUAL("search::diskindex::ZcRareWordPosOccIterator<true, false>", e.getChildren()[2]->getClassName());
}
Expand All @@ -1206,7 +1207,7 @@ TEST("require that children does not optimize when parents refuse them to") {
EXPECT_EQUAL(strict_equiv_name, normalize_class_name(search->getClassName()));
{
const auto & e = dynamic_cast<const MultiSearch &>(*search);
EXPECT_EQUAL("search::BitVectorIteratorStrictT<false>", e.getChildren()[0]->getClassName());
EXPECT_EQUAL(strict_bitvector_iterator_class_name, e.getChildren()[0]->getClassName());
EXPECT_EQUAL("search::diskindex::ZcRareWordPosOccIterator<true, false>", e.getChildren()[1]->getClassName());
EXPECT_EQUAL("search::diskindex::ZcRareWordPosOccIterator<true, false>", e.getChildren()[2]->getClassName());
}
Expand Down
85 changes: 74 additions & 11 deletions searchlib/src/vespa/searchlib/common/bitvectoriterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,30 +125,93 @@ BitVectorIteratorStrictT<inverse>::initRange(uint32_t begin, uint32_t end)
}
}

template <typename Parent, bool full_reset>
class BitVectorIteratorTT : public Parent {
public:
BitVectorIteratorTT(const BitVector& bv, uint32_t docid_limit, TermFieldMatchData& tfmd);
~BitVectorIteratorTT() override;
void doUnpack(uint32_t docId) override;
};

template<typename Parent, bool full_reset>
BitVectorIteratorTT<Parent, full_reset>::BitVectorIteratorTT(const BitVector& bv, uint32_t docid_limit, TermFieldMatchData& tfmd)
: Parent(bv, docid_limit, tfmd)
{
}

template<typename Parent, bool full_reset>
BitVectorIteratorTT<Parent, full_reset>::~BitVectorIteratorTT() = default;

template <typename Parent, bool full_reset>
void
BitVectorIteratorTT<Parent, full_reset>::doUnpack(uint32_t docId)
{
if constexpr (full_reset) {
this->_tfmd.reset(docId);
} else {
this->_tfmd.resetOnlyDocId(docId);
}
}

namespace
{

struct StrictIteratorType {
template <bool inverted, bool full_reset> using type = BitVectorIteratorTT<BitVectorIteratorStrictT<inverted>, full_reset>;
};

struct IteratorType {
template <bool inverted, bool full_reset> using type = BitVectorIteratorTT<BitVectorIteratorT<inverted>, full_reset>;
};

template <typename IteratorType, bool full_reset>
std::unique_ptr<queryeval::SearchIterator>
create_helper_helper(const BitVector& bv, uint32_t docid_limit, TermFieldMatchData& tfmd, bool inverted)
{
if (inverted) {
return std::make_unique<typename IteratorType::template type<true, full_reset>>(bv, docid_limit, tfmd);
} else {
return std::make_unique<typename IteratorType::template type<false, full_reset>>(bv, docid_limit, tfmd);
}
}

template <typename IteratorType>
std::unique_ptr<queryeval::SearchIterator>
create_helper(const BitVector& bv, uint32_t docid_limit, TermFieldMatchData& tfmd, bool inverted, bool full_reset)
{
if (full_reset) {
return create_helper_helper<IteratorType, true>(bv, docid_limit, tfmd, inverted);
} else {
return create_helper_helper<IteratorType, false>(bv, docid_limit, tfmd, inverted);

}
}

}

queryeval::SearchIterator::UP
BitVectorIterator::create(const BitVector *const bv, TermFieldMatchData &matchData, bool strict, bool inverted)
{
return create(bv, bv->size(), matchData, strict, inverted);
return create(bv, bv->size(), matchData, strict, inverted, false);
}

queryeval::SearchIterator::UP
BitVectorIterator::create(const BitVector *const bv, uint32_t docIdLimit,
TermFieldMatchData &matchData, bool strict, bool inverted)
{
return create(bv, docIdLimit, matchData, strict, inverted, false);
}

queryeval::SearchIterator::UP
BitVectorIterator::create(const BitVector *const bv, uint32_t docid_limit,
TermFieldMatchData& tfmd, bool strict, bool inverted, bool full_reset)
{
if (bv == nullptr) {
return std::make_unique<queryeval::EmptySearch>();
} else if (strict) {
if (inverted) {
return std::make_unique<BitVectorIteratorStrictT<true>>(*bv, docIdLimit, matchData);
} else {
return std::make_unique<BitVectorIteratorStrictT<false>>(*bv, docIdLimit, matchData);
}
return create_helper<StrictIteratorType>(*bv, docid_limit, tfmd, inverted, full_reset);
} else {
if (inverted) {
return std::make_unique<BitVectorIteratorT<true>>(*bv, docIdLimit, matchData);
} else {
return std::make_unique<BitVectorIteratorT<false>>(*bv, docIdLimit, matchData);
}
return create_helper<IteratorType>(*bv, docid_limit, tfmd, inverted, full_reset);
}
}

Expand Down
7 changes: 3 additions & 4 deletions searchlib/src/vespa/searchlib/common/bitvectoriterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,10 @@ class BitVectorIterator : public queryeval::SearchIterator

uint32_t _docIdLimit;
const BitVector & _bv;
fef::TermFieldMatchData &_tfmd;
private:
void visitMembers(vespalib::ObjectVisitor &visitor) const override;
void doUnpack(uint32_t docId) final {
_tfmd.resetOnlyDocId(docId);
}
BitVectorMeta asBitVector() const noexcept override { return {&_bv, _docIdLimit, isInverted()}; }
fef::TermFieldMatchData &_tfmd;
public:
virtual bool isInverted() const = 0;

Expand All @@ -33,6 +30,8 @@ class BitVectorIterator : public queryeval::SearchIterator
static UP create(const BitVector *const other, fef::TermFieldMatchData &matchData, bool strict, bool inverted = false);
static UP create(const BitVector *const other, uint32_t docIdLimit,
fef::TermFieldMatchData &matchData, bool strict, bool inverted = false);
static UP create(const BitVector *const other, uint32_t docIdLimit,
fef::TermFieldMatchData &matchData, bool strict, bool inverted, bool full_reset);
};

} // namespace search
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,13 @@ DiskTermBlueprint::createLeafSearch(const TermFieldMatchDataArray & tfmda) const
if (_bitvector_lookup_result.valid() && (_bitVector || tfmda[0]->isNotNeeded())) {
LOG(debug, "Return BitVectorIterator: %s, wordNum(%" PRIu64 "), docCount(%" PRIu64 ")",
getName(_field_index.get_field_id()).c_str(), _lookupRes.wordNum, _lookupRes.counts._numDocs);
return BitVectorIterator::create(get_bitvector(), *tfmda[0], strict());
auto bv = get_bitvector();
/*
* If bitvectors are used when _is_filter_field is false due to word being very common in this disk index
* then the term field match data needs a full reset during unpack to clear out values set during unpack
* from another iterator for the same term and another disk index or memory index.
*/
return BitVectorIterator::create(bv, bv->size(), *tfmda[0], strict(), false, !_is_filter_field);
}
auto search(_field_index.create_iterator(_lookupRes, _postingHandle, tfmda));
if (use_bitvector()) {
Expand Down

0 comments on commit e6a50fd

Please sign in to comment.