diff --git a/searchlib/src/tests/common/bitvector/bitvector_test.cpp b/searchlib/src/tests/common/bitvector/bitvector_test.cpp index 7a26202682b7..9afc46018018 100644 --- a/searchlib/src/tests/common/bitvector/bitvector_test.cpp +++ b/searchlib/src/tests/common/bitvector/bitvector_test.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include using namespace search; @@ -135,7 +136,7 @@ assertBV(const std::string & exp, const BitVector & act) bool res1 = EXPECT_EQUAL(exp, toString(act)); search::fef::TermFieldMatchData f; queryeval::SearchIterator::UP it(BitVectorIterator::create(&act, f, true)); - BitVectorIterator & b(dynamic_cast(*it)); + auto & b(dynamic_cast(*it)); bool res2 = EXPECT_EQUAL(exp, toString(b)); return res1 && res2; } @@ -295,6 +296,15 @@ TEST("requireThatSequentialOperationsOnPartialWorks") EXPECT_EQUAL(5u, p2.countTrueBits()); p2.orWith(full); EXPECT_EQUAL(202u, p2.countTrueBits()); + + AllocatedBitVector before(100); + before.setInterval(0, 100); + p2.orWith(before); + EXPECT_EQUAL(202u, p2.countTrueBits()); + + PartialBitVector after(1000, 1100); + after.setInterval(1000, 1100); + EXPECT_EQUAL(202u, p2.countTrueBits()); } TEST("requireThatInitRangeStaysWithinBounds") { @@ -342,7 +352,26 @@ verifyThatLongerWithShorterWorksAsZeroPadded(uint32_t offset, uint32_t sz1, uint func(*aLarger2, *bSmall); func(*aLarger3, *bEmpty); EXPECT_TRUE(*aLarger == *aLarger2); - //EXPECT_TRUE(*aLarger == *aLarger3); +} + +template +void +verifyNonOverlappingWorksAsZeroPadded(bool clear, Func func) { + constexpr size_t CNT = 34; + BitVector::UP left = createEveryNthBitSet(3, 1000, 100); + BitVector::UP right = createEveryNthBitSet(3, 2000, 100); + EXPECT_EQUAL(CNT, left->countTrueBits()); + EXPECT_EQUAL(CNT, right->countTrueBits()); + func(*left, *right); + EXPECT_EQUAL(clear ? 0 : CNT, left->countTrueBits()); + EXPECT_EQUAL(CNT, right->countTrueBits()); + left = createEveryNthBitSet(3, 1000, 100); + right = createEveryNthBitSet(3, 2000, 100); + EXPECT_EQUAL(CNT, left->countTrueBits()); + EXPECT_EQUAL(CNT, right->countTrueBits()); + func(*right, *left); + EXPECT_EQUAL(CNT, left->countTrueBits()); + EXPECT_EQUAL(clear ? 0 : CNT, right->countTrueBits()); } TEST("requireThatAndWorks") { @@ -351,6 +380,7 @@ TEST("requireThatAndWorks") { verifyThatLongerWithShorterWorksAsZeroPadded(offset, offset+256, offset+256 + offset + 3, [](BitVector & a, const BitVector & b) { a.andWith(b); }); } + verifyNonOverlappingWorksAsZeroPadded(true, [](BitVector & a, const BitVector & b) { a.andWith(b); }); } TEST("requireThatOrWorks") { @@ -359,6 +389,7 @@ TEST("requireThatOrWorks") { verifyThatLongerWithShorterWorksAsZeroPadded(offset, offset+256, offset+256 + offset + 3, [](BitVector & a, const BitVector & b) { a.orWith(b); }); } + verifyNonOverlappingWorksAsZeroPadded(false, [](BitVector & a, const BitVector & b) { a.orWith(b); }); } @@ -368,6 +399,7 @@ TEST("requireThatAndNotWorks") { verifyThatLongerWithShorterWorksAsZeroPadded(offset, offset+256, offset+256 + offset + 3, [](BitVector & a, const BitVector & b) { a.andNotWith(b); }); } + verifyNonOverlappingWorksAsZeroPadded(false, [](BitVector & a, const BitVector & b) { a.andNotWith(b); }); } TEST("test that empty bitvectors does not crash") { diff --git a/searchlib/src/vespa/searchlib/common/bitvector.cpp b/searchlib/src/vespa/searchlib/common/bitvector.cpp index 47ca590c9bce..cba3ef4f842f 100644 --- a/searchlib/src/vespa/searchlib/common/bitvector.cpp +++ b/searchlib/src/vespa/searchlib/common/bitvector.cpp @@ -23,17 +23,6 @@ using vespalib::alloc::Alloc; namespace { -void verifyInclusiveStart(const search::BitVector & a, const search::BitVector & b) __attribute__((noinline)); - -void verifyInclusiveStart(const search::BitVector & a, const search::BitVector & b) -{ - if (a.getStartIndex() < b.getStartIndex()) { - throw IllegalArgumentException(make_string("[%d, %d] starts before which is not allowed currently [%d, %d]", - a.getStartIndex(), a.size(), b.getStartIndex(), b.size()), - VESPA_STRLOC); - } -} - constexpr size_t MMAP_LIMIT = 256_Mi; } @@ -90,10 +79,15 @@ void BitVector::clearInterval(Index start, Index end) { clearIntervalNoInvalidation(Range(start, end)); - invalidateCachedCount(); } +void +BitVector::store(Word &word, Word value) { + assert(!_enable_range_check || ((&word >= getActiveStart()) && (&word < (getActiveStart() + numActiveWords())))); + return store_unchecked(word, value); +} + void BitVector::clearIntervalNoInvalidation(Range range_in) { @@ -107,7 +101,7 @@ BitVector::clearIntervalNoInvalidation(Range range_in) if (endw > startw) { store(_words[startw], _words[startw] & startBits(range.start())); for (Index i = startw + 1; i < endw; ++i) { - store(_words[i], 0); + store_unchecked(_words[i], 0); } store(_words[endw], _words[endw] & endBits(last)); } else { @@ -128,7 +122,7 @@ BitVector::setInterval(Index start_in, Index end_in) if (endw > startw) { store(_words[startw], _words[startw] | checkTab(range.start())); for (Index i = startw + 1; i < endw; ++i) { - store(_words[i], allBits()); + store_unchecked(_words[i], allBits()); } store(_words[endw], _words[endw] | ~endBits(last)); } else { @@ -187,19 +181,18 @@ BitVector::countInterval(Range range_in) const void BitVector::orWith(const BitVector & right) { - verifyInclusiveStart(*this, right); + Range range = sanitize(right.range()); + if ( ! range.validNonZero()) return; if (right.size() < size()) { - if (right.size() > 0) { - ssize_t commonBytes = numActiveBytes(getStartIndex(), right.size()) - sizeof(Word); - if (commonBytes > 0) { - IAccelrated::getAccelerator().orBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes); - } - Index last(right.size() - 1); - store(getWordIndex(last)[0], getWordIndex(last)[0] | (load(right.getWordIndex(last)[0]) & ~endBits(last))); + ssize_t commonBytes = numActiveBytes(range.start(), range.end()) - sizeof(Word); + if (commonBytes > 0) { + IAccelrated::getAccelerator().orBit(getWordIndex(range.start()), right.getWordIndex(range.start()), commonBytes); } + Index last(range.end() - 1); + store(getWordIndex(last)[0], getWordIndex(last)[0] | (load(right.getWordIndex(last)[0]) & ~endBits(last))); } else { - IAccelrated::getAccelerator().orBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes()); + IAccelrated::getAccelerator().orBit(getWordIndex(range.start()), right.getWordIndex(range.start()), getActiveBytes()); } repairEnds(); invalidateCachedCount(); @@ -221,7 +214,11 @@ BitVector::repairEnds() void BitVector::andWith(const BitVector & right) { - verifyInclusiveStart(*this, right); + Range range = sanitize(right.range()); + if ( ! range.validNonZero()) { + clear(); + return; + } uint32_t commonBytes = std::min(getActiveBytes(), numActiveBytes(getStartIndex(), right.size())); IAccelrated::getAccelerator().andBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes); @@ -237,19 +234,18 @@ BitVector::andWith(const BitVector & right) void BitVector::andNotWith(const BitVector& right) { - verifyInclusiveStart(*this, right); + Range range = sanitize(right.range()); + if ( ! range.validNonZero()) return; if (right.size() < size()) { - if (right.size() > 0) { - ssize_t commonBytes = numActiveBytes(getStartIndex(), right.size()) - sizeof(Word); - if (commonBytes > 0) { - IAccelrated::getAccelerator().andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes); - } - Index last(right.size() - 1); - store(getWordIndex(last)[0], getWordIndex(last)[0] & ~(load(right.getWordIndex(last)[0]) & ~endBits(last))); + ssize_t commonBytes = numActiveBytes(range.start(), range.end()) - sizeof(Word); + if (commonBytes > 0) { + IAccelrated::getAccelerator().andNotBit(getWordIndex(range.start()), right.getWordIndex(range.start()), commonBytes); } + Index last(range.end() - 1); + store(getWordIndex(last)[0], getWordIndex(last)[0] & ~(load(right.getWordIndex(last)[0]) & ~endBits(last))); } else { - IAccelrated::getAccelerator().andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes()); + IAccelrated::getAccelerator().andNotBit(getWordIndex(range.start()), right.getWordIndex(range.start()), getActiveBytes()); } repairEnds(); diff --git a/searchlib/src/vespa/searchlib/common/bitvector.h b/searchlib/src/vespa/searchlib/common/bitvector.h index 1d289435f6d2..67f3e2ad502f 100644 --- a/searchlib/src/vespa/searchlib/common/bitvector.h +++ b/searchlib/src/vespa/searchlib/common/bitvector.h @@ -28,10 +28,10 @@ class BitVector : protected BitWord using UP = std::unique_ptr; class Range { public: - Range(Index start_in, Index end_in) : _start(start_in), _end(end_in) {} - Index start() const { return _start; } - Index end() const { return _end; } - bool validNonZero() const { return _end > _start; } + Range(Index start_in, Index end_in) noexcept : _start(start_in), _end(end_in) {} + [[nodiscard]] Index start() const noexcept { return _start; } + [[nodiscard]] Index end() const noexcept { return _end; } + [[nodiscard]] bool validNonZero() const noexcept { return _end > _start; } private: Index _start; Index _end; @@ -42,6 +42,7 @@ class BitVector : protected BitWord bool operator == (const BitVector &right) const; const void * getStart() const { return _words; } void * getStart() { return _words; } + Range range() const noexcept { return {getStartIndex(), size()}; } Index size() const { return vespalib::atomic::load_ref_relaxed(_sz); } Index sizeBytes() const { return numBytes(getActiveSize()); } bool testBit(Index idx) const { @@ -144,15 +145,15 @@ class BitVector : protected BitWord vespalib::atomic::store_ref_release(_sz, sz); } void set_bit_no_range_check(Index idx) { - store(_words[wordNum(idx)], _words[wordNum(idx)] | mask(idx)); + store_unchecked(_words[wordNum(idx)], _words[wordNum(idx)] | mask(idx)); } void clear_bit_no_range_check(Index idx) { - store(_words[wordNum(idx)], _words[wordNum(idx)] & ~ mask(idx)); + store_unchecked(_words[wordNum(idx)], _words[wordNum(idx)] & ~ mask(idx)); } void flip_bit_no_range_check(Index idx) { - store(_words[wordNum(idx)], _words[wordNum(idx)] ^ mask(idx)); + store_unchecked(_words[wordNum(idx)], _words[wordNum(idx)] ^ mask(idx)); } - void range_check(Index idx) { + void range_check(Index idx) const { assert(!_enable_range_check || (idx >= _startOffset && idx < _sz)); } void setBit(Index idx) { @@ -301,8 +302,11 @@ class BitVector : protected BitWord static Alloc allocatePaddedAndAligned(Index start, Index end, Index capacity, const Alloc* init_alloc = nullptr); private: - Word load(const Word &word) const { return vespalib::atomic::load_ref_relaxed(word); } - void store(Word &word, Word value) { return vespalib::atomic::store_ref_relaxed(word, value); } + static Word load(const Word &word) { return vespalib::atomic::load_ref_relaxed(word); } + VESPA_DLL_LOCAL void store(Word &word, Word value); + static void store_unchecked(Word &word, Word value) { + return vespalib::atomic::store_ref_relaxed(word, value); + } friend PartialBitVector; const Word * getWordIndex(Index index) const { return static_cast(getStart()) + wordNum(index); } Word * getWordIndex(Index index) { return static_cast(getStart()) + wordNum(index); } @@ -330,8 +334,8 @@ class BitVector : protected BitWord } VESPA_DLL_LOCAL void repairEnds(); Range sanitize(Range range) const { - return Range(std::max(range.start(), getStartIndex()), - std::min(range.end(), size())); + return {std::max(range.start(), getStartIndex()), + std::min(range.end(), size())}; } Index count() const; bool hasTrueBitsInternal() const; diff --git a/searchlib/src/vespa/searchlib/common/partialbitvector.h b/searchlib/src/vespa/searchlib/common/partialbitvector.h index d1baa22f9a1a..4fb5c3c0d18c 100644 --- a/searchlib/src/vespa/searchlib/common/partialbitvector.h +++ b/searchlib/src/vespa/searchlib/common/partialbitvector.h @@ -8,7 +8,7 @@ namespace search { /** * search::PartialBitVector is a bitvector that is only represents 1 part - * of the full space. All operations concerning the whole vector while only + * of the full space. All operations concerning the whole vector will only * be conducted on this smaller area. */ class PartialBitVector : public BitVector