Skip to content

Commit

Permalink
Merge pull request #25688 from vespa-engine/balder/also-handle-non-ov…
Browse files Browse the repository at this point in the history
…erlapping-bitvectors

Balder/also handle non overlapping bitvectors
  • Loading branch information
arnej27959 authored Jan 23, 2023
2 parents 0137c3f + 4b6f692 commit 0c6c77b
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 48 deletions.
36 changes: 34 additions & 2 deletions searchlib/src/tests/common/bitvector/bitvector_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
#include <vespa/vespalib/test/memory_allocator_observer.h>
#include <vespa/vespalib/util/rand48.h>
#include <vespa/vespalib/util/exceptions.h>
#include <algorithm>

using namespace search;
Expand Down Expand Up @@ -135,7 +136,7 @@ assertBV(const std::string & exp, const BitVector & act)
bool res1 = EXPECT_EQUAL(exp, toString(act));
search::fef::TermFieldMatchData f;
queryeval::SearchIterator::UP it(BitVectorIterator::create(&act, f, true));
BitVectorIterator & b(dynamic_cast<BitVectorIterator &>(*it));
auto & b(dynamic_cast<BitVectorIterator &>(*it));
bool res2 = EXPECT_EQUAL(exp, toString(b));
return res1 && res2;
}
Expand Down Expand Up @@ -295,6 +296,15 @@ TEST("requireThatSequentialOperationsOnPartialWorks")
EXPECT_EQUAL(5u, p2.countTrueBits());
p2.orWith(full);
EXPECT_EQUAL(202u, p2.countTrueBits());

AllocatedBitVector before(100);
before.setInterval(0, 100);
p2.orWith(before);
EXPECT_EQUAL(202u, p2.countTrueBits());

PartialBitVector after(1000, 1100);
after.setInterval(1000, 1100);
EXPECT_EQUAL(202u, p2.countTrueBits());
}

TEST("requireThatInitRangeStaysWithinBounds") {
Expand Down Expand Up @@ -342,7 +352,26 @@ verifyThatLongerWithShorterWorksAsZeroPadded(uint32_t offset, uint32_t sz1, uint
func(*aLarger2, *bSmall);
func(*aLarger3, *bEmpty);
EXPECT_TRUE(*aLarger == *aLarger2);
//EXPECT_TRUE(*aLarger == *aLarger3);
}

template<typename Func>
void
verifyNonOverlappingWorksAsZeroPadded(bool clear, Func func) {
constexpr size_t CNT = 34;
BitVector::UP left = createEveryNthBitSet(3, 1000, 100);
BitVector::UP right = createEveryNthBitSet(3, 2000, 100);
EXPECT_EQUAL(CNT, left->countTrueBits());
EXPECT_EQUAL(CNT, right->countTrueBits());
func(*left, *right);
EXPECT_EQUAL(clear ? 0 : CNT, left->countTrueBits());
EXPECT_EQUAL(CNT, right->countTrueBits());
left = createEveryNthBitSet(3, 1000, 100);
right = createEveryNthBitSet(3, 2000, 100);
EXPECT_EQUAL(CNT, left->countTrueBits());
EXPECT_EQUAL(CNT, right->countTrueBits());
func(*right, *left);
EXPECT_EQUAL(CNT, left->countTrueBits());
EXPECT_EQUAL(clear ? 0 : CNT, right->countTrueBits());
}

TEST("requireThatAndWorks") {
Expand All @@ -351,6 +380,7 @@ TEST("requireThatAndWorks") {
verifyThatLongerWithShorterWorksAsZeroPadded(offset, offset+256, offset+256 + offset + 3,
[](BitVector & a, const BitVector & b) { a.andWith(b); });
}
verifyNonOverlappingWorksAsZeroPadded(true, [](BitVector & a, const BitVector & b) { a.andWith(b); });
}

TEST("requireThatOrWorks") {
Expand All @@ -359,6 +389,7 @@ TEST("requireThatOrWorks") {
verifyThatLongerWithShorterWorksAsZeroPadded(offset, offset+256, offset+256 + offset + 3,
[](BitVector & a, const BitVector & b) { a.orWith(b); });
}
verifyNonOverlappingWorksAsZeroPadded(false, [](BitVector & a, const BitVector & b) { a.orWith(b); });
}


Expand All @@ -368,6 +399,7 @@ TEST("requireThatAndNotWorks") {
verifyThatLongerWithShorterWorksAsZeroPadded(offset, offset+256, offset+256 + offset + 3,
[](BitVector & a, const BitVector & b) { a.andNotWith(b); });
}
verifyNonOverlappingWorksAsZeroPadded(false, [](BitVector & a, const BitVector & b) { a.andNotWith(b); });
}

TEST("test that empty bitvectors does not crash") {
Expand Down
62 changes: 29 additions & 33 deletions searchlib/src/vespa/searchlib/common/bitvector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,6 @@ using vespalib::alloc::Alloc;

namespace {

void verifyInclusiveStart(const search::BitVector & a, const search::BitVector & b) __attribute__((noinline));

void verifyInclusiveStart(const search::BitVector & a, const search::BitVector & b)
{
if (a.getStartIndex() < b.getStartIndex()) {
throw IllegalArgumentException(make_string("[%d, %d] starts before which is not allowed currently [%d, %d]",
a.getStartIndex(), a.size(), b.getStartIndex(), b.size()),
VESPA_STRLOC);
}
}

constexpr size_t MMAP_LIMIT = 256_Mi;

}
Expand Down Expand Up @@ -90,10 +79,15 @@ void
BitVector::clearInterval(Index start, Index end)
{
clearIntervalNoInvalidation(Range(start, end));

invalidateCachedCount();
}

void
BitVector::store(Word &word, Word value) {
assert(!_enable_range_check || ((&word >= getActiveStart()) && (&word < (getActiveStart() + numActiveWords()))));
return store_unchecked(word, value);
}

void
BitVector::clearIntervalNoInvalidation(Range range_in)
{
Expand All @@ -107,7 +101,7 @@ BitVector::clearIntervalNoInvalidation(Range range_in)
if (endw > startw) {
store(_words[startw], _words[startw] & startBits(range.start()));
for (Index i = startw + 1; i < endw; ++i) {
store(_words[i], 0);
store_unchecked(_words[i], 0);
}
store(_words[endw], _words[endw] & endBits(last));
} else {
Expand All @@ -128,7 +122,7 @@ BitVector::setInterval(Index start_in, Index end_in)
if (endw > startw) {
store(_words[startw], _words[startw] | checkTab(range.start()));
for (Index i = startw + 1; i < endw; ++i) {
store(_words[i], allBits());
store_unchecked(_words[i], allBits());
}
store(_words[endw], _words[endw] | ~endBits(last));
} else {
Expand Down Expand Up @@ -187,19 +181,18 @@ BitVector::countInterval(Range range_in) const
void
BitVector::orWith(const BitVector & right)
{
verifyInclusiveStart(*this, right);
Range range = sanitize(right.range());
if ( ! range.validNonZero()) return;

if (right.size() < size()) {
if (right.size() > 0) {
ssize_t commonBytes = numActiveBytes(getStartIndex(), right.size()) - sizeof(Word);
if (commonBytes > 0) {
IAccelrated::getAccelerator().orBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes);
}
Index last(right.size() - 1);
store(getWordIndex(last)[0], getWordIndex(last)[0] | (load(right.getWordIndex(last)[0]) & ~endBits(last)));
ssize_t commonBytes = numActiveBytes(range.start(), range.end()) - sizeof(Word);
if (commonBytes > 0) {
IAccelrated::getAccelerator().orBit(getWordIndex(range.start()), right.getWordIndex(range.start()), commonBytes);
}
Index last(range.end() - 1);
store(getWordIndex(last)[0], getWordIndex(last)[0] | (load(right.getWordIndex(last)[0]) & ~endBits(last)));
} else {
IAccelrated::getAccelerator().orBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes());
IAccelrated::getAccelerator().orBit(getWordIndex(range.start()), right.getWordIndex(range.start()), getActiveBytes());
}
repairEnds();
invalidateCachedCount();
Expand All @@ -221,7 +214,11 @@ BitVector::repairEnds()
void
BitVector::andWith(const BitVector & right)
{
verifyInclusiveStart(*this, right);
Range range = sanitize(right.range());
if ( ! range.validNonZero()) {
clear();
return;
}

uint32_t commonBytes = std::min(getActiveBytes(), numActiveBytes(getStartIndex(), right.size()));
IAccelrated::getAccelerator().andBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes);
Expand All @@ -237,19 +234,18 @@ BitVector::andWith(const BitVector & right)
void
BitVector::andNotWith(const BitVector& right)
{
verifyInclusiveStart(*this, right);
Range range = sanitize(right.range());
if ( ! range.validNonZero()) return;

if (right.size() < size()) {
if (right.size() > 0) {
ssize_t commonBytes = numActiveBytes(getStartIndex(), right.size()) - sizeof(Word);
if (commonBytes > 0) {
IAccelrated::getAccelerator().andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), commonBytes);
}
Index last(right.size() - 1);
store(getWordIndex(last)[0], getWordIndex(last)[0] & ~(load(right.getWordIndex(last)[0]) & ~endBits(last)));
ssize_t commonBytes = numActiveBytes(range.start(), range.end()) - sizeof(Word);
if (commonBytes > 0) {
IAccelrated::getAccelerator().andNotBit(getWordIndex(range.start()), right.getWordIndex(range.start()), commonBytes);
}
Index last(range.end() - 1);
store(getWordIndex(last)[0], getWordIndex(last)[0] & ~(load(right.getWordIndex(last)[0]) & ~endBits(last)));
} else {
IAccelrated::getAccelerator().andNotBit(getActiveStart(), right.getWordIndex(getStartIndex()), getActiveBytes());
IAccelrated::getAccelerator().andNotBit(getWordIndex(range.start()), right.getWordIndex(range.start()), getActiveBytes());
}

repairEnds();
Expand Down
28 changes: 16 additions & 12 deletions searchlib/src/vespa/searchlib/common/bitvector.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ class BitVector : protected BitWord
using UP = std::unique_ptr<BitVector>;
class Range {
public:
Range(Index start_in, Index end_in) : _start(start_in), _end(end_in) {}
Index start() const { return _start; }
Index end() const { return _end; }
bool validNonZero() const { return _end > _start; }
Range(Index start_in, Index end_in) noexcept : _start(start_in), _end(end_in) {}
[[nodiscard]] Index start() const noexcept { return _start; }
[[nodiscard]] Index end() const noexcept { return _end; }
[[nodiscard]] bool validNonZero() const noexcept { return _end > _start; }
private:
Index _start;
Index _end;
Expand All @@ -42,6 +42,7 @@ class BitVector : protected BitWord
bool operator == (const BitVector &right) const;
const void * getStart() const { return _words; }
void * getStart() { return _words; }
Range range() const noexcept { return {getStartIndex(), size()}; }
Index size() const { return vespalib::atomic::load_ref_relaxed(_sz); }
Index sizeBytes() const { return numBytes(getActiveSize()); }
bool testBit(Index idx) const {
Expand Down Expand Up @@ -144,15 +145,15 @@ class BitVector : protected BitWord
vespalib::atomic::store_ref_release(_sz, sz);
}
void set_bit_no_range_check(Index idx) {
store(_words[wordNum(idx)], _words[wordNum(idx)] | mask(idx));
store_unchecked(_words[wordNum(idx)], _words[wordNum(idx)] | mask(idx));
}
void clear_bit_no_range_check(Index idx) {
store(_words[wordNum(idx)], _words[wordNum(idx)] & ~ mask(idx));
store_unchecked(_words[wordNum(idx)], _words[wordNum(idx)] & ~ mask(idx));
}
void flip_bit_no_range_check(Index idx) {
store(_words[wordNum(idx)], _words[wordNum(idx)] ^ mask(idx));
store_unchecked(_words[wordNum(idx)], _words[wordNum(idx)] ^ mask(idx));
}
void range_check(Index idx) {
void range_check(Index idx) const {
assert(!_enable_range_check || (idx >= _startOffset && idx < _sz));
}
void setBit(Index idx) {
Expand Down Expand Up @@ -301,8 +302,11 @@ class BitVector : protected BitWord
static Alloc allocatePaddedAndAligned(Index start, Index end, Index capacity, const Alloc* init_alloc = nullptr);

private:
Word load(const Word &word) const { return vespalib::atomic::load_ref_relaxed(word); }
void store(Word &word, Word value) { return vespalib::atomic::store_ref_relaxed(word, value); }
static Word load(const Word &word) { return vespalib::atomic::load_ref_relaxed(word); }
VESPA_DLL_LOCAL void store(Word &word, Word value);
static void store_unchecked(Word &word, Word value) {
return vespalib::atomic::store_ref_relaxed(word, value);
}
friend PartialBitVector;
const Word * getWordIndex(Index index) const { return static_cast<const Word *>(getStart()) + wordNum(index); }
Word * getWordIndex(Index index) { return static_cast<Word *>(getStart()) + wordNum(index); }
Expand Down Expand Up @@ -330,8 +334,8 @@ class BitVector : protected BitWord
}
VESPA_DLL_LOCAL void repairEnds();
Range sanitize(Range range) const {
return Range(std::max(range.start(), getStartIndex()),
std::min(range.end(), size()));
return {std::max(range.start(), getStartIndex()),
std::min(range.end(), size())};
}
Index count() const;
bool hasTrueBitsInternal() const;
Expand Down
2 changes: 1 addition & 1 deletion searchlib/src/vespa/searchlib/common/partialbitvector.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ namespace search {

/**
* search::PartialBitVector is a bitvector that is only represents 1 part
* of the full space. All operations concerning the whole vector while only
* of the full space. All operations concerning the whole vector will only
* be conducted on this smaller area.
*/
class PartialBitVector : public BitVector
Expand Down

0 comments on commit 0c6c77b

Please sign in to comment.