Skip to content

Commit

Permalink
Merge pull request #32950 from vespa-engine/toregge/log-posting-list-…
Browse files Browse the repository at this point in the history
…reads

Log posting list reads.
  • Loading branch information
geirst authored Nov 26, 2024
2 parents 6b3597f + 9545567 commit 36b8d1b
Show file tree
Hide file tree
Showing 11 changed files with 128 additions and 26 deletions.
11 changes: 11 additions & 0 deletions searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
LOG_SETUP(".diskindex.bitvectordictionary");

using search::index::BitVectorDictionaryLookupResult;
using search::index::PostingListFileRange;

namespace search::diskindex {

Expand Down Expand Up @@ -117,4 +118,14 @@ BitVectorDictionary::read_bitvector(BitVectorDictionaryLookupResult lookup_resul
return read_bitvector(lookup_result, read_stats);
}

/*
 * Compute the byte range in the bitvector data file covered by the
 * bitvector identified by lookup_result. An invalid lookup result gives
 * the empty range {0, 0}.
 */
PostingListFileRange
BitVectorDictionary::get_bitvector_file_range(index::BitVectorDictionaryLookupResult lookup_result) const
{
    if (lookup_result.valid()) {
        // Widen before multiplying so the product is calculated with 64 bits.
        const uint64_t vector_bytes = _vectorSize;
        // Bitvectors are addressed as fixed size slots following the file header.
        const uint64_t start = vector_bytes * lookup_result.idx + _datHeaderLen;
        return {start, start + vector_bytes};
    }
    return {0, 0};
}

}
2 changes: 2 additions & 0 deletions searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include "bitvectorkeyscope.h"
#include <vespa/searchlib/index/bitvector_dictionary_lookup_result.h>
#include <vespa/searchlib/index/posting_list_file_range.h>
#include <vespa/searchlib/index/bitvectorkeys.h>
#include <vespa/searchlib/common/tunefileinfo.h>
#include <string>
Expand Down Expand Up @@ -70,6 +71,7 @@ class BitVectorDictionary
std::unique_ptr<BitVector> read_bitvector(index::BitVectorDictionaryLookupResult lookup_result,
ReadStats &read_stats);
std::unique_ptr<BitVector> read_bitvector(index::BitVectorDictionaryLookupResult lookup_result);
index::PostingListFileRange get_bitvector_file_range(index::BitVectorDictionaryLookupResult lookup_result) const;

uint32_t getDocIdLimit() const noexcept { return _docIdLimit; }

Expand Down
34 changes: 34 additions & 0 deletions searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,46 @@ DiskTermBlueprint::DiskTermBlueprint(const FieldSpec & field,
_lookupRes.counts._numDocs == 0));
}

/*
 * Emit a debug log message describing an upcoming bitvector read.
 * Logged values, in order: field name, query term, file id, word number,
 * number of documents, bitvector index, start offset and size of file range.
 */
void
DiskTermBlueprint::log_bitvector_read() const
{
    const auto file_range = _field_index.get_bitvector_file_range(_bitvector_lookup_result);
    const auto start_offset = file_range.start_offset;
    const auto range_size = file_range.size();
    LOG(debug, "DiskTermBlueprint::fetchPosting "
        "bitvector %s %s %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu32 " %" PRIu64 " %" PRIu64,
        _field.getName().c_str(), _query_term.c_str(), _field_index.get_file_id(),
        _lookupRes.wordNum, _lookupRes.counts._numDocs,
        _bitvector_lookup_result.idx,
        start_offset, range_size);
}

/*
 * Emit a debug log message describing an upcoming posting list read.
 * Logged values, in order: field name, query term, file id, word number,
 * number of documents, bit offset, bit length, start offset and size of
 * file range.
 */
void
DiskTermBlueprint::log_posting_list_read() const
{
    const auto file_range = _field_index.get_posting_list_file_range(_lookupRes);
    const auto start_offset = file_range.start_offset;
    const auto range_size = file_range.size();
    LOG(debug, "DiskTermBlueprint::fetchPosting "
        "posting %s %s %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64,
        _field.getName().c_str(), _query_term.c_str(), _field_index.get_file_id(),
        _lookupRes.wordNum, _lookupRes.counts._numDocs,
        _lookupRes.bitOffset, _lookupRes.counts._bitLength,
        start_offset, range_size);
}

void
DiskTermBlueprint::fetchPostings(const queryeval::ExecuteInfo &execInfo)
{
(void) execInfo;
if (!_fetchPostingsDone) {
if (_useBitVector && _bitvector_lookup_result.valid()) {
if (LOG_WOULD_LOG(debug)) [[unlikely]] {
log_bitvector_read();
}
_bitVector = _field_index.read_bit_vector(_bitvector_lookup_result);
}
if (!_bitVector) {
if (LOG_WOULD_LOG(debug)) [[unlikely]] {
log_posting_list_read();
}
_postingHandle = _field_index.read_posting_list(_lookupRes);
}
}
Expand All @@ -90,6 +121,9 @@ DiskTermBlueprint::get_bitvector() const
}
std::lock_guard guard(_mutex);
if (!_late_bitvector) {
if (LOG_WOULD_LOG(debug)) [[unlikely]] {
log_bitvector_read();
}
_late_bitvector = _field_index.read_bit_vector(_bitvector_lookup_result);
assert(_late_bitvector);
}
Expand Down
2 changes: 2 additions & 0 deletions searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ class DiskTermBlueprint : public queryeval::SimpleLeafBlueprint
mutable std::shared_ptr<BitVector> _late_bitvector;

const BitVector* get_bitvector() const;
void log_bitvector_read() const __attribute__((noinline));
void log_posting_list_read() const __attribute__((noinline));
public:
/**
* Create a new blueprint.
Expand Down
8 changes: 8 additions & 0 deletions searchlib/src/vespa/searchlib/diskindex/field_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking {
using DiskPostingFile = index::PostingListFileRandRead;
using DiskPostingFileReal = Zc4PosOccRandRead;
using DiskPostingFileDynamicKReal = ZcPosOccRandRead;
using PostingListFileRange = index::PostingListFileRange;

class LockedFieldIndexIoStats {
FieldIndexIoStats _stats;
Expand Down Expand Up @@ -76,17 +77,24 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking {
bool trim) const;
index::PostingListHandle read(const IPostingListCache::Key& key, IPostingListCache::Context& ctx) const override;
index::PostingListHandle read_posting_list(const search::index::DictionaryLookupResult& lookup_result) const;
/*
 * Get the file range for the posting list identified by lookup_result.
 * Forwards the request to the posting list file (_posting_file).
 * NOTE(review): _posting_file is dereferenced without a null check here;
 * confirm that callers only use this on an opened field index.
 */
PostingListFileRange get_posting_list_file_range(const search::index::DictionaryLookupResult& lookup_result) const {
return _posting_file->get_posting_list_file_range(lookup_result);
}
index::BitVectorDictionaryLookupResult lookup_bit_vector(const search::index::DictionaryLookupResult& lookup_result) const;
std::shared_ptr<BitVector> read_uncached_bit_vector(index::BitVectorDictionaryLookupResult lookup_result) const;
std::shared_ptr<BitVector> read(const IPostingListCache::BitVectorKey& key, IPostingListCache::Context& ctx) const override;
std::shared_ptr<BitVector> read_bit_vector(index::BitVectorDictionaryLookupResult lookup_result) const;
/*
 * Get the file range for the bitvector identified by lookup_result.
 * Forwards the request to the bitvector dictionary (_bit_vector_dict).
 * NOTE(review): _bit_vector_dict is dereferenced without a null check here;
 * confirm that callers only use this when a bitvector dictionary exists.
 */
PostingListFileRange get_bitvector_file_range(index::BitVectorDictionaryLookupResult lookup_result) const {
return _bit_vector_dict->get_bitvector_file_range(lookup_result);
}
std::unique_ptr<search::queryeval::SearchIterator> create_iterator(const search::index::DictionaryLookupResult& lookup_result,
const index::PostingListHandle& handle,
const search::fef::TermFieldMatchDataArray& tfmda) const;
index::FieldLengthInfo get_field_length_info() const;

index::DictionaryFileRandRead* get_dictionary() noexcept { return _dict.get(); }
FieldIndexStats get_stats(bool clear_disk_io_stats) const;
uint64_t get_file_id() const noexcept { return _file_id; }
uint32_t get_field_id() const noexcept { return _field_id; }
bool is_posting_list_cache_enabled() const noexcept { return _posting_list_cache_enabled; }
};
Expand Down
59 changes: 34 additions & 25 deletions searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ using search::bitcompression::EGPosOccDecodeContext;
using search::bitcompression::EGPosOccDecodeContextCooked;
using search::bitcompression::PosOccFieldsParams;
using search::bitcompression::FeatureDecodeContext;
using search::index::DictionaryLookupResult;
using search::index::FieldLengthInfo;
using search::index::PostingListCounts;
using search::index::PostingListFileRange;
using search::index::PostingListHandle;
using search::ComprFileReadContext;

Expand All @@ -31,6 +33,17 @@ std::string myId4("Zc.4");
std::string myId5("Zc.5");
std::string interleaved_features("interleaved_features");

/*
 * Calculate the byte range in the posting list file that covers the posting
 * list described by lookup_result. Bit positions are relative to the end of
 * the file header (header_bit_size bits). Both ends of the range are
 * aligned at 64-bit (8 byte) boundaries.
 */
PostingListFileRange get_file_range(const DictionaryLookupResult& lookup_result, uint64_t header_bit_size)
{
    constexpr uint64_t align_mask = 7; // 8 bytes == 64 bits
    const uint64_t first_bit = lookup_result.bitOffset + header_bit_size;
    const uint64_t first_byte = first_bit >> 3;
    // Round bit length up to whole bytes
    const uint64_t end_byte = (first_bit + lookup_result.counts._bitLength + 7) >> 3;
    // Align start downwards at 64-bit boundary
    const uint64_t start_offset = first_byte & ~align_mask;
    // Align end upwards at 64-bit boundary
    const uint64_t end_offset = (end_byte + align_mask) & ~align_mask;
    return {start_offset, end_offset};
}

}

namespace search::diskindex {
Expand Down Expand Up @@ -91,25 +104,18 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result)
return handle;
}

uint64_t startOffset = (lookup_result.bitOffset + _headerBitSize) >> 3;
// Align start at 64-bit boundary
startOffset -= (startOffset & 7);
uint64_t endOffset = (lookup_result.bitOffset + _headerBitSize +
lookup_result.counts._bitLength + 7) >> 3;
// Align end at 64-bit boundary
endOffset += (-endOffset & 7);

void *mapPtr = _file->MemoryMapPtr(startOffset);
auto file_range = get_file_range(lookup_result, _headerBitSize);
void *mapPtr = _file->MemoryMapPtr(file_range.start_offset);
if (mapPtr != nullptr) {
handle._mem = mapPtr;
size_t pad_before = startOffset - vespalib::round_down_to_page_boundary(startOffset);
handle._read_bytes = vespalib::round_up_to_page_size(pad_before + endOffset - startOffset + decode_prefetch_size);
size_t pad_before = file_range.start_offset - vespalib::round_down_to_page_boundary(file_range.start_offset);
handle._read_bytes = vespalib::round_up_to_page_size(pad_before + file_range.size() + decode_prefetch_size);
} else {
uint64_t vectorLen = endOffset - startOffset;
uint64_t vectorLen = file_range.size();
size_t padBefore;
size_t padAfter;
size_t padExtraAfter; // Decode prefetch space
_file->DirectIOPadding(startOffset, vectorLen, padBefore, padAfter);
_file->DirectIOPadding(file_range.start_offset, vectorLen, padBefore, padAfter);
padExtraAfter = 0;
if (padAfter < decode_prefetch_size) {
padExtraAfter = decode_prefetch_size - padAfter;
Expand All @@ -120,10 +126,10 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result)
if (mallocLen > 0) {
alignedBuffer = _file->AllocateDirectIOBuffer(mallocLen);
assert(alignedBuffer != nullptr);
assert(endOffset + padAfter + padExtraAfter <= _fileSize);
assert(file_range.end_offset + padAfter + padExtraAfter <= _fileSize);
_file->ReadBuf(alignedBuffer,
padBefore + vectorLen + padAfter,
startOffset - padBefore);
file_range.start_offset - padBefore);
}
// Zero decode prefetch memory to avoid uninitialized reads
if (padExtraAfter > 0) {
Expand All @@ -136,7 +142,7 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result)
handle._allocSize = mallocLen;
handle._read_bytes = padBefore + vectorLen + padAfter;
}
handle._bitOffsetMem = (startOffset << 3) - _headerBitSize;
handle._bitOffsetMem = (file_range.start_offset << 3) - _headerBitSize;
return handle;
}

Expand All @@ -147,14 +153,8 @@ ZcPosOccRandRead::consider_trim_posting_list(const DictionaryLookupResult &looku
if (lookup_result.counts._bitLength == 0 || _memoryMapped) {
return;
}
uint64_t start_offset = (lookup_result.bitOffset + _headerBitSize) >> 3;
// Align start at 64-bit boundary
start_offset -= (start_offset & 7);
uint64_t end_offset = (lookup_result.bitOffset + _headerBitSize +
lookup_result.counts._bitLength + 7) >> 3;
// Align end at 64-bit boundary
end_offset += (-end_offset & 7);
size_t malloc_len = end_offset - start_offset + decode_prefetch_size;
auto file_range = get_file_range(lookup_result, _headerBitSize);
size_t malloc_len = file_range.size() + decode_prefetch_size;
if (handle._allocSize == malloc_len) {
assert(handle._allocMem.get() == handle._mem);
return;
Expand All @@ -169,7 +169,16 @@ ZcPosOccRandRead::consider_trim_posting_list(const DictionaryLookupResult &looku
handle._allocMem = std::shared_ptr<void>(mem, free);
handle._mem = mem;
handle._allocSize = malloc_len;
handle._read_bytes = end_offset - start_offset;
handle._read_bytes = file_range.size();
}

/*
 * Get the byte range in the posting list file used by the posting list
 * described by lookup_result. A posting list without any bits gives the
 * empty range {0, 0}.
 */
PostingListFileRange
ZcPosOccRandRead::get_posting_list_file_range(const DictionaryLookupResult& lookup_result) const
{
    const bool has_bits = (lookup_result.counts._bitLength != 0);
    return has_bits ? get_file_range(lookup_result, _headerBitSize) : PostingListFileRange(0, 0);
}

bool
Expand Down
2 changes: 2 additions & 0 deletions searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class ZcPosOccRandRead : public index::PostingListFileRandRead

using DictionaryLookupResult = index::DictionaryLookupResult;
using PostingListCounts = index::PostingListCounts;
using PostingListFileRange = index::PostingListFileRange;
using PostingListHandle = index::PostingListHandle;

/**
Expand All @@ -46,6 +47,7 @@ class ZcPosOccRandRead : public index::PostingListFileRandRead
PostingListHandle read_posting_list(const DictionaryLookupResult& lookup_result) override;
void consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle,
double bloat_factor) const override;
PostingListFileRange get_posting_list_file_range(const DictionaryLookupResult& lookup_result) const override;

bool open(const std::string &name, const TuneFileRandRead &tuneFileRead) override;
bool close() override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ namespace search::index {
class BitVectorDictionaryLookupResult {
public:
static constexpr uint32_t invalid = std::numeric_limits<uint32_t>::max();
uint64_t idx;
uint32_t idx;

explicit BitVectorDictionaryLookupResult(uint32_t idx_in) noexcept
: idx(idx_in)
Expand Down
24 changes: 24 additions & 0 deletions searchlib/src/vespa/searchlib/index/posting_list_file_range.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include <cstdint>

namespace search::index {

/*
 * Byte range [start_offset, end_offset) of a posting list file that is used
 * for a posting list. Might include padding at start and end due to file
 * format. Offsets are in bytes.
 */
struct PostingListFileRange {
    uint64_t start_offset; // Offset of first byte in range
    uint64_t end_offset;   // Offset of first byte after range

    // Trivial member-wise construction; cannot throw and is usable in constant expressions.
    constexpr PostingListFileRange(uint64_t start_offset_in, uint64_t end_offset_in) noexcept
        : start_offset(start_offset_in),
          end_offset(end_offset_in)
    {
    }
    // Number of bytes in range. Uses unsigned arithmetic, thus end_offset
    // must not be less than start_offset for the result to be meaningful.
    constexpr uint64_t size() const noexcept { return end_offset - start_offset; }
};

}
6 changes: 6 additions & 0 deletions searchlib/src/vespa/searchlib/index/postinglistfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,12 @@ PostingListFileRandReadPassThrough::consider_trim_posting_list(const DictionaryL
return _lower->consider_trim_posting_list(lookup_result, handle, bloat_factor);
}

/*
 * Pass through: forward the request to the wrapped posting list file
 * (_lower) and return its answer unchanged.
 */
PostingListFileRange
PostingListFileRandReadPassThrough::get_posting_list_file_range(const DictionaryLookupResult& lookup_result) const
{
return _lower->get_posting_list_file_range(lookup_result);
}

bool
PostingListFileRandReadPassThrough::open(const std::string &name,
const TuneFileRandRead &tuneFileRead)
Expand Down
4 changes: 4 additions & 0 deletions searchlib/src/vespa/searchlib/index/postinglistfile.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#pragma once

#include "dictionary_lookup_result.h"
#include "posting_list_file_range.h"
#include "postinglistcounts.h"
#include "postinglisthandle.h"
#include <vespa/searchlib/common/tunefileinfo.h>
Expand Down Expand Up @@ -167,6 +168,8 @@ class PostingListFileRandRead {
virtual void consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle,
double bloat_factor) const = 0;

virtual PostingListFileRange get_posting_list_file_range(const DictionaryLookupResult& lookup_result) const = 0;

/**
* Open posting list file for random read.
*/
Expand Down Expand Up @@ -206,6 +209,7 @@ class PostingListFileRandReadPassThrough : public PostingListFileRandRead {
PostingListHandle read_posting_list(const DictionaryLookupResult& lookup_result) override;
void consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle,
double bloat_factor) const override;
PostingListFileRange get_posting_list_file_range(const DictionaryLookupResult& lookup_result) const override;

bool open(const std::string &name, const TuneFileRandRead &tuneFileRead) override;
bool close() override;
Expand Down

0 comments on commit 36b8d1b

Please sign in to comment.