diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp index e9dce0f06e5..7486ba0ece9 100644 --- a/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp @@ -12,6 +12,7 @@ LOG_SETUP(".diskindex.bitvectordictionary"); using search::index::BitVectorDictionaryLookupResult; +using search::index::PostingListFileRange; namespace search::diskindex { @@ -117,4 +118,14 @@ BitVectorDictionary::read_bitvector(BitVectorDictionaryLookupResult lookup_resul return read_bitvector(lookup_result, read_stats); } +/* File range of the bitvector selected by lookup_result: begins at _datHeaderLen plus _vectorSize times idx and spans _vectorSize; the multiply is done in 64 bits. Returns {0, 0} when lookup_result is not valid. */ PostingListFileRange +BitVectorDictionary::get_bitvector_file_range(index::BitVectorDictionaryLookupResult lookup_result) const +{ + if (!lookup_result.valid()) { + return {0, 0}; + } + uint64_t offset = ((uint64_t) _vectorSize) * lookup_result.idx + _datHeaderLen; + return {offset, offset + _vectorSize}; +} + } diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h index 76f8c5d039b..07b1bb5184f 100644 --- a/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.h @@ -3,6 +3,7 @@ #include "bitvectorkeyscope.h" #include +#include #include #include #include @@ -70,6 +71,7 @@ class BitVectorDictionary std::unique_ptr read_bitvector(index::BitVectorDictionaryLookupResult lookup_result, ReadStats &read_stats); std::unique_ptr read_bitvector(index::BitVectorDictionaryLookupResult lookup_result); + /* File range of the bitvector for lookup_result; {0, 0} when lookup_result is not valid. */ index::PostingListFileRange get_bitvector_file_range(index::BitVectorDictionaryLookupResult lookup_result) const; uint32_t getDocIdLimit() const noexcept { return _docIdLimit; } diff --git a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp index f6ccd089410..d83d48fced4 100644 --- 
a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.cpp @@ -60,15 +60,46 @@ DiskTermBlueprint::DiskTermBlueprint(const FieldSpec & field, _lookupRes.counts._numDocs == 0)); } +/* Emits a debug log line with field name, query term, file id, word number, document count, bitvector index, and start offset and size of the bitvector file range. NOTE(review): the message prefix says fetchPosting although get_bitvector() also calls this. */ void +DiskTermBlueprint::log_bitvector_read() const +{ + auto range = _field_index.get_bitvector_file_range(_bitvector_lookup_result); + LOG(debug, "DiskTermBlueprint::fetchPosting " + "bitvector %s %s %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu32 " %" PRIu64 " %" PRIu64, + _field.getName().c_str(), _query_term.c_str(), _field_index.get_file_id(), + _lookupRes.wordNum, _lookupRes.counts._numDocs, + _bitvector_lookup_result.idx, + range.start_offset, range.size()); + +} + +/* Emits a debug log line with field name, query term, file id, word number, document count, bit offset, bit length, and start offset and size of the posting list file range. */ void +DiskTermBlueprint::log_posting_list_read() const +{ + auto range = _field_index.get_posting_list_file_range(_lookupRes); + LOG(debug, "DiskTermBlueprint::fetchPosting " + "posting %s %s %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64, + _field.getName().c_str(), _query_term.c_str(), _field_index.get_file_id(), + _lookupRes.wordNum, _lookupRes.counts._numDocs, + _lookupRes.bitOffset, _lookupRes.counts._bitLength, + range.start_offset, range.size()); +} + void DiskTermBlueprint::fetchPostings(const queryeval::ExecuteInfo &execInfo) { (void) execInfo; if (!_fetchPostingsDone) { if (_useBitVector && _bitvector_lookup_result.valid()) { + if (LOG_WOULD_LOG(debug)) [[unlikely]] { + log_bitvector_read(); + } _bitVector = _field_index.read_bit_vector(_bitvector_lookup_result); } if (!_bitVector) { + if (LOG_WOULD_LOG(debug)) [[unlikely]] { + log_posting_list_read(); + } _postingHandle = _field_index.read_posting_list(_lookupRes); } } @@ -90,6 +121,9 @@ DiskTermBlueprint::get_bitvector() const } std::lock_guard guard(_mutex); if (!_late_bitvector) { + if (LOG_WOULD_LOG(debug)) [[unlikely]] { + log_bitvector_read(); + } _late_bitvector = _field_index.read_bit_vector(_bitvector_lookup_result); assert(_late_bitvector); } 
diff --git a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h index 1eb20f72f86..bffdb54df4c 100644 --- a/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h +++ b/searchlib/src/vespa/searchlib/diskindex/disktermblueprint.h @@ -26,6 +26,8 @@ class DiskTermBlueprint : public queryeval::SimpleLeafBlueprint mutable std::shared_ptr _late_bitvector; const BitVector* get_bitvector() const; + /* Debug logging helpers; noinline presumably keeps the rarely taken logging path out of the callers, TODO confirm intent. */ void log_bitvector_read() const __attribute__((noinline)); + void log_posting_list_read() const __attribute__((noinline)); public: /** * Create a new blueprint. diff --git a/searchlib/src/vespa/searchlib/diskindex/field_index.h b/searchlib/src/vespa/searchlib/diskindex/field_index.h index 6aaf09113c7..303213ab452 100644 --- a/searchlib/src/vespa/searchlib/diskindex/field_index.h +++ b/searchlib/src/vespa/searchlib/diskindex/field_index.h @@ -24,6 +24,7 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking { using DiskPostingFile = index::PostingListFileRandRead; using DiskPostingFileReal = Zc4PosOccRandRead; using DiskPostingFileDynamicKReal = ZcPosOccRandRead; + using PostingListFileRange = index::PostingListFileRange; class LockedFieldIndexIoStats { FieldIndexIoStats _stats; @@ -76,10 +77,16 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking { bool trim) const; index::PostingListHandle read(const IPostingListCache::Key& key, IPostingListCache::Context& ctx) const override; index::PostingListHandle read_posting_list(const search::index::DictionaryLookupResult& lookup_result) const; + PostingListFileRange get_posting_list_file_range(const search::index::DictionaryLookupResult& lookup_result) const { + return _posting_file->get_posting_list_file_range(lookup_result); + } index::BitVectorDictionaryLookupResult lookup_bit_vector(const search::index::DictionaryLookupResult& lookup_result) const; std::shared_ptr 
read_uncached_bit_vector(index::BitVectorDictionaryLookupResult lookup_result) const; std::shared_ptr read(const IPostingListCache::BitVectorKey& key, IPostingListCache::Context& ctx) const override; std::shared_ptr read_bit_vector(index::BitVectorDictionaryLookupResult lookup_result) const; + /* Forwards to _bit_vector_dict, which is dereferenced unchecked; presumably callers only pass valid lookup results, TODO confirm. */ PostingListFileRange get_bitvector_file_range(index::BitVectorDictionaryLookupResult lookup_result) const { + return _bit_vector_dict->get_bitvector_file_range(lookup_result); + } std::unique_ptr create_iterator(const search::index::DictionaryLookupResult& lookup_result, const index::PostingListHandle& handle, const search::fef::TermFieldMatchDataArray& tfmda) const; @@ -87,6 +94,7 @@ class FieldIndex : public IPostingListCache::IPostingListFileBacking { index::DictionaryFileRandRead* get_dictionary() noexcept { return _dict.get(); } FieldIndexStats get_stats(bool clear_disk_io_stats) const; + /* Accessor for _file_id. */ uint64_t get_file_id() const noexcept { return _file_id; } uint32_t get_field_id() const noexcept { return _field_id; } bool is_posting_list_cache_enabled() const noexcept { return _posting_list_cache_enabled; } }; diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp index 233a144b39a..09a4e072f77 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp @@ -20,8 +20,10 @@ using search::bitcompression::EGPosOccDecodeContext; using search::bitcompression::EGPosOccDecodeContextCooked; using search::bitcompression::PosOccFieldsParams; using search::bitcompression::FeatureDecodeContext; +using search::index::DictionaryLookupResult; using search::index::FieldLengthInfo; using search::index::PostingListCounts; +using search::index::PostingListFileRange; using search::index::PostingListHandle; using search::ComprFileReadContext; @@ -31,6 +33,17 @@ std::string myId4("Zc.4"); std::string myId5("Zc.5"); std::string 
interleaved_features("interleaved_features"); +/* Byte range covering the bits of the posting list for lookup_result, offset by header_bit_size; start is rounded down and end rounded up to a multiple of 8 bytes. */ PostingListFileRange get_file_range(const DictionaryLookupResult& lookup_result, uint64_t header_bit_size) +{ + uint64_t start_offset = (lookup_result.bitOffset + header_bit_size) >> 3; + // Align start at 64-bit boundary + start_offset -= (start_offset & 7); + uint64_t end_offset = (lookup_result.bitOffset + header_bit_size + lookup_result.counts._bitLength + 7) >> 3; + // Align end at 64-bit boundary + end_offset += (-end_offset & 7); + return {start_offset, end_offset}; +} + } namespace search::diskindex { @@ -91,25 +104,18 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result) return handle; } - uint64_t startOffset = (lookup_result.bitOffset + _headerBitSize) >> 3; - // Align start at 64-bit boundary - startOffset -= (startOffset & 7); - uint64_t endOffset = (lookup_result.bitOffset + _headerBitSize + - lookup_result.counts._bitLength + 7) >> 3; - // Align end at 64-bit boundary - endOffset += (-endOffset & 7); - - void *mapPtr = _file->MemoryMapPtr(startOffset); + auto file_range = get_file_range(lookup_result, _headerBitSize); + void *mapPtr = _file->MemoryMapPtr(file_range.start_offset); if (mapPtr != nullptr) { handle._mem = mapPtr; - size_t pad_before = startOffset - vespalib::round_down_to_page_boundary(startOffset); - handle._read_bytes = vespalib::round_up_to_page_size(pad_before + endOffset - startOffset + decode_prefetch_size); + size_t pad_before = file_range.start_offset - vespalib::round_down_to_page_boundary(file_range.start_offset); + handle._read_bytes = vespalib::round_up_to_page_size(pad_before + file_range.size() + decode_prefetch_size); } else { - uint64_t vectorLen = endOffset - startOffset; + uint64_t vectorLen = file_range.size(); size_t padBefore; size_t padAfter; size_t padExtraAfter; // Decode prefetch space - _file->DirectIOPadding(startOffset, vectorLen, padBefore, padAfter); + _file->DirectIOPadding(file_range.start_offset, vectorLen, padBefore, padAfter); 
padExtraAfter = 0; if (padAfter < decode_prefetch_size) { padExtraAfter = decode_prefetch_size - padAfter; @@ -120,10 +126,10 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result) if (mallocLen > 0) { alignedBuffer = _file->AllocateDirectIOBuffer(mallocLen); assert(alignedBuffer != nullptr); - assert(endOffset + padAfter + padExtraAfter <= _fileSize); + assert(file_range.end_offset + padAfter + padExtraAfter <= _fileSize); _file->ReadBuf(alignedBuffer, padBefore + vectorLen + padAfter, - startOffset - padBefore); + file_range.start_offset - padBefore); } // Zero decode prefetch memory to avoid uninitialized reads if (padExtraAfter > 0) { @@ -136,7 +142,7 @@ ZcPosOccRandRead::read_posting_list(const DictionaryLookupResult& lookup_result) handle._allocSize = mallocLen; handle._read_bytes = padBefore + vectorLen + padAfter; } - handle._bitOffsetMem = (startOffset << 3) - _headerBitSize; + handle._bitOffsetMem = (file_range.start_offset << 3) - _headerBitSize; return handle; } @@ -147,14 +153,8 @@ ZcPosOccRandRead::consider_trim_posting_list(const DictionaryLookupResult &looku if (lookup_result.counts._bitLength == 0 || _memoryMapped) { return; } - uint64_t start_offset = (lookup_result.bitOffset + _headerBitSize) >> 3; - // Align start at 64-bit boundary - start_offset -= (start_offset & 7); - uint64_t end_offset = (lookup_result.bitOffset + _headerBitSize + - lookup_result.counts._bitLength + 7) >> 3; - // Align end at 64-bit boundary - end_offset += (-end_offset & 7); - size_t malloc_len = end_offset - start_offset + decode_prefetch_size; + auto file_range = get_file_range(lookup_result, _headerBitSize); + size_t malloc_len = file_range.size() + decode_prefetch_size; if (handle._allocSize == malloc_len) { assert(handle._allocMem.get() == handle._mem); return; @@ -169,7 +169,16 @@ ZcPosOccRandRead::consider_trim_posting_list(const DictionaryLookupResult &looku handle._allocMem = std::shared_ptr(mem, free); handle._mem = mem; 
handle._allocSize = malloc_len; - handle._read_bytes = end_offset - start_offset; + handle._read_bytes = file_range.size(); +} + +/* Returns {0, 0} when counts._bitLength is 0, otherwise the 8-byte aligned range from get_file_range() using _headerBitSize. */ PostingListFileRange +ZcPosOccRandRead::get_posting_list_file_range(const DictionaryLookupResult& lookup_result) const +{ + if (lookup_result.counts._bitLength == 0) { + return {0, 0}; + } + return get_file_range(lookup_result, _headerBitSize); } bool diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h index 63da1cf883b..a251fbfabb9 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h +++ b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.h @@ -30,6 +30,7 @@ class ZcPosOccRandRead : public index::PostingListFileRandRead using DictionaryLookupResult = index::DictionaryLookupResult; using PostingListCounts = index::PostingListCounts; + using PostingListFileRange = index::PostingListFileRange; using PostingListHandle = index::PostingListHandle; /** @@ -46,6 +47,7 @@ class ZcPosOccRandRead : public index::PostingListFileRandRead PostingListHandle read_posting_list(const DictionaryLookupResult& lookup_result) override; void consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle, double bloat_factor) const override; + PostingListFileRange get_posting_list_file_range(const DictionaryLookupResult& lookup_result) const override; bool open(const std::string &name, const TuneFileRandRead &tuneFileRead) override; bool close() override; diff --git a/searchlib/src/vespa/searchlib/index/bitvector_dictionary_lookup_result.h b/searchlib/src/vespa/searchlib/index/bitvector_dictionary_lookup_result.h index a81ca98127e..c16d4ed70d2 100644 --- a/searchlib/src/vespa/searchlib/index/bitvector_dictionary_lookup_result.h +++ b/searchlib/src/vespa/searchlib/index/bitvector_dictionary_lookup_result.h @@ -13,7 +13,7 @@ namespace search::index { class BitVectorDictionaryLookupResult { public: static constexpr uint32_t 
invalid = std::numeric_limits::max(); - uint64_t idx; + uint32_t idx; explicit BitVectorDictionaryLookupResult(uint32_t idx_in) noexcept : idx(idx_in) diff --git a/searchlib/src/vespa/searchlib/index/posting_list_file_range.h b/searchlib/src/vespa/searchlib/index/posting_list_file_range.h new file mode 100644 index 00000000000..8df373fec80 --- /dev/null +++ b/searchlib/src/vespa/searchlib/index/posting_list_file_range.h @@ -0,0 +1,24 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include + +namespace search::index { + +/* + * Byte range [start_offset, end_offset) of a file that holds one posting list + * (or bitvector). Might include padding at start and end due to file format. + */ +struct PostingListFileRange { + uint64_t start_offset; + uint64_t end_offset; + + PostingListFileRange(uint64_t start_offset_in, uint64_t end_offset_in) + : start_offset(start_offset_in), + end_offset(end_offset_in) + { + } + /* Length of the range in bytes; assumes end_offset is not below start_offset. */ uint64_t size() const noexcept { return end_offset - start_offset; } +}; + +} diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.cpp b/searchlib/src/vespa/searchlib/index/postinglistfile.cpp index ce3f3994e2a..75475b21758 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistfile.cpp +++ b/searchlib/src/vespa/searchlib/index/postinglistfile.cpp @@ -117,6 +117,12 @@ PostingListFileRandReadPassThrough::consider_trim_posting_list(const DictionaryL return _lower->consider_trim_posting_list(lookup_result, handle, bloat_factor); } +/* Forwards the query to the wrapped _lower file. */ PostingListFileRange +PostingListFileRandReadPassThrough::get_posting_list_file_range(const DictionaryLookupResult& lookup_result) const +{ + return _lower->get_posting_list_file_range(lookup_result); +} + bool PostingListFileRandReadPassThrough::open(const std::string &name, const TuneFileRandRead &tuneFileRead) diff --git a/searchlib/src/vespa/searchlib/index/postinglistfile.h b/searchlib/src/vespa/searchlib/index/postinglistfile.h index 
29aa44de08b..d520f7d07ea 100644 --- a/searchlib/src/vespa/searchlib/index/postinglistfile.h +++ b/searchlib/src/vespa/searchlib/index/postinglistfile.h @@ -2,6 +2,7 @@ #pragma once #include "dictionary_lookup_result.h" +#include "posting_list_file_range.h" #include "postinglistcounts.h" #include "postinglisthandle.h" #include @@ -167,6 +168,8 @@ class PostingListFileRandRead { virtual void consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle, double bloat_factor) const = 0; + /** File range for the posting list described by lookup_result, see PostingListFileRange. */ virtual PostingListFileRange get_posting_list_file_range(const DictionaryLookupResult& lookup_result) const = 0; + /** * Open posting list file for random read. */ @@ -206,6 +209,7 @@ class PostingListFileRandReadPassThrough : public PostingListFileRandRead { PostingListHandle read_posting_list(const DictionaryLookupResult& lookup_result) override; void consider_trim_posting_list(const DictionaryLookupResult &lookup_result, PostingListHandle &handle, double bloat_factor) const override; + PostingListFileRange get_posting_list_file_range(const DictionaryLookupResult& lookup_result) const override; bool open(const std::string &name, const TuneFileRandRead &tuneFileRead) override; bool close() override;