From 5e4c7502ef7b3d7f0be1d61272cc224a884b87da Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Tue, 22 Oct 2024 16:32:30 +0200 Subject: [PATCH] Track disk io for reading disk index posting lists. --- .../searchable_stats_test.cpp | 22 ++++++--- .../vespa/searchlib/diskindex/field_index.cpp | 18 +++++-- .../vespa/searchlib/diskindex/field_index.h | 19 ++++++++ .../searchlib/diskindex/zcposoccrandread.cpp | 1 + .../vespa/searchlib/index/postinglisthandle.h | 5 +- .../src/vespa/searchlib/util/CMakeLists.txt | 1 + .../vespa/searchlib/util/disk_io_stats.cpp | 13 +++++ .../src/vespa/searchlib/util/disk_io_stats.h | 48 +++++++++++++++++++ .../searchlib/util/field_index_stats.cpp | 3 +- .../vespa/searchlib/util/field_index_stats.h | 13 ++++- 10 files changed, 129 insertions(+), 14 deletions(-) create mode 100644 searchlib/src/vespa/searchlib/util/disk_io_stats.cpp create mode 100644 searchlib/src/vespa/searchlib/util/disk_io_stats.h diff --git a/searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp b/searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp index 90e56adb32c5..b48a6fc57a10 100644 --- a/searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp +++ b/searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp @@ -41,17 +41,25 @@ TEST(SearchableStatsTest, stats_can_be_merged) TEST(SearchableStatsTest, field_stats_can_be_merged) { SearchableStats base_stats; - base_stats.add_field_stats("f1", FieldIndexStats().memory_usage({100, 40, 10, 5}).size_on_disk(1000)). - add_field_stats("f2", FieldIndexStats().memory_usage({400, 200, 60, 10}).size_on_disk(1500)); + auto f1_stats = FieldIndexStats().memory_usage({100, 40, 10, 5}).size_on_disk(1000). + disk_io_stats(DiskIoStats().read_operations(1).read_bytes(1000)); + auto f2_stats1 = FieldIndexStats().memory_usage({400, 200, 60, 10}).size_on_disk(1500). + disk_io_stats(DiskIoStats().read_operations(1).read_bytes(1000)); + auto f2_stats2 = FieldIndexStats().memory_usage({300, 100, 40, 5}).size_on_disk(500). + disk_io_stats(DiskIoStats().read_operations(4).read_bytes(6000)); + auto f2_stats3 = FieldIndexStats().memory_usage({700, 300, 100, 15}).size_on_disk(2000). + disk_io_stats(DiskIoStats().read_operations(5).read_bytes(7000)); + auto f3_stats = FieldIndexStats().memory_usage({110, 50, 20, 12}).size_on_disk(500). + disk_io_stats(DiskIoStats().read_operations(1).read_bytes(1000)); + base_stats.add_field_stats("f1", f1_stats).add_field_stats("f2", f2_stats1); SearchableStats added_stats; - added_stats.add_field_stats("f2", FieldIndexStats().memory_usage({300, 100, 40, 5}).size_on_disk(500)). - add_field_stats("f3", FieldIndexStats().memory_usage({110, 50, 20, 12}).size_on_disk(500)); + added_stats.add_field_stats("f2", f2_stats2).add_field_stats("f3", f3_stats); SearchableStats act_stats = base_stats; act_stats.merge(added_stats); SearchableStats exp_stats; - exp_stats.add_field_stats("f1", FieldIndexStats().memory_usage({100, 40, 10, 5}).size_on_disk(1000)). - add_field_stats("f2", FieldIndexStats().memory_usage({700, 300, 100, 15}).size_on_disk(2000)). - add_field_stats("f3", FieldIndexStats().memory_usage({110, 50, 20, 12}).size_on_disk(500)); + exp_stats.add_field_stats("f1", f1_stats). + add_field_stats("f2", f2_stats3). + add_field_stats("f3", f3_stats); EXPECT_EQ(exp_stats, act_stats); } diff --git a/searchlib/src/vespa/searchlib/diskindex/field_index.cpp b/searchlib/src/vespa/searchlib/diskindex/field_index.cpp index daa84d384b65..a77a9f7889cd 100644 --- a/searchlib/src/vespa/searchlib/diskindex/field_index.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/field_index.cpp @@ -27,13 +27,21 @@ const std::vector field_file_names{ } +FieldIndex::LockedDiskIoStats::LockedDiskIoStats() noexcept + : DiskIoStats(), + _mutex() +{ +} + +FieldIndex::LockedDiskIoStats::~LockedDiskIoStats() = default; + FieldIndex::FieldIndex() : _posting_file(), _bit_vector_dict(), _dict(), - _size_on_disk(0) + _size_on_disk(0), + _disk_io_stats(std::make_shared()) { - } FieldIndex::FieldIndex(FieldIndex&&) = default; @@ -146,6 +154,9 @@ FieldIndex::read_posting_list(const DictionaryLookupResult& lookup_result) const return {}; } handle->_file->readPostingList(*handle); + if (handle->_read_bytes != 0) { + _disk_io_stats->add_read_operation(handle->_read_bytes); + } return handle; } @@ -167,7 +178,8 @@ FieldIndex::get_field_length_info() const FieldIndexStats FieldIndex::get_stats() const { - return FieldIndexStats().size_on_disk(_size_on_disk); + auto disk_io_stats = _disk_io_stats->read_and_clear(); + return FieldIndexStats().size_on_disk(_size_on_disk).disk_io_stats(disk_io_stats); } } diff --git a/searchlib/src/vespa/searchlib/diskindex/field_index.h b/searchlib/src/vespa/searchlib/diskindex/field_index.h index 1ffb257b4894..a2c83a631210 100644 --- a/searchlib/src/vespa/searchlib/diskindex/field_index.h +++ b/searchlib/src/vespa/searchlib/diskindex/field_index.h @@ -22,10 +22,29 @@ class FieldIndex { using DiskPostingFileReal = Zc4PosOccRandRead; using DiskPostingFileDynamicKReal = ZcPosOccRandRead; + class LockedDiskIoStats : public DiskIoStats { + std::mutex _mutex; + + public: + LockedDiskIoStats() noexcept; + ~LockedDiskIoStats(); + + void add_read_operation(uint64_t bytes) { + std::lock_guard guard(_mutex); + DiskIoStats::add_read_operation(bytes); + } + + DiskIoStats read_and_clear() { + std::lock_guard guard(_mutex); + return DiskIoStats::read_and_clear(); + } + }; + std::shared_ptr _posting_file; std::shared_ptr _bit_vector_dict; std::unique_ptr _dict; uint64_t _size_on_disk; + std::shared_ptr _disk_io_stats; public: FieldIndex(); diff --git a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp index 2f7d4c1de4b7..ad3f80373c63 100644 --- a/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/zcposoccrandread.cpp @@ -136,6 +136,7 @@ ZcPosOccRandRead::readPostingList(PostingListHandle &handle) handle._mem = static_cast(alignedBuffer) + padBefore; handle._allocMem = mallocStart; handle._allocSize = mallocLen; + handle._read_bytes = padBefore + vectorLen + padAfter; } handle._bitOffsetMem = (startOffset << 3) - _headerBitSize; } diff --git a/searchlib/src/vespa/searchlib/index/postinglisthandle.h b/searchlib/src/vespa/searchlib/index/postinglisthandle.h index eca7458369c4..ace1e16f766b 100644 --- a/searchlib/src/vespa/searchlib/index/postinglisthandle.h +++ b/searchlib/src/vespa/searchlib/index/postinglisthandle.h @@ -31,6 +31,7 @@ class PostingListHandle { const void *_mem; // Memory backing posting list after read/mmap void *_allocMem; // What to free after posting list size_t _allocSize; // Size of allocated memory + uint64_t _read_bytes; // Bytes read from disk (used by disk io stats) PostingListHandle() : _file(nullptr), @@ -39,7 +40,8 @@ class PostingListHandle { _bitOffsetMem(0), _mem(nullptr), _allocMem(nullptr), - _allocSize(0) + _allocSize(0), + _read_bytes(0) { } ~PostingListHandle() @@ -68,6 +70,7 @@ class PostingListHandle { _allocMem = nullptr; } _allocSize = 0; + _read_bytes = 0; } }; diff --git a/searchlib/src/vespa/searchlib/util/CMakeLists.txt b/searchlib/src/vespa/searchlib/util/CMakeLists.txt index f2e4b8241d11..b863d7307401 100644 --- a/searchlib/src/vespa/searchlib/util/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/util/CMakeLists.txt @@ -5,6 +5,7 @@ vespa_add_library(searchlib_util OBJECT comprbuffer.cpp comprfile.cpp data_buffer_writer.cpp + disk_io_stats.cpp dirtraverse.cpp drainingbufferwriter.cpp field_index_stats.cpp diff --git a/searchlib/src/vespa/searchlib/util/disk_io_stats.cpp b/searchlib/src/vespa/searchlib/util/disk_io_stats.cpp new file mode 100644 index 000000000000..b4b5693277cb --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/disk_io_stats.cpp @@ -0,0 +1,13 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "disk_io_stats.h" +#include + +namespace search { + +std::ostream& operator<<(std::ostream& os, const DiskIoStats& stats) { + os << "{read_operations: " << stats.read_operations() << ", read_bytes: " << stats.read_bytes() << "}"; + return os; +} + +} diff --git a/searchlib/src/vespa/searchlib/util/disk_io_stats.h b/searchlib/src/vespa/searchlib/util/disk_io_stats.h new file mode 100644 index 000000000000..e7e274515918 --- /dev/null +++ b/searchlib/src/vespa/searchlib/util/disk_io_stats.h @@ -0,0 +1,48 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include +#include + +namespace search { + +/* + * Class tracking disk io. + */ +class DiskIoStats { + uint64_t _read_operations; + uint64_t _read_bytes; + +public: + DiskIoStats() noexcept + : _read_operations(0), + _read_bytes(0) + {} + + void add_read_operation(uint64_t bytes) noexcept { + ++_read_operations; + _read_bytes += bytes; + } + void merge(const DiskIoStats& rhs) noexcept { + _read_operations += rhs._read_operations; + _read_bytes += rhs._read_bytes; + } + bool operator==(const DiskIoStats& rhs) const noexcept { + return _read_operations == rhs._read_operations && + _read_bytes == rhs._read_bytes; + } + void clear() noexcept { + _read_operations = 0; + _read_bytes = 0; + } + DiskIoStats read_and_clear() noexcept { auto result = *this; clear(); return result; } + + DiskIoStats& read_operations(uint64_t value) { _read_operations = value; return *this; } + DiskIoStats& read_bytes(uint64_t value) { _read_bytes = value; return *this; } + uint64_t read_operations() const noexcept { return _read_operations; } + uint64_t read_bytes() const noexcept { return _read_bytes; } +}; + +std::ostream& operator<<(std::ostream& os, const DiskIoStats& stats); + +} diff --git a/searchlib/src/vespa/searchlib/util/field_index_stats.cpp b/searchlib/src/vespa/searchlib/util/field_index_stats.cpp index 5348f19afe69..9cd4fed304f5 100644 --- a/searchlib/src/vespa/searchlib/util/field_index_stats.cpp +++ b/searchlib/src/vespa/searchlib/util/field_index_stats.cpp @@ -6,7 +6,8 @@ namespace search { std::ostream& operator<<(std::ostream& os, const FieldIndexStats& stats) { - os << "{memory: " << stats.memory_usage() << ", disk: " << stats.size_on_disk() << "}"; + os << "{memory: " << stats.memory_usage() << ", disk: " << stats.size_on_disk() << + ", diskio: " << stats.disk_io_stats() << "}"; return os; } diff --git a/searchlib/src/vespa/searchlib/util/field_index_stats.h b/searchlib/src/vespa/searchlib/util/field_index_stats.h index 01a3340affe0..d0845a29d987 100644 --- a/searchlib/src/vespa/searchlib/util/field_index_stats.h +++ b/searchlib/src/vespa/searchlib/util/field_index_stats.h @@ -1,7 +1,9 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once +#include "disk_io_stats.h" #include +#include namespace search { @@ -13,11 +15,13 @@ class FieldIndexStats private: vespalib::MemoryUsage _memory_usage; size_t _size_on_disk; // in bytes + DiskIoStats _disk_io_stats; public: FieldIndexStats() noexcept : _memory_usage(), - _size_on_disk(0) + _size_on_disk(0), + _disk_io_stats() {} FieldIndexStats &memory_usage(const vespalib::MemoryUsage &usage) noexcept { _memory_usage = usage; @@ -30,14 +34,19 @@ class FieldIndexStats } size_t size_on_disk() const noexcept { return _size_on_disk; } + FieldIndexStats& disk_io_stats(const DiskIoStats& stats) { _disk_io_stats = stats; return *this; } + const DiskIoStats& disk_io_stats() const noexcept { return _disk_io_stats; } + void merge(const FieldIndexStats &rhs) noexcept { _memory_usage.merge(rhs._memory_usage); _size_on_disk += rhs._size_on_disk; + _disk_io_stats.merge(rhs._disk_io_stats); } bool operator==(const FieldIndexStats& rhs) const noexcept { return _memory_usage == rhs._memory_usage && - _size_on_disk == rhs._size_on_disk; + _size_on_disk == rhs._size_on_disk && + _disk_io_stats == rhs._disk_io_stats; } };