Skip to content

Commit

Permalink
Track disk io for reading disk index posting lists.
Browse files Browse the repository at this point in the history
  • Loading branch information
toregge committed Oct 22, 2024
1 parent bbc6c94 commit 5e4c750
Show file tree
Hide file tree
Showing 10 changed files with 129 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,25 @@ TEST(SearchableStatsTest, stats_can_be_merged)
TEST(SearchableStatsTest, field_stats_can_be_merged)
{
SearchableStats base_stats;
base_stats.add_field_stats("f1", FieldIndexStats().memory_usage({100, 40, 10, 5}).size_on_disk(1000)).
add_field_stats("f2", FieldIndexStats().memory_usage({400, 200, 60, 10}).size_on_disk(1500));
auto f1_stats = FieldIndexStats().memory_usage({100, 40, 10, 5}).size_on_disk(1000).
disk_io_stats(DiskIoStats().read_operations(1).read_bytes(1000));
auto f2_stats1 = FieldIndexStats().memory_usage({400, 200, 60, 10}).size_on_disk(1500).
disk_io_stats(DiskIoStats().read_operations(1).read_bytes(1000));
auto f2_stats2 = FieldIndexStats().memory_usage({300, 100, 40, 5}).size_on_disk(500).
disk_io_stats(DiskIoStats().read_operations(4).read_bytes(6000));
auto f2_stats3 = FieldIndexStats().memory_usage({700, 300, 100, 15}).size_on_disk(2000).
disk_io_stats(DiskIoStats().read_operations(5).read_bytes(7000));
auto f3_stats = FieldIndexStats().memory_usage({110, 50, 20, 12}).size_on_disk(500).
disk_io_stats(DiskIoStats().read_operations(1).read_bytes(1000));
base_stats.add_field_stats("f1", f1_stats).add_field_stats("f2", f2_stats1);
SearchableStats added_stats;
added_stats.add_field_stats("f2", FieldIndexStats().memory_usage({300, 100, 40, 5}).size_on_disk(500)).
add_field_stats("f3", FieldIndexStats().memory_usage({110, 50, 20, 12}).size_on_disk(500));
added_stats.add_field_stats("f2", f2_stats2).add_field_stats("f3", f3_stats);
SearchableStats act_stats = base_stats;
act_stats.merge(added_stats);
SearchableStats exp_stats;
exp_stats.add_field_stats("f1", FieldIndexStats().memory_usage({100, 40, 10, 5}).size_on_disk(1000)).
add_field_stats("f2", FieldIndexStats().memory_usage({700, 300, 100, 15}).size_on_disk(2000)).
add_field_stats("f3", FieldIndexStats().memory_usage({110, 50, 20, 12}).size_on_disk(500));
exp_stats.add_field_stats("f1", f1_stats).
add_field_stats("f2", f2_stats3).
add_field_stats("f3", f3_stats);
EXPECT_EQ(exp_stats, act_stats);
}

Expand Down
18 changes: 15 additions & 3 deletions searchlib/src/vespa/searchlib/diskindex/field_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,21 @@ const std::vector<std::string> field_file_names{

}

FieldIndex::LockedDiskIoStats::LockedDiskIoStats() noexcept
: DiskIoStats(),
_mutex()
{
}

// Destructor is declared in the class and defaulted here, out of line.
FieldIndex::LockedDiskIoStats::~LockedDiskIoStats() = default;

FieldIndex::FieldIndex()
: _posting_file(),
_bit_vector_dict(),
_dict(),
_size_on_disk(0)
_size_on_disk(0),
_disk_io_stats(std::make_shared<LockedDiskIoStats>())
{

}

FieldIndex::FieldIndex(FieldIndex&&) = default;
Expand Down Expand Up @@ -146,6 +154,9 @@ FieldIndex::read_posting_list(const DictionaryLookupResult& lookup_result) const
return {};
}
handle->_file->readPostingList(*handle);
if (handle->_read_bytes != 0) {
_disk_io_stats->add_read_operation(handle->_read_bytes);
}
return handle;
}

Expand All @@ -167,7 +178,8 @@ FieldIndex::get_field_length_info() const
FieldIndexStats
FieldIndex::get_stats() const
{
return FieldIndexStats().size_on_disk(_size_on_disk);
auto disk_io_stats = _disk_io_stats->read_and_clear();
return FieldIndexStats().size_on_disk(_size_on_disk).disk_io_stats(disk_io_stats);
}

}
19 changes: 19 additions & 0 deletions searchlib/src/vespa/searchlib/diskindex/field_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,29 @@ class FieldIndex {
using DiskPostingFileReal = Zc4PosOccRandRead;
using DiskPostingFileDynamicKReal = ZcPosOccRandRead;

/*
 * DiskIoStats variant whose add_read_operation() and read_and_clear()
 * take a mutex before delegating to the base class implementation.
 * Other members inherited from DiskIoStats are not guarded here.
 */
class LockedDiskIoStats : public DiskIoStats {
    std::mutex _mutex;

public:
    LockedDiskIoStats() noexcept;
    ~LockedDiskIoStats();

    // Records one read operation of the given size while holding the mutex.
    void add_read_operation(uint64_t bytes) {
        std::lock_guard<std::mutex> lock(_mutex);
        DiskIoStats::add_read_operation(bytes);
    }

    // Returns the current counters and resets them, both under the mutex.
    DiskIoStats read_and_clear() {
        std::lock_guard<std::mutex> lock(_mutex);
        DiskIoStats snapshot = DiskIoStats::read_and_clear();
        return snapshot;
    }
};

std::shared_ptr<DiskPostingFile> _posting_file;
std::shared_ptr<BitVectorDictionary> _bit_vector_dict;
std::unique_ptr<index::DictionaryFileRandRead> _dict;
uint64_t _size_on_disk;
std::shared_ptr<LockedDiskIoStats> _disk_io_stats;

public:
FieldIndex();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ ZcPosOccRandRead::readPostingList(PostingListHandle &handle)
handle._mem = static_cast<char *>(alignedBuffer) + padBefore;
handle._allocMem = mallocStart;
handle._allocSize = mallocLen;
handle._read_bytes = padBefore + vectorLen + padAfter;
}
handle._bitOffsetMem = (startOffset << 3) - _headerBitSize;
}
Expand Down
5 changes: 4 additions & 1 deletion searchlib/src/vespa/searchlib/index/postinglisthandle.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class PostingListHandle {
const void *_mem; // Memory backing posting list after read/mmap
void *_allocMem; // What to free after posting list
size_t _allocSize; // Size of allocated memory
uint64_t _read_bytes; // Bytes read from disk (used by disk io stats)

PostingListHandle()
: _file(nullptr),
Expand All @@ -39,7 +40,8 @@ class PostingListHandle {
_bitOffsetMem(0),
_mem(nullptr),
_allocMem(nullptr),
_allocSize(0)
_allocSize(0),
_read_bytes(0)
{ }

~PostingListHandle()
Expand Down Expand Up @@ -68,6 +70,7 @@ class PostingListHandle {
_allocMem = nullptr;
}
_allocSize = 0;
_read_bytes = 0;
}
};

Expand Down
1 change: 1 addition & 0 deletions searchlib/src/vespa/searchlib/util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ vespa_add_library(searchlib_util OBJECT
comprbuffer.cpp
comprfile.cpp
data_buffer_writer.cpp
disk_io_stats.cpp
dirtraverse.cpp
drainingbufferwriter.cpp
field_index_stats.cpp
Expand Down
13 changes: 13 additions & 0 deletions searchlib/src/vespa/searchlib/util/disk_io_stats.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#include "disk_io_stats.h"
#include <ostream>

namespace search {

// Writes stats to os as "{read_operations: <count>, read_bytes: <bytes>}"
// and returns the stream so that calls can be chained.
std::ostream& operator<<(std::ostream& os, const DiskIoStats& stats) {
    os << "{read_operations: " << stats.read_operations();
    os << ", read_bytes: " << stats.read_bytes();
    return os << "}";
}

}
48 changes: 48 additions & 0 deletions searchlib/src/vespa/searchlib/util/disk_io_stats.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include <cstdint>
#include <iosfwd>

namespace search {

/*
 * Class tracking disk io.
 *
 * Holds two counters: the number of read operations and the total number
 * of bytes read. The class does no locking of its own.
 */
class DiskIoStats {
    uint64_t _read_operations; // number of read operations recorded
    uint64_t _read_bytes;      // sum of the byte counts of those reads

public:
    // Both counters start at zero.
    DiskIoStats() noexcept
        : _read_operations(0),
          _read_bytes(0)
    {}

    // Records a single read operation that transferred the given number of bytes.
    void add_read_operation(uint64_t bytes) noexcept {
        ++_read_operations;
        _read_bytes += bytes;
    }
    // Adds the counters of rhs to the counters of this object.
    void merge(const DiskIoStats& rhs) noexcept {
        _read_operations += rhs._read_operations;
        _read_bytes += rhs._read_bytes;
    }
    // Two stats objects are equal when both counters match.
    bool operator==(const DiskIoStats& rhs) const noexcept {
        return _read_operations == rhs._read_operations &&
               _read_bytes == rhs._read_bytes;
    }
    // Resets both counters to zero.
    void clear() noexcept {
        _read_operations = 0;
        _read_bytes = 0;
    }
    // Returns a copy of the current counters and then resets this object.
    DiskIoStats read_and_clear() noexcept {
        auto result = *this;
        clear();
        return result;
    }

    // Fluent setters: overwrite one counter and return *this for chaining.
    // They only assign an integer, so they are noexcept like the rest of the class.
    DiskIoStats& read_operations(uint64_t value) noexcept { _read_operations = value; return *this; }
    DiskIoStats& read_bytes(uint64_t value) noexcept { _read_bytes = value; return *this; }

    // Accessors for the current counter values.
    uint64_t read_operations() const noexcept { return _read_operations; }
    uint64_t read_bytes() const noexcept { return _read_bytes; }
};

std::ostream& operator<<(std::ostream& os, const DiskIoStats& stats);

}
3 changes: 2 additions & 1 deletion searchlib/src/vespa/searchlib/util/field_index_stats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
namespace search {

std::ostream& operator<<(std::ostream& os, const FieldIndexStats& stats) {
os << "{memory: " << stats.memory_usage() << ", disk: " << stats.size_on_disk() << "}";
os << "{memory: " << stats.memory_usage() << ", disk: " << stats.size_on_disk() <<
", diskio: " << stats.disk_io_stats() << "}";
return os;
}

Expand Down
13 changes: 11 additions & 2 deletions searchlib/src/vespa/searchlib/util/field_index_stats.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include "disk_io_stats.h"
#include <vespa/vespalib/util/memoryusage.h>
#include <iosfwd>

namespace search {

Expand All @@ -13,11 +15,13 @@ class FieldIndexStats
private:
vespalib::MemoryUsage _memory_usage;
size_t _size_on_disk; // in bytes
DiskIoStats _disk_io_stats;

public:
FieldIndexStats() noexcept
: _memory_usage(),
_size_on_disk(0)
_size_on_disk(0),
_disk_io_stats()
{}
FieldIndexStats &memory_usage(const vespalib::MemoryUsage &usage) noexcept {
_memory_usage = usage;
Expand All @@ -30,14 +34,19 @@ class FieldIndexStats
}
size_t size_on_disk() const noexcept { return _size_on_disk; }

FieldIndexStats& disk_io_stats(const DiskIoStats& stats) { _disk_io_stats = stats; return *this; }
const DiskIoStats& disk_io_stats() const noexcept { return _disk_io_stats; }

void merge(const FieldIndexStats &rhs) noexcept {
_memory_usage.merge(rhs._memory_usage);
_size_on_disk += rhs._size_on_disk;
_disk_io_stats.merge(rhs._disk_io_stats);
}

bool operator==(const FieldIndexStats& rhs) const noexcept {
return _memory_usage == rhs._memory_usage &&
_size_on_disk == rhs._size_on_disk;
_size_on_disk == rhs._size_on_disk &&
_disk_io_stats == rhs._disk_io_stats;
}
};

Expand Down

0 comments on commit 5e4c750

Please sign in to comment.