diff --git a/cmake/copyright.cmake b/cmake/copyright.cmake index 1a1ca9d573..6b0d9c8586 100644 --- a/cmake/copyright.cmake +++ b/cmake/copyright.cmake @@ -61,7 +61,7 @@ list(FILTER SRC EXCLUDE REGEX [[silkworm/core/common/lru_cache(_test)?\..pp$]]) list(FILTER SRC EXCLUDE REGEX [[silkworm/core/crypto/kzg\.cpp$]]) list(FILTER SRC EXCLUDE REGEX [[silkworm/infra/concurrency/thread_pool\.hpp$]]) list(FILTER SRC EXCLUDE REGEX [[silkworm/interfaces/]]) -list(FILTER SRC EXCLUDE REGEX [[silkworm/db/datastore/snapshots/config/[a-z_]+\.hpp$]]) +list(FILTER SRC EXCLUDE REGEX [[silkworm/db/datastore/snapshots/config/chains/[a-z_]+\.hpp$]]) list(FILTER SRC EXCLUDE REGEX [[silkworm/rpc/json_rpc/specification\.cpp$]]) list(FILTER SRC EXCLUDE REGEX [[silkworm/sync/internals/preverified_hashes/preverified_hashes_[a-z]+\.cpp$]]) diff --git a/cmd/capi/execute.cpp b/cmd/capi/execute.cpp index 2f1778d9bc..426e72e400 100644 --- a/cmd/capi/execute.cpp +++ b/cmd/capi/execute.cpp @@ -56,7 +56,7 @@ struct ExecuteBlocksSettings { }; struct BuildIndexesSettings { - std::vector snapshot_names; + std::vector segment_file_names; }; struct Settings { @@ -113,7 +113,7 @@ void parse_command_line(int argc, char* argv[], CLI::App& app, Settings& setting auto cmd_build_indexes = app.add_subcommand("build_indexes", "Build indexes"); BuildIndexesSettings build_indexes_settings; - cmd_build_indexes->add_option("--snapshot_names", build_indexes_settings.snapshot_names, "Snapshot to index")->delimiter(',')->required(); + cmd_build_indexes->add_option("--filenames", build_indexes_settings.segment_file_names, "Segment file names to index")->delimiter(',')->required(); // rpcdaemon sub-command auto cmd_rpcdaemon = app.add_subcommand("rpcdaemon", "Start RPC Daemon"); @@ -146,7 +146,7 @@ const char* make_path(const snapshots::SnapshotPath& p) { return path; } -std::vector collect_all_snapshots(SnapshotRepository& snapshot_repository) { +std::vector collect_all_snapshots(const SnapshotRepository& snapshot_repository) { std::vector headers_snapshot_sequence; std::vector bodies_snapshot_sequence; std::vector transactions_snapshot_sequence; @@ -157,9 +157,9 @@ std::vector collect_all_snapshots(SnapshotRepository& sna { SilkwormHeadersSnapshot raw_headers_snapshot{ .segment{ - .file_path = make_path(bundle.header_snapshot.path()), - .memory_address = bundle.header_snapshot.memory_file_region().data(), - .memory_length = bundle.header_snapshot.memory_file_region().size(), + .file_path = make_path(bundle.header_segment.path()), + .memory_address = bundle.header_segment.memory_file_region().data(), + .memory_length = bundle.header_segment.memory_file_region().size(), }, .header_hash_index{ .file_path = make_path(bundle.idx_header_hash.path()), @@ -172,9 +172,9 @@ std::vector collect_all_snapshots(SnapshotRepository& sna { SilkwormBodiesSnapshot raw_bodies_snapshot{ .segment{ - .file_path = make_path(bundle.body_snapshot.path()), - .memory_address = bundle.body_snapshot.memory_file_region().data(), - .memory_length = bundle.body_snapshot.memory_file_region().size(), + .file_path = make_path(bundle.body_segment.path()), + .memory_address = bundle.body_segment.memory_file_region().data(), + .memory_length = bundle.body_segment.memory_file_region().size(), }, .block_num_index{ .file_path = make_path(bundle.idx_body_number.path()), @@ -187,9 +187,9 @@ std::vector collect_all_snapshots(SnapshotRepository& sna { SilkwormTransactionsSnapshot raw_transactions_snapshot{ .segment{ - .file_path = make_path(bundle.txn_snapshot.path()), - .memory_address = bundle.txn_snapshot.memory_file_region().data(), - .memory_length = bundle.txn_snapshot.memory_file_region().size(), + .file_path = make_path(bundle.txn_segment.path()), + .memory_address = bundle.txn_segment.memory_file_region().data(), + .memory_length = bundle.txn_segment.memory_file_region().size(), }, .tx_hash_index{ .file_path = make_path(bundle.idx_txn_hash.path()), @@ -321,32 +321,32 @@ int execute_blocks(SilkwormHandle handle, ExecuteBlocksSettings settings, Snapsh } int build_indexes(SilkwormHandle handle, const BuildIndexesSettings& settings, const DataDirectory& data_dir) { - SILK_INFO << "Building indexes for snapshots: " << settings.snapshot_names; + SILK_INFO << "Building indexes for segments: " << settings.segment_file_names; - std::vector snapshots; - std::vector snapshot_files; + std::vector segments; + std::vector segment_mmap_files; // Parse snapshot paths and create memory mapped files - for (auto& snapshot_name : settings.snapshot_names) { - auto raw_snapshot_path = data_dir.snapshots().path() / snapshot_name; + for (auto& file_name : settings.segment_file_names) { + auto raw_snapshot_path = data_dir.snapshots().path() / file_name; auto snapshot_path = SnapshotPath::parse(raw_snapshot_path); if (!snapshot_path.has_value()) throw std::runtime_error("Invalid snapshot path"); - Snapshot& snapshot = snapshots.emplace_back(*snapshot_path); - snapshot.reopen_segment(); + SegmentFileReader& segment = segments.emplace_back(*snapshot_path); + segment.reopen_segment(); auto mmf = new SilkwormMemoryMappedFile{ .file_path = make_path(*snapshot_path), - .memory_address = snapshot.memory_file_region().data(), - .memory_length = snapshot.memory_file_region().size(), + .memory_address = segment.memory_file_region().data(), + .memory_length = segment.memory_file_region().size(), }; - snapshot_files.push_back(mmf); + segment_mmap_files.push_back(mmf); } // Call api to build indexes const auto start_time{std::chrono::high_resolution_clock::now()}; - const int status_code = silkworm_build_recsplit_indexes(handle, snapshot_files.data(), snapshot_files.size()); + const int status_code = silkworm_build_recsplit_indexes(handle, segment_mmap_files.data(), segment_mmap_files.size()); if (status_code != SILKWORM_OK) return status_code; auto elapsed = std::chrono::high_resolution_clock::now() - start_time; @@ -354,9 +354,9 @@ int build_indexes(SilkwormHandle handle, const BuildIndexesSettings& settings, c << std::chrono::duration_cast(elapsed).count() << "ms"; // Free memory mapped files - for (auto mmf : snapshot_files) { - delete[] mmf->file_path; - delete mmf; + for (auto mmap_file : segment_mmap_files) { + delete[] mmap_file->file_path; + delete mmap_file; } return SILKWORM_OK; diff --git a/cmd/dev/snapshots.cpp b/cmd/dev/snapshots.cpp index 178969837b..2ae1883e8a 100644 --- a/cmd/dev/snapshots.cpp +++ b/cmd/dev/snapshots.cpp @@ -41,12 +41,12 @@ #include #include #include -#include -#include +#include +#include #include #include // TODO(canepat) refactor to extract Hash128 to murmur_hash3.hpp #include -#include +#include #include #include #include @@ -74,7 +74,7 @@ constexpr int kDefaultRepetitions{1}; struct SnapshotSubcommandSettings { SnapshotSettings settings; std::filesystem::path input_file_path; - std::optional snapshot_file_name; + std::optional segment_file_name; int page_size{kDefaultPageSize}; bool skip_system_txs{false}; std::optional lookup_hash; @@ -221,7 +221,7 @@ void parse_command_line(int argc, char* argv[], CLI::App& app, SnapshotToolboxSe for (auto& cmd : {commands[SnapshotTool::create_index], commands[SnapshotTool::open_index], commands[SnapshotTool::decode_segment]}) { - cmd->add_option("--snapshot_file", snapshot_settings.snapshot_file_name, "Path to snapshot file") + cmd->add_option("--snapshot_file", snapshot_settings.segment_file_name, "Path to snapshot file") ->required() ->capture_default_str(); } @@ -230,7 +230,7 @@ void parse_command_line(int argc, char* argv[], CLI::App& app, SnapshotToolboxSe commands[SnapshotTool::lookup_body], commands[SnapshotTool::lookup_header], commands[SnapshotTool::lookup_txn]}) { - cmd->add_option("--snapshot_file", snapshot_settings.snapshot_file_name, "Path to snapshot file") + cmd->add_option("--snapshot_file", snapshot_settings.segment_file_name, "Path to snapshot file") ->capture_default_str(); } @@ -281,14 +281,14 @@ auto as_seconds(const std::chrono::duration& elapsed) { } void decode_segment(const SnapshotSubcommandSettings& settings, int repetitions) { - ensure(settings.snapshot_file_name.has_value(), "decode_segment: --snapshot_file must be specified"); - const auto snap_file{SnapshotPath::parse(std::filesystem::path{*settings.snapshot_file_name})}; - ensure(snap_file.has_value(), "decode_segment: invalid snapshot_file path format"); + ensure(settings.segment_file_name.has_value(), "decode_segment: --snapshot_file must be specified"); + const auto snapshot_path = SnapshotPath::parse(std::filesystem::path{*settings.segment_file_name}); + ensure(snapshot_path.has_value(), "decode_segment: invalid snapshot_file path format"); - SILK_INFO << "Decode snapshot: " << snap_file->path(); + SILK_INFO << "Decode snapshot: " << snapshot_path->path(); std::chrono::time_point start{std::chrono::steady_clock::now()}; for (int i = 0; i < repetitions; ++i) { - Snapshot snapshot{*snap_file}; + SegmentFileReader snapshot{*snapshot_path}; snapshot.reopen_segment(); } std::chrono::duration elapsed{std::chrono::steady_clock::now() - start}; @@ -301,7 +301,7 @@ static std::unique_ptr bundle_factory() { using BodyCounters = std::pair; -BodyCounters count_bodies_in_one(const SnapshotSubcommandSettings& settings, const Snapshot& body_snapshot) { +BodyCounters count_bodies_in_one(const SnapshotSubcommandSettings& settings, const SegmentFileReader& body_segment) { int num_bodies = 0; uint64_t num_txns = 0; const int kFirstItems = 3; @@ -309,7 +309,7 @@ BodyCounters count_bodies_in_one(const SnapshotSubcommandSettings& settings, con if (settings.verbose) { SILK_INFO << "Printing first " << kFirstItems << " bodies, then every " << kStepItems; } - for (const BlockBodyForStorage& b : BodySnapshotReader{body_snapshot}) { + for (const BlockBodyForStorage& b : BodySegmentReader{body_segment}) { // If *system transactions* should not be counted, skip first and last tx in block body const auto base_txn_id{settings.skip_system_txs ? b.base_txn_id + 1 : b.base_txn_id}; const auto txn_count{settings.skip_system_txs && b.txn_count >= 2 ? b.txn_count - 2 : b.txn_count}; @@ -324,13 +324,13 @@ BodyCounters count_bodies_in_one(const SnapshotSubcommandSettings& settings, con } BodyCounters count_bodies_in_all(const SnapshotSubcommandSettings& settings) { - SnapshotRepository snapshot_repo{settings.settings, bundle_factory()}; - snapshot_repo.reopen_folder(); + SnapshotRepository snapshot_repository{settings.settings, bundle_factory()}; + snapshot_repository.reopen_folder(); int num_bodies = 0; uint64_t num_txns = 0; - for (const auto& bundle_ptr : snapshot_repo.view_bundles()) { + for (const auto& bundle_ptr : snapshot_repository.view_bundles()) { const auto& bundle = *bundle_ptr; - const auto [body_count, txn_count] = count_bodies_in_one(settings, bundle.body_snapshot); + const auto [body_count, txn_count] = count_bodies_in_one(settings, bundle.body_segment); num_bodies += body_count; num_txns += txn_count; } @@ -342,13 +342,13 @@ void count_bodies(const SnapshotSubcommandSettings& settings, int repetitions) { int num_bodies = 0; uint64_t num_txns = 0; for (int i = 0; i < repetitions; ++i) { - if (settings.snapshot_file_name) { - const auto snapshot_path{SnapshotPath::parse(std::filesystem::path{*settings.snapshot_file_name})}; + if (settings.segment_file_name) { + const auto snapshot_path{SnapshotPath::parse(std::filesystem::path{*settings.segment_file_name})}; ensure(snapshot_path.has_value(), "count_bodies: invalid snapshot_file path format"); ensure(snapshot_path->type() == SnapshotType::bodies, "count_bodies: snapshot_file must point to body segment"); - Snapshot body_snapshot{*snapshot_path}; - body_snapshot.reopen_segment(); - std::tie(num_bodies, num_txns) = count_bodies_in_one(settings, body_snapshot); + SegmentFileReader body_segment{*snapshot_path}; + body_segment.reopen_segment(); + std::tie(num_bodies, num_txns) = count_bodies_in_one(settings, body_segment); } else { std::tie(num_bodies, num_txns) = count_bodies_in_all(settings); } @@ -357,14 +357,14 @@ void count_bodies(const SnapshotSubcommandSettings& settings, int repetitions) { SILK_INFO << "How many bodies: " << num_bodies << " txs: " << num_txns << " duration: " << as_milliseconds(elapsed) << " msec"; } -int count_headers_in_one(const SnapshotSubcommandSettings& settings, const Snapshot& header_snapshot) { +int count_headers_in_one(const SnapshotSubcommandSettings& settings, const SegmentFileReader& header_segment) { int num_headers = 0; const int kFirstItems = 3; const int kStepItems = 50'000; if (settings.verbose) { SILK_INFO << "Printing first " << kFirstItems << " headers, then every " << kStepItems; } - for (const BlockHeader& h : HeaderSnapshotReader{header_snapshot}) { + for (const BlockHeader& h : HeaderSegmentReader{header_segment}) { ++num_headers; if (settings.verbose && (num_headers < kFirstItems || num_headers % kStepItems == 0)) { SILK_INFO << "Header number: " << h.number << " hash: " << to_hex(h.hash()); @@ -374,12 +374,12 @@ int count_headers_in_one(const SnapshotSubcommandSettings& settings, const Snaps } int count_headers_in_all(const SnapshotSubcommandSettings& settings) { - SnapshotRepository snapshot_repo{settings.settings, bundle_factory()}; - snapshot_repo.reopen_folder(); + SnapshotRepository snapshot_repository{settings.settings, bundle_factory()}; + snapshot_repository.reopen_folder(); int num_headers{0}; - for (const auto& bundle_ptr : snapshot_repo.view_bundles()) { + for (const auto& bundle_ptr : snapshot_repository.view_bundles()) { const auto& bundle = *bundle_ptr; - const auto header_count = count_headers_in_one(settings, bundle.header_snapshot); + const auto header_count = count_headers_in_one(settings, bundle.header_segment); num_headers += header_count; } return num_headers; @@ -389,13 +389,13 @@ void count_headers(const SnapshotSubcommandSettings& settings, int repetitions) std::chrono::time_point start{std::chrono::steady_clock::now()}; int num_headers{0}; for (int i{0}; i < repetitions; ++i) { - if (settings.snapshot_file_name) { - const auto snapshot_path{SnapshotPath::parse(std::filesystem::path{*settings.snapshot_file_name})}; + if (settings.segment_file_name) { + const auto snapshot_path{SnapshotPath::parse(std::filesystem::path{*settings.segment_file_name})}; ensure(snapshot_path.has_value(), "count_headers: invalid snapshot_file path format"); ensure(snapshot_path->type() == SnapshotType::headers, "count_headers: snapshot_file must point to header segment"); - Snapshot header_snapshot{*snapshot_path}; - header_snapshot.reopen_segment(); - num_headers = count_headers_in_one(settings, header_snapshot); + SegmentFileReader header_segment{*snapshot_path}; + header_segment.reopen_segment(); + num_headers = count_headers_in_one(settings, header_segment); } else { num_headers = count_headers_in_all(settings); } @@ -406,29 +406,29 @@ void count_headers(const SnapshotSubcommandSettings& settings, int repetitions) } void create_index(const SnapshotSubcommandSettings& settings, int repetitions) { - ensure(settings.snapshot_file_name.has_value(), "create_index: --snapshot_file must be specified"); - SILK_INFO << "Create index for snapshot: " << *settings.snapshot_file_name; + ensure(settings.segment_file_name.has_value(), "create_index: --snapshot_file must be specified"); + SILK_INFO << "Create index for snapshot: " << *settings.segment_file_name; std::chrono::time_point start{std::chrono::steady_clock::now()}; - const auto snap_file{SnapshotPath::parse(std::filesystem::path{*settings.snapshot_file_name})}; - if (snap_file) { + const auto snapshot_path = SnapshotPath::parse(std::filesystem::path{*settings.segment_file_name}); + if (snapshot_path) { for (int i{0}; i < repetitions; ++i) { - switch (snap_file->type()) { + switch (snapshot_path->type()) { case SnapshotType::headers: { - auto index = HeaderIndex::make(*snap_file); + auto index = HeaderIndex::make(*snapshot_path); index.build(); break; } case SnapshotType::bodies: { - auto index = BodyIndex::make(*snap_file); + auto index = BodyIndex::make(*snapshot_path); index.build(); break; } case SnapshotType::transactions: { - auto bodies_segment_path = snap_file->related_path(SnapshotType::bodies, kSegmentExtension); - auto index = TransactionIndex::make(bodies_segment_path, *snap_file); + auto bodies_segment_path = snapshot_path->related_path(SnapshotType::bodies, kSegmentExtension); + auto index = TransactionIndex::make(bodies_segment_path, *snapshot_path); index.build(); - auto index_hash_to_block = TransactionToBlockIndex::make(bodies_segment_path, *snap_file); + auto index_hash_to_block = TransactionToBlockIndex::make(bodies_segment_path, *snapshot_path); index_hash_to_block.build(); break; } @@ -438,15 +438,15 @@ void create_index(const SnapshotSubcommandSettings& settings, int repetitions) { } } } else { - SILK_ERROR << "Invalid snapshot file: " << *settings.snapshot_file_name; + SILK_ERROR << "Invalid snapshot file: " << *settings.segment_file_name; } std::chrono::duration elapsed{std::chrono::steady_clock::now() - start}; SILK_INFO << "Create index elapsed: " << as_milliseconds(elapsed) << " msec"; } void open_index(const SnapshotSubcommandSettings& settings) { - ensure(settings.snapshot_file_name.has_value(), "open_index: --snapshot_file must be specified"); - std::filesystem::path segment_file_path{settings.repository_dir() / *settings.snapshot_file_name}; + ensure(settings.segment_file_name.has_value(), "open_index: --snapshot_file must be specified"); + std::filesystem::path segment_file_path{settings.repository_dir() / *settings.segment_file_name}; SILK_INFO << "Open index for snapshot: " << segment_file_path; const auto snapshot_path{snapshots::SnapshotPath::parse(segment_file_path)}; ensure(snapshot_path.has_value(), [&]() { return "open_index: invalid snapshot file " + segment_file_path.filename().string(); }); @@ -490,7 +490,7 @@ void open_btree_index(const SnapshotSubcommandSettings& settings) { std::chrono::time_point start{std::chrono::steady_clock::now()}; seg::Decompressor kv_decompressor{settings.input_file_path}; kv_decompressor.open(); - snapshots::index::BTreeIndex bt_index{kv_decompressor, bt_index_file_path}; + btree::BTreeIndex bt_index{kv_decompressor, bt_index_file_path}; SILK_INFO << "Starting KV scan and BTreeIndex check, total keys: " << bt_index.key_count(); size_t matching_count{0}, key_count{0}; bool is_key{true}; @@ -543,7 +543,7 @@ void open_existence_index(const SnapshotSubcommandSettings& settings) { std::chrono::time_point start{std::chrono::steady_clock::now()}; seg::Decompressor kv_decompressor{settings.input_file_path}; kv_decompressor.open(); - snapshots::index::ExistenceIndex existence_index{existence_index_file_path}; + bloom_filter::BloomFilter existence_index{existence_index_file_path}; SILK_INFO << "Starting KV scan and existence index check"; size_t key_count{0}, found_count{0}, nonexistent_count{0}, nonexistent_found_count{0}; @@ -606,7 +606,7 @@ void open_existence_index(const SnapshotSubcommandSettings& settings) { static TorrentInfoPtrList download_web_seed(const DownloadSettings& settings) { const auto known_config{snapshots::Config::lookup_known_config(settings.chain_id)}; - WebSeedClient web_client{/*url_seeds=*/{settings.url_seed}, known_config.preverified_snapshots()}; + WebSeedClient web_client{/*url_seeds=*/{settings.url_seed}, known_config.preverified_snapshots_as_pairs()}; boost::asio::io_context scheduler; ShutdownSignal shutdown_signal{scheduler.get_executor()}; @@ -687,8 +687,8 @@ void download(const DownloadSettings& settings) { SILK_INFO << "Download elapsed: " << as_seconds(elapsed) << " sec"; } -static void print_header(const BlockHeader& header, const std::string& snapshot_filename) { - std::cout << "Header found in: " << snapshot_filename << "\n" +static void print_header(const BlockHeader& header, const std::string& filename) { + std::cout << "Header found in: " << filename << "\n" << "hash=" << to_hex(header.hash()) << "\n" << "parent_hash=" << to_hex(header.parent_hash) << "\n" << "number=" << header.number << "\n" @@ -719,24 +719,24 @@ void lookup_header_by_hash(const SnapshotSubcommandSettings& settings) { SILK_INFO << "Lookup header hash: " << hash->to_hex(); std::chrono::time_point start{std::chrono::steady_clock::now()}; - std::optional matching_snapshot; + std::optional matching_snapshot_path; std::optional matching_header; SnapshotRepository snapshot_repository{settings.settings, bundle_factory()}; snapshot_repository.reopen_folder(); for (const auto& bundle_ptr : snapshot_repository.view_bundles_reverse()) { const auto& bundle = *bundle_ptr; - auto snapshot_and_index = bundle.snapshot_and_index(SnapshotType::headers); - const auto header = HeaderFindByHashQuery{snapshot_and_index}.exec(*hash); + auto segment_and_index = bundle.segment_and_index(SnapshotType::headers); + const auto header = HeaderFindByHashQuery{segment_and_index}.exec(*hash); if (header) { matching_header = header; - matching_snapshot = snapshot_and_index.snapshot.path(); + matching_snapshot_path = segment_and_index.segment.path(); break; } } - if (matching_snapshot) { - SILK_INFO << "Lookup header hash: " << hash->to_hex() << " found in: " << matching_snapshot->filename(); + if (matching_snapshot_path) { + SILK_INFO << "Lookup header hash: " << hash->to_hex() << " found in: " << matching_snapshot_path->filename(); if (matching_header && settings.verbose) { - print_header(*matching_header, matching_snapshot->filename()); + print_header(*matching_header, matching_snapshot_path->filename()); } } else { SILK_WARN << "Lookup header hash: " << hash->to_hex() << " NOT found"; @@ -753,14 +753,14 @@ void lookup_header_by_number(const SnapshotSubcommandSettings& settings) { SnapshotRepository snapshot_repository{settings.settings, bundle_factory()}; snapshot_repository.reopen_folder(); - const auto [snapshot_and_index, _] = snapshot_repository.find_segment(SnapshotType::headers, block_number); - if (snapshot_and_index) { - const auto header = HeaderFindByBlockNumQuery{*snapshot_and_index}.exec(block_number); + const auto [segment_and_index, _] = snapshot_repository.find_segment(SnapshotType::headers, block_number); + if (segment_and_index) { + const auto header = HeaderFindByBlockNumQuery{*segment_and_index}.exec(block_number); ensure(header.has_value(), - [&]() { return "lookup_header_by_number: " + std::to_string(block_number) + " NOT found in " + snapshot_and_index->snapshot.path().filename(); }); - SILK_INFO << "Lookup header number: " << block_number << " found in: " << snapshot_and_index->snapshot.path().filename(); + [&]() { return "lookup_header_by_number: " + std::to_string(block_number) + " NOT found in " + segment_and_index->segment.path().filename(); }); + SILK_INFO << "Lookup header number: " << block_number << " found in: " << segment_and_index->segment.path().filename(); if (settings.verbose) { - print_header(*header, snapshot_and_index->snapshot.path().filename()); + print_header(*header, segment_and_index->segment.path().filename()); } } else { SILK_WARN << "Lookup header number: " << block_number << " NOT found"; @@ -779,8 +779,8 @@ void lookup_header(const SnapshotSubcommandSettings& settings) { } } -static void print_body(const BlockBodyForStorage& body, const std::string& snapshot_filename) { - std::cout << "Body found in: " << snapshot_filename << "\n" +static void print_body(const BlockBodyForStorage& body, const std::string& filename) { + std::cout << "Body found in: " << filename << "\n" << "base_txn_id=" << body.base_txn_id << "\n" << "txn_count=" << body.txn_count << "\n" << "rlp=" << to_hex(body.encode()) << "\n"; @@ -791,20 +791,20 @@ void lookup_body_in_one(const SnapshotSubcommandSettings& settings, BlockNum blo ensure(snapshot_path.has_value(), "lookup_body: --snapshot_file is invalid snapshot file"); std::chrono::time_point start{std::chrono::steady_clock::now()}; - Snapshot body_snapshot{*snapshot_path}; - body_snapshot.reopen_segment(); + SegmentFileReader body_segment{*snapshot_path}; + body_segment.reopen_segment(); Index idx_body_number{snapshot_path->index_file()}; idx_body_number.reopen_index(); - const auto body = BodyFindByBlockNumQuery{{body_snapshot, idx_body_number}}.exec(block_number); + const auto body = BodyFindByBlockNumQuery{{body_segment, idx_body_number}}.exec(block_number); if (body) { - SILK_INFO << "Lookup body number: " << block_number << " found in: " << body_snapshot.path().filename(); + SILK_INFO << "Lookup body number: " << block_number << " found in: " << body_segment.path().filename(); if (settings.verbose) { - print_body(*body, body_snapshot.path().filename()); + print_body(*body, body_segment.path().filename()); } } else { - SILK_WARN << "Lookup body number: " << block_number << " NOT found in: " << body_snapshot.path().filename(); + SILK_WARN << "Lookup body number: " << block_number << " NOT found in: " << body_segment.path().filename(); } std::chrono::duration elapsed{std::chrono::steady_clock::now() - start}; SILK_INFO << "Lookup body elapsed: " << duration_as(elapsed) << " usec"; @@ -815,14 +815,14 @@ void lookup_body_in_all(const SnapshotSubcommandSettings& settings, BlockNum blo snapshot_repository.reopen_folder(); std::chrono::time_point start{std::chrono::steady_clock::now()}; - const auto [snapshot_and_index, _] = snapshot_repository.find_segment(SnapshotType::bodies, block_number); - if (snapshot_and_index) { - const auto body = BodyFindByBlockNumQuery{*snapshot_and_index}.exec(block_number); + const auto [segment_and_index, _] = snapshot_repository.find_segment(SnapshotType::bodies, block_number); + if (segment_and_index) { + const auto body = BodyFindByBlockNumQuery{*segment_and_index}.exec(block_number); ensure(body.has_value(), - [&]() { return "lookup_body: " + std::to_string(block_number) + " NOT found in " + snapshot_and_index->snapshot.path().filename(); }); - SILK_INFO << "Lookup body number: " << block_number << " found in: " << snapshot_and_index->snapshot.path().filename(); + [&]() { return "lookup_body: " + std::to_string(block_number) + " NOT found in " + segment_and_index->segment.path().filename(); }); + SILK_INFO << "Lookup body number: " << block_number << " found in: " << segment_and_index->segment.path().filename(); if (settings.verbose) { - print_body(*body, snapshot_and_index->snapshot.path().filename()); + print_body(*body, segment_and_index->segment.path().filename()); } } else { SILK_WARN << "Lookup body number: " << block_number << " NOT found"; @@ -837,15 +837,15 @@ void lookup_body(const SnapshotSubcommandSettings& settings) { const auto block_number{*settings.lookup_number}; SILK_INFO << "Lookup body number: " << block_number; - if (settings.snapshot_file_name) { - lookup_body_in_one(settings, block_number, *settings.snapshot_file_name); + if (settings.segment_file_name) { + lookup_body_in_one(settings, block_number, *settings.segment_file_name); } else { lookup_body_in_all(settings, block_number); } } -static void print_txn(const Transaction& txn, const std::string& snapshot_filename) { - std::cout << "Transaction found in: " << snapshot_filename << "\n" +static void print_txn(const Transaction& txn, const std::string& filename) { + std::cout << "Transaction found in: " << filename << "\n" << "hash=" << to_hex(txn.hash()) << "\n" << "type=" << magic_enum::enum_name(txn.type) << "\n" << "from=" << (txn.sender() ? address_to_hex(*txn.sender()) : "") << "\n" @@ -896,21 +896,21 @@ void lookup_txn_by_hash_in_one(const SnapshotSubcommandSettings& settings, const ensure(snapshot_path.has_value(), "lookup_tx_by_hash_in_one: --snapshot_file is invalid snapshot file"); std::chrono::time_point start{std::chrono::steady_clock::now()}; - Snapshot tx_snapshot{*snapshot_path}; - tx_snapshot.reopen_segment(); + SegmentFileReader txn_segment{*snapshot_path}; + txn_segment.reopen_segment(); { Index idx_txn_hash{snapshot_path->index_file()}; idx_txn_hash.reopen_index(); - const auto transaction = TransactionFindByHashQuery{{tx_snapshot, idx_txn_hash}}.exec(hash); + const auto transaction = TransactionFindByHashQuery{{txn_segment, idx_txn_hash}}.exec(hash); if (transaction) { - SILK_INFO << "Lookup txn hash: " << hash.to_hex() << " found in: " << tx_snapshot.path().filename(); + SILK_INFO << "Lookup txn hash: " << hash.to_hex() << " found in: " << txn_segment.path().filename(); if (settings.verbose) { - print_txn(*transaction, tx_snapshot.path().filename()); + print_txn(*transaction, txn_segment.path().filename()); } } else { - SILK_WARN << "Lookup txn hash: " << hash.to_hex() << " NOT found in: " << tx_snapshot.path().filename(); + SILK_WARN << "Lookup txn hash: " << hash.to_hex() << " NOT found in: " << txn_segment.path().filename(); } } std::chrono::duration elapsed{std::chrono::steady_clock::now() - start}; @@ -921,24 +921,24 @@ void lookup_txn_by_hash_in_all(const SnapshotSubcommandSettings& settings, const SnapshotRepository snapshot_repository{settings.settings, bundle_factory()}; snapshot_repository.reopen_folder(); - std::optional matching_snapshot; + std::optional matching_snapshot_path; std::chrono::time_point start{std::chrono::steady_clock::now()}; for (const auto& bundle_ptr : snapshot_repository.view_bundles_reverse()) { const auto& bundle = *bundle_ptr; - auto snapshot_and_index = bundle.snapshot_and_index(SnapshotType::transactions); - const auto transaction = TransactionFindByHashQuery{snapshot_and_index}.exec(hash); + auto segment_and_index = bundle.segment_and_index(SnapshotType::transactions); + const auto transaction = TransactionFindByHashQuery{segment_and_index}.exec(hash); if (transaction) { - matching_snapshot = snapshot_and_index.snapshot.path(); + matching_snapshot_path = segment_and_index.segment.path(); if (settings.verbose) { - print_txn(*transaction, matching_snapshot->path().filename()); + print_txn(*transaction, matching_snapshot_path->path().filename()); } break; } } std::chrono::duration elapsed{std::chrono::steady_clock::now() - start}; SILK_INFO << "Lookup txn elapsed: " << duration_as(elapsed) << " usec"; - if (matching_snapshot) { - SILK_INFO << "Lookup txn hash: " << hash.to_hex() << " found in: " << matching_snapshot->path().filename(); + if (matching_snapshot_path) { + SILK_INFO << "Lookup txn hash: " << hash.to_hex() << " found in: " << matching_snapshot_path->path().filename(); } else { SILK_WARN << "Lookup txn hash: " << hash.to_hex() << " NOT found"; } @@ -949,8 +949,8 @@ void lookup_txn_by_hash(const SnapshotSubcommandSettings& settings, const std::s ensure(hash.has_value(), "lookup_txn_by_hash: lookup_hash is not a valid hash"); SILK_INFO << "Lookup txn hash: " << hash->to_hex(); - if (settings.snapshot_file_name) { - lookup_txn_by_hash_in_one(settings, *hash, *settings.snapshot_file_name); + if (settings.segment_file_name) { + lookup_txn_by_hash_in_one(settings, *hash, *settings.segment_file_name); } else { lookup_txn_by_hash_in_all(settings, *hash); } @@ -961,21 +961,21 @@ void lookup_txn_by_id_in_one(const SnapshotSubcommandSettings& settings, uint64_ ensure(snapshot_path.has_value(), "lookup_txn_by_id_in_one: --snapshot_file is invalid snapshot file"); std::chrono::time_point start{std::chrono::steady_clock::now()}; - Snapshot tx_snapshot{*snapshot_path}; - tx_snapshot.reopen_segment(); + SegmentFileReader txn_segment{*snapshot_path}; + txn_segment.reopen_segment(); { Index idx_txn_hash{snapshot_path->index_file()}; idx_txn_hash.reopen_index(); - const auto transaction = TransactionFindByIdQuery{{tx_snapshot, idx_txn_hash}}.exec(txn_id); + const auto transaction = TransactionFindByIdQuery{{txn_segment, idx_txn_hash}}.exec(txn_id); if (transaction) { - SILK_INFO << "Lookup txn ID: " << txn_id << " found in: " << tx_snapshot.path().filename(); + SILK_INFO << "Lookup txn ID: " << txn_id << " found in: " << txn_segment.path().filename(); if (settings.verbose) { - print_txn(*transaction, tx_snapshot.path().filename()); + print_txn(*transaction, txn_segment.path().filename()); } } else { - SILK_WARN << "Lookup txn ID: " << txn_id << " NOT found in: " << tx_snapshot.path().filename(); + SILK_WARN << "Lookup txn ID: " << txn_id << " NOT found in: " << txn_segment.path().filename(); } } std::chrono::duration elapsed{std::chrono::steady_clock::now() - start}; @@ -986,24 +986,24 @@ void lookup_txn_by_id_in_all(const SnapshotSubcommandSettings& settings, uint64_ SnapshotRepository snapshot_repository{settings.settings, bundle_factory()}; snapshot_repository.reopen_folder(); - std::optional matching_snapshot; + std::optional matching_snapshot_path; std::chrono::time_point start{std::chrono::steady_clock::now()}; for (const auto& bundle_ptr : snapshot_repository.view_bundles_reverse()) { const auto& bundle = *bundle_ptr; - auto snapshot_and_index = bundle.snapshot_and_index(SnapshotType::transactions); - const auto transaction = TransactionFindByIdQuery{snapshot_and_index}.exec(txn_id); + auto segment_and_index = bundle.segment_and_index(SnapshotType::transactions); + const auto transaction = TransactionFindByIdQuery{segment_and_index}.exec(txn_id); if (transaction) { - matching_snapshot = snapshot_and_index.snapshot.path(); + matching_snapshot_path = segment_and_index.segment.path(); if (settings.verbose) { - print_txn(*transaction, matching_snapshot->path().filename()); + print_txn(*transaction, matching_snapshot_path->path().filename()); } break; } } std::chrono::duration elapsed{std::chrono::steady_clock::now() - start}; SILK_INFO << "Lookup txn elapsed: " << as_milliseconds(elapsed) << " msec"; - if (matching_snapshot) { - SILK_INFO << "Lookup txn ID: " << txn_id << " found in: " << matching_snapshot->path().filename(); + if (matching_snapshot_path) { + SILK_INFO << "Lookup txn ID: " << txn_id << " found in: " << matching_snapshot_path->path().filename(); } else { SILK_WARN << "Lookup txn ID: " << txn_id << " NOT found"; } @@ -1012,8 +1012,8 @@ void lookup_txn_by_id_in_all(const SnapshotSubcommandSettings& settings, uint64_ void lookup_txn_by_id(const SnapshotSubcommandSettings& settings, uint64_t txn_id) { SILK_INFO << "Lookup txn ID: " << txn_id; - if (settings.snapshot_file_name) { - lookup_txn_by_id_in_one(settings, txn_id, *settings.snapshot_file_name); + if (settings.segment_file_name) { + lookup_txn_by_id_in_one(settings, txn_id, *settings.segment_file_name); } else { lookup_txn_by_id_in_all(settings, txn_id); } diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index 5c2f0e7bcd..4da4a64726 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -95,7 +95,7 @@ If you need to update the list of builtin snapshots in Silkworm, the following p * update `erigon-snapshot` submodule to the new commit * generate the embedded C++ code bindings for predefined snapshots by executing from project home folder: ``` -/cmd/dev/embed_toml -i third_party/erigon-snapshot -o silkworm/db/datastore/snapshots/config +/cmd/dev/embed_toml -i third_party/erigon-snapshot -o silkworm/db/datastore/snapshots/config/chains ``` diff --git a/silkworm/capi/silkworm.cpp b/silkworm/capi/silkworm.cpp index 7a53ca0057..e5f795ebe7 100644 --- a/silkworm/capi/silkworm.cpp +++ b/silkworm/capi/silkworm.cpp @@ -37,9 +37,8 @@ #include #include #include -#include #include -#include +#include #include #include #include @@ -245,7 +244,7 @@ SILKWORM_EXPORT int silkworm_init(SilkwormHandle* handle, const struct SilkwormS return SILKWORM_OK; } -SILKWORM_EXPORT int silkworm_build_recsplit_indexes(SilkwormHandle handle, struct SilkwormMemoryMappedFile* snapshots[], size_t len) SILKWORM_NOEXCEPT { +SILKWORM_EXPORT int silkworm_build_recsplit_indexes(SilkwormHandle handle, struct SilkwormMemoryMappedFile* segments[], size_t len) SILKWORM_NOEXCEPT { const int kNeededIndexesToBuildInParallel = 2; if (!handle) { @@ -254,13 +253,13 @@ SILKWORM_EXPORT int silkworm_build_recsplit_indexes(SilkwormHandle handle, struc std::vector> needed_indexes; for (size_t i = 0; i < len; ++i) { - struct SilkwormMemoryMappedFile* snapshot = snapshots[i]; - if (!snapshot) { + struct SilkwormMemoryMappedFile* segment = segments[i]; + if (!segment) { return SILKWORM_INVALID_SNAPSHOT; } - auto snapshot_region = make_region(*snapshot); + auto segment_region = make_region(*segment); - const auto snapshot_path = snapshots::SnapshotPath::parse(snapshot->file_path); + const auto snapshot_path = snapshots::SnapshotPath::parse(segment->file_path); if (!snapshot_path) { return SILKWORM_INVALID_PATH; } @@ -268,30 +267,30 @@ SILKWORM_EXPORT int silkworm_build_recsplit_indexes(SilkwormHandle handle, struc std::shared_ptr index; switch (snapshot_path->type()) { case snapshots::SnapshotType::headers: { - index = std::make_shared(snapshots::HeaderIndex::make(*snapshot_path, snapshot_region)); + index = std::make_shared(snapshots::HeaderIndex::make(*snapshot_path, segment_region)); needed_indexes.push_back(index); break; } case snapshots::SnapshotType::bodies: { - index = std::make_shared(snapshots::BodyIndex::make(*snapshot_path, snapshot_region)); + index = std::make_shared(snapshots::BodyIndex::make(*snapshot_path, segment_region)); needed_indexes.push_back(index); break; } case snapshots::SnapshotType::transactions: { auto bodies_segment_path = snapshot_path->related_path(snapshots::SnapshotType::bodies, snapshots::kSegmentExtension); - auto bodies_file = std::find_if(snapshots, snapshots + len, [&](SilkwormMemoryMappedFile* file) -> bool { + auto bodies_file = std::find_if(segments, segments + len, [&](SilkwormMemoryMappedFile* file) -> bool { return snapshots::SnapshotPath::parse(file->file_path) == bodies_segment_path; }); - if (bodies_file < snapshots + len) { + if (bodies_file < segments + len) { auto bodies_segment_region = make_region(**bodies_file); index = std::make_shared(snapshots::TransactionIndex::make( - bodies_segment_path, bodies_segment_region, *snapshot_path, snapshot_region)); + bodies_segment_path, bodies_segment_region, *snapshot_path, segment_region)); needed_indexes.push_back(index); index = std::make_shared(snapshots::TransactionToBlockIndex::make( - bodies_segment_path, bodies_segment_region, *snapshot_path, snapshot_region)); + bodies_segment_path, bodies_segment_region, *snapshot_path, segment_region)); needed_indexes.push_back(index); } break; @@ -352,7 +351,7 @@ SILKWORM_EXPORT int silkworm_add_snapshot(SilkwormHandle handle, SilkwormChainSn if (!headers_segment_path) { return SILKWORM_INVALID_PATH; } - snapshots::Snapshot header_snapshot{*headers_segment_path, make_region(hs.segment)}; + snapshots::SegmentFileReader header_segment{*headers_segment_path, make_region(hs.segment)}; snapshots::Index idx_header_hash{headers_segment_path->index_file(), make_region(hs.header_hash_index)}; const SilkwormBodiesSnapshot& bs = snapshot->bodies; @@ -363,7 +362,7 @@ SILKWORM_EXPORT int silkworm_add_snapshot(SilkwormHandle handle, SilkwormChainSn if (!bodies_segment_path) { return SILKWORM_INVALID_PATH; } - snapshots::Snapshot body_snapshot{*bodies_segment_path, make_region(bs.segment)}; + snapshots::SegmentFileReader body_segment{*bodies_segment_path, make_region(bs.segment)}; snapshots::Index idx_body_number{bodies_segment_path->index_file(), make_region(bs.block_num_index)}; const SilkwormTransactionsSnapshot& ts = snapshot->transactions; @@ -374,18 +373,18 @@ SILKWORM_EXPORT int silkworm_add_snapshot(SilkwormHandle handle, SilkwormChainSn if (!transactions_segment_path) { return SILKWORM_INVALID_PATH; } - snapshots::Snapshot txn_snapshot{*transactions_segment_path, make_region(ts.segment)}; + snapshots::SegmentFileReader txn_segment{*transactions_segment_path, make_region(ts.segment)}; snapshots::Index idx_txn_hash{transactions_segment_path->related_path(snapshots::SnapshotType::transactions, snapshots::kIdxExtension), make_region(ts.tx_hash_index)}; snapshots::Index idx_txn_hash_2_block{transactions_segment_path->related_path(snapshots::SnapshotType::transactions_to_block, snapshots::kIdxExtension), make_region(ts.tx_hash_2_block_index)}; snapshots::SnapshotBundle bundle{{ - .header_snapshot = std::move(header_snapshot), + .header_segment = std::move(header_segment), .idx_header_hash = std::move(idx_header_hash), - .body_snapshot = std::move(body_snapshot), + .body_segment = std::move(body_segment), .idx_body_number = std::move(idx_body_number), - .txn_snapshot = std::move(txn_snapshot), + .txn_segment = std::move(txn_segment), .idx_txn_hash = std::move(idx_txn_hash), .idx_txn_hash_2_block = std::move(idx_txn_hash_2_block), }}; diff --git a/silkworm/capi/silkworm.h b/silkworm/capi/silkworm.h index 853fabc7ac..7bf26dc854 100644 --- a/silkworm/capi/silkworm.h +++ b/silkworm/capi/silkworm.h @@ -133,13 +133,11 @@ SILKWORM_EXPORT int silkworm_init(SilkwormHandle* handle, const struct SilkwormS /** * \brief Build a set of indexes for the given snapshots. * \param[in] handle A valid Silkworm instance handle, got with silkworm_init. - * \param[in] snapshots An array of snapshots to index. - * \param[in] indexPaths An array of paths to write indexes to. - * Note that the name of the index is a part of the path and it is used to determine the index type. - * \param[in] len The number of snapshots and paths. + * \param[in] segments An array of segment files to index. + * \param[in] len The number of segment files. * \return SILKWORM_OK (=0) on success, a non-zero error value on failure on some or all indexes. */ -SILKWORM_EXPORT int silkworm_build_recsplit_indexes(SilkwormHandle handle, struct SilkwormMemoryMappedFile* snapshots[], size_t len) SILKWORM_NOEXCEPT; +SILKWORM_EXPORT int silkworm_build_recsplit_indexes(SilkwormHandle handle, struct SilkwormMemoryMappedFile* segments[], size_t len) SILKWORM_NOEXCEPT; /** * \brief Notify Silkworm about a new snapshot to use. diff --git a/silkworm/capi/silkworm_test.cpp b/silkworm/capi/silkworm_test.cpp index 957f7ed9bb..1cbb1ba48f 100644 --- a/silkworm/capi/silkworm_test.cpp +++ b/silkworm/capi/silkworm_test.cpp @@ -26,9 +26,8 @@ #include #include #include -#include #include -#include +#include #include #include #include @@ -808,54 +807,54 @@ TEST_CASE_METHOD(CApiTest, "CAPI silkworm_execute_blocks_perpetual multiple bloc } TEST_CASE_METHOD(CApiTest, "CAPI silkworm_add_snapshot", "[silkworm][capi]") { - snapshot_test::SampleHeaderSnapshotFile header_snapshot_file{tmp_dir.path()}; - auto& header_snapshot_path = header_snapshot_file.path(); - snapshot_test::SampleBodySnapshotFile body_snapshot_file{tmp_dir.path()}; - auto& body_snapshot_path = body_snapshot_file.path(); - snapshot_test::SampleTransactionSnapshotFile tx_snapshot_file{tmp_dir.path()}; - auto& tx_snapshot_path = tx_snapshot_file.path(); - - auto header_index_builder = snapshots::HeaderIndex::make(header_snapshot_path); - header_index_builder.set_base_data_id(header_snapshot_file.block_num_range().start); + snapshot_test::SampleHeaderSnapshotFile header_segment_file{tmp_dir.path()}; + auto& header_segment_path = header_segment_file.path(); + snapshot_test::SampleBodySnapshotFile body_segment_file{tmp_dir.path()}; + auto& body_segment_path = body_segment_file.path(); + snapshot_test::SampleTransactionSnapshotFile txn_segment_file{tmp_dir.path()}; + auto& txn_segment_path = txn_segment_file.path(); + + auto header_index_builder = snapshots::HeaderIndex::make(header_segment_path); + header_index_builder.set_base_data_id(header_segment_file.block_num_range().start); REQUIRE_NOTHROW(header_index_builder.build()); - snapshots::Snapshot header_snapshot{header_snapshot_path}; - header_snapshot.reopen_segment(); - snapshots::Index idx_header_hash{header_snapshot_path.index_file()}; + snapshots::SegmentFileReader header_segment{header_segment_path}; + header_segment.reopen_segment(); + snapshots::Index idx_header_hash{header_segment_path.index_file()}; idx_header_hash.reopen_index(); - auto body_index_builder = snapshots::BodyIndex::make(body_snapshot_path); - body_index_builder.set_base_data_id(body_snapshot_file.block_num_range().start); + auto body_index_builder = snapshots::BodyIndex::make(body_segment_path); + body_index_builder.set_base_data_id(body_segment_file.block_num_range().start); REQUIRE_NOTHROW(body_index_builder.build()); - snapshots::Snapshot body_snapshot{body_snapshot_path}; - body_snapshot.reopen_segment(); - snapshots::Index idx_body_number{body_snapshot_path.index_file()}; + snapshots::SegmentFileReader body_segment{body_segment_path}; + body_segment.reopen_segment(); + snapshots::Index idx_body_number{body_segment_path.index_file()}; idx_body_number.reopen_index(); - auto tx_index_builder = snapshots::TransactionIndex::make(body_snapshot_path, tx_snapshot_path); + auto tx_index_builder = snapshots::TransactionIndex::make(body_segment_path, txn_segment_path); tx_index_builder.build(); - auto tx_index_hash_to_block_builder = snapshots::TransactionToBlockIndex::make(body_snapshot_path, tx_snapshot_path, tx_snapshot_file.block_num_range().start); + auto tx_index_hash_to_block_builder = snapshots::TransactionToBlockIndex::make(body_segment_path, txn_segment_path, txn_segment_file.block_num_range().start); tx_index_hash_to_block_builder.build(); - snapshots::Snapshot tx_snapshot{tx_snapshot_path}; - tx_snapshot.reopen_segment(); - snapshots::Index idx_txn_hash{tx_snapshot_path.index_file()}; + snapshots::SegmentFileReader txn_segment{txn_segment_path}; + txn_segment.reopen_segment(); + snapshots::Index idx_txn_hash{txn_segment_path.index_file()}; idx_txn_hash.reopen_index(); snapshots::Index idx_txn_hash_2_block{tx_index_hash_to_block_builder.path()}; idx_txn_hash_2_block.reopen_index(); - const auto header_snapshot_path_string{header_snapshot_path.path().string()}; + const auto header_segment_path_string{header_segment_path.path().string()}; const auto header_index_path_string{idx_header_hash.path().path().string()}; - const auto body_snapshot_path_string{body_snapshot_path.path().string()}; + const auto body_segment_path_string{body_segment_path.path().string()}; const auto body_index_path_string{idx_body_number.path().path().string()}; - const auto tx_snapshot_path_string{tx_snapshot_path.path().string()}; + const auto txn_segment_path_string{txn_segment_path.path().string()}; const auto tx_hash_index_path_string{idx_txn_hash.path().path().string()}; const auto tx_hash2block_index_path_string{idx_txn_hash_2_block.path().path().string()}; // Prepare templates for valid header/body/transaction C data structures SilkwormHeadersSnapshot valid_shs{ .segment = SilkwormMemoryMappedFile{ - .file_path = header_snapshot_path_string.c_str(), - .memory_address = header_snapshot.memory_file_region().data(), - .memory_length = header_snapshot.memory_file_region().size(), + .file_path = header_segment_path_string.c_str(), + .memory_address = header_segment.memory_file_region().data(), + .memory_length = header_segment.memory_file_region().size(), }, .header_hash_index = SilkwormMemoryMappedFile{ .file_path = header_index_path_string.c_str(), @@ -865,9 +864,9 @@ TEST_CASE_METHOD(CApiTest, "CAPI silkworm_add_snapshot", "[silkworm][capi]") { }; SilkwormBodiesSnapshot valid_sbs{ .segment = SilkwormMemoryMappedFile{ - .file_path = body_snapshot_path_string.c_str(), - .memory_address = body_snapshot.memory_file_region().data(), - .memory_length = body_snapshot.memory_file_region().size(), + .file_path = body_segment_path_string.c_str(), + .memory_address = body_segment.memory_file_region().data(), + .memory_length = body_segment.memory_file_region().size(), }, .block_num_index = SilkwormMemoryMappedFile{ .file_path = body_index_path_string.c_str(), @@ -877,9 +876,9 @@ TEST_CASE_METHOD(CApiTest, "CAPI silkworm_add_snapshot", "[silkworm][capi]") { }; SilkwormTransactionsSnapshot valid_sts{ .segment = SilkwormMemoryMappedFile{ - .file_path = tx_snapshot_path_string.c_str(), - .memory_address = tx_snapshot.memory_file_region().data(), - .memory_length = tx_snapshot.memory_file_region().size(), + .file_path = txn_segment_path_string.c_str(), + .memory_address = txn_segment.memory_file_region().data(), + .memory_length = txn_segment.memory_file_region().size(), }, .tx_hash_index = SilkwormMemoryMappedFile{ .file_path = tx_hash_index_path_string.c_str(), diff --git a/silkworm/db/access_layer.cpp b/silkworm/db/access_layer.cpp index 71eaec6539..33c90db6e3 100644 --- a/silkworm/db/access_layer.cpp +++ b/silkworm/db/access_layer.cpp @@ -1261,9 +1261,9 @@ std::optional DataModel::read_header_from_snapshot(BlockNum height) std::optional block_header; // We know the header snapshot in advance: find it based on target block number - const auto [snapshot_and_index, _] = repository_->find_segment(SnapshotType::headers, height); - if (snapshot_and_index) { - block_header = HeaderFindByBlockNumQuery{*snapshot_and_index}.exec(height); + const auto [segment_and_index, _] = repository_->find_segment(SnapshotType::headers, height); + if (segment_and_index) { + block_header = HeaderFindByBlockNumQuery{*segment_and_index}.exec(height); } return block_header; } @@ -1277,8 +1277,8 @@ std::optional DataModel::read_header_from_snapshot(const Hash& hash // We don't know the header snapshot in advance: search for block hash in each header snapshot in reverse order for (const auto& bundle_ptr : repository_->view_bundles_reverse()) { const auto& bundle = *bundle_ptr; - auto snapshot_and_index = bundle.snapshot_and_index(SnapshotType::headers); - block_header = HeaderFindByHashQuery{snapshot_and_index}.exec(hash); + auto segment_and_index = bundle.segment_and_index(SnapshotType::headers); + block_header = HeaderFindByHashQuery{segment_and_index}.exec(hash); if (block_header) break; } return block_header; @@ -1290,10 +1290,10 @@ std::optional DataModel::read_body_for_storage_from_snapsho } // We know the body snapshot in advance: find it based on target block number - const auto [snapshot_and_index, _] = repository_->find_segment(SnapshotType::bodies, height); - if (!snapshot_and_index) return std::nullopt; + const auto [segment_and_index, _] = repository_->find_segment(SnapshotType::bodies, height); + if (!segment_and_index) return std::nullopt; - auto stored_body = BodyFindByBlockNumQuery{*snapshot_and_index}.exec(height); + auto stored_body = BodyFindByBlockNumQuery{*segment_and_index}.exec(height); return stored_body; } @@ -1321,9 +1321,9 @@ bool DataModel::is_body_in_snapshot(BlockNum height) { } // We know the body snapshot in advance: find it based on target block number - const auto [snapshot_and_index, _] = repository_->find_segment(SnapshotType::bodies, height); - if (snapshot_and_index) { - const auto stored_body = BodyFindByBlockNumQuery{*snapshot_and_index}.exec(height); + const auto [segment_and_index, _] = repository_->find_segment(SnapshotType::bodies, height); + if (segment_and_index) { + const auto stored_body = BodyFindByBlockNumQuery{*segment_and_index}.exec(height); return stored_body.has_value(); } @@ -1335,18 +1335,18 @@ bool DataModel::read_transactions_from_snapshot(BlockNum height, uint64_t base_t return true; } - const auto [snapshot_and_index, _] = repository_->find_segment(SnapshotType::transactions, height); - if (!snapshot_and_index) return false; + const auto [segment_and_index, _] = repository_->find_segment(SnapshotType::transactions, height); + if (!segment_and_index) return false; - txs = TransactionRangeFromIdQuery{*snapshot_and_index}.exec_into_vector(base_txn_id, txn_count); + txs = TransactionRangeFromIdQuery{*segment_and_index}.exec_into_vector(base_txn_id, txn_count); return true; } bool DataModel::read_rlp_transactions_from_snapshot(BlockNum height, std::vector& rlp_txs) { - const auto [body_snapshot_and_index, _] = repository_->find_segment(SnapshotType::bodies, height); - if (body_snapshot_and_index) { - auto stored_body = BodyFindByBlockNumQuery{*body_snapshot_and_index}.exec(height); + const auto [body_segment_and_index, _] = repository_->find_segment(SnapshotType::bodies, height); + if (body_segment_and_index) { + auto stored_body = BodyFindByBlockNumQuery{*body_segment_and_index}.exec(height); if (!stored_body) return false; // Skip first and last *system transactions* in block body @@ -1355,10 +1355,10 @@ bool DataModel::read_rlp_transactions_from_snapshot(BlockNum height, std::vector if (txn_count == 0) return true; - const auto [tx_snapshot_and_index, _2] = repository_->find_segment(SnapshotType::transactions, height); - if (!tx_snapshot_and_index) return false; + const auto [tx_segment_and_index, _2] = repository_->find_segment(SnapshotType::transactions, height); + if (!tx_segment_and_index) return false; - rlp_txs = TransactionPayloadRlpRangeFromIdQuery{*tx_snapshot_and_index}.exec_into_vector(base_txn_id, txn_count); + rlp_txs = TransactionPayloadRlpRangeFromIdQuery{*tx_segment_and_index}.exec_into_vector(base_txn_id, txn_count); return true; } diff --git a/silkworm/db/blocks/bodies/body_index.hpp b/silkworm/db/blocks/bodies/body_index.hpp index 9555b722db..6785ba3ea5 100644 --- a/silkworm/db/blocks/bodies/body_index.hpp +++ b/silkworm/db/blocks/bodies/body_index.hpp @@ -21,8 +21,8 @@ #include #include +#include #include -#include #include namespace silkworm::snapshots { diff --git a/silkworm/db/blocks/bodies/body_queries.hpp b/silkworm/db/blocks/bodies/body_queries.hpp index 9b9ddeab9b..66bc13c9c4 100644 --- a/silkworm/db/blocks/bodies/body_queries.hpp +++ b/silkworm/db/blocks/bodies/body_queries.hpp @@ -18,10 +18,10 @@ #include -#include "body_snapshot.hpp" +#include "body_segment.hpp" namespace silkworm::snapshots { -using BodyFindByBlockNumQuery = FindByIdQuery; +using BodyFindByBlockNumQuery = FindByIdQuery; } // namespace silkworm::snapshots diff --git a/silkworm/db/blocks/bodies/body_snapshot.cpp b/silkworm/db/blocks/bodies/body_segment.cpp similarity index 97% rename from silkworm/db/blocks/bodies/body_snapshot.cpp rename to silkworm/db/blocks/bodies/body_segment.cpp index d4b7c1087b..4f27c10c88 100644 --- a/silkworm/db/blocks/bodies/body_snapshot.cpp +++ b/silkworm/db/blocks/bodies/body_segment.cpp @@ -14,7 +14,7 @@ limitations under the License. */ -#include "body_snapshot.hpp" +#include "body_segment.hpp" #include diff --git a/silkworm/db/blocks/bodies/body_snapshot.hpp b/silkworm/db/blocks/bodies/body_segment.hpp similarity index 63% rename from silkworm/db/blocks/bodies/body_snapshot.hpp rename to silkworm/db/blocks/bodies/body_segment.hpp index 181f85e55c..d4b910bebd 100644 --- a/silkworm/db/blocks/bodies/body_snapshot.hpp +++ b/silkworm/db/blocks/bodies/body_segment.hpp @@ -19,20 +19,20 @@ #include #include #include -#include -#include -#include +#include +#include +#include namespace silkworm::snapshots { void encode_word_from_body(Bytes& word, const BlockBodyForStorage& body); void decode_word_into_body(ByteView word, BlockBodyForStorage& body); -struct BodySnapshotWordSerializer : public SnapshotWordSerializer { +struct BodySegmentWordEncoder : public Encoder { BlockBodyForStorage value; Bytes word; - ~BodySnapshotWordSerializer() override = default; + ~BodySegmentWordEncoder() override = default; ByteView encode_word() override { word.clear(); @@ -41,21 +41,21 @@ struct BodySnapshotWordSerializer : public SnapshotWordSerializer { } }; -static_assert(SnapshotWordSerializerConcept); +static_assert(EncoderConcept); -struct BodySnapshotWordDeserializer : public SnapshotWordDeserializer { +struct BodySegmentWordDecoder : public Decoder { BlockBodyForStorage value; - ~BodySnapshotWordDeserializer() override = default; + ~BodySegmentWordDecoder() override = default; void decode_word(ByteView word) override { decode_word_into_body(word, value); } }; -static_assert(SnapshotWordDeserializerConcept); +static_assert(DecoderConcept); -using BodySnapshotReader = SnapshotReader; -using BodySnapshotWriter = SnapshotWriter; +using BodySegmentReader = SegmentReader; +using BodySegmentWriter = SegmentWriter; } // namespace silkworm::snapshots diff --git a/silkworm/db/blocks/bodies/body_snapshot_freezer.cpp b/silkworm/db/blocks/bodies/body_segment_collation.cpp similarity index 74% rename from silkworm/db/blocks/bodies/body_snapshot_freezer.cpp rename to silkworm/db/blocks/bodies/body_segment_collation.cpp index f881c1609d..1328b76185 100644 --- a/silkworm/db/blocks/bodies/body_snapshot_freezer.cpp +++ b/silkworm/db/blocks/bodies/body_segment_collation.cpp @@ -14,26 +14,26 @@ limitations under the License. */ -#include "body_snapshot_freezer.hpp" +#include "body_segment_collation.hpp" #include #include #include -#include "body_snapshot.hpp" +#include "body_segment.hpp" namespace silkworm::db { -void BodySnapshotFreezer::copy(ROTxn& txn, const FreezerCommand& command, snapshots::SnapshotFileWriter& file_writer) const { +void BodySegmentCollation::copy(ROTxn& txn, const SegmentCollationCommand& command, snapshots::SegmentFileWriter& file_writer) const { BlockNumRange range = command.range; uint64_t base_txn_id = command.base_txn_id; - snapshots::BodySnapshotWriter writer{file_writer}; + snapshots::BodySegmentWriter writer{file_writer}; auto out = writer.out(); for (BlockNum i = range.start; i < range.end; ++i) { auto value_opt = read_canonical_body_for_storage(txn, i); - if (!value_opt) throw std::runtime_error{"BodySnapshotFreezer::copy missing body for block " + std::to_string(i)}; + if (!value_opt) throw std::runtime_error{"BodySegmentCollation::copy missing body for block " + std::to_string(i)}; BlockBodyForStorage& value = *value_opt; // remap to sequential values without gaps (see txnum.go) value.base_txn_id = base_txn_id; @@ -42,7 +42,7 @@ void BodySnapshotFreezer::copy(ROTxn& txn, const FreezerCommand& command, snapsh } } -void BodySnapshotFreezer::cleanup(RWTxn& txn, BlockNumRange range) const { +void BodySegmentCollation::prune(RWTxn& txn, BlockNumRange range) const { for (BlockNum i = range.start, count = 1; i < range.end; ++i, ++count) { auto hash_opt = read_canonical_header_hash(txn, i); if (!hash_opt) continue; @@ -51,7 +51,7 @@ void BodySnapshotFreezer::cleanup(RWTxn& txn, BlockNumRange range) const { delete_body(txn, hash, i); if ((count > 10000) && ((count % 10000) == 0)) { - log::Debug("BodySnapshotFreezer") << "cleaned up until block " << i; + log::Debug("BodySegmentCollation") << "cleaned up until block " << i; } } } diff --git a/silkworm/db/blocks/headers/header_snapshot_freezer.hpp b/silkworm/db/blocks/bodies/body_segment_collation.hpp similarity index 66% rename from silkworm/db/blocks/headers/header_snapshot_freezer.hpp rename to silkworm/db/blocks/bodies/body_segment_collation.hpp index 00124aae0b..9fa5d1a460 100644 --- a/silkworm/db/blocks/headers/header_snapshot_freezer.hpp +++ b/silkworm/db/blocks/bodies/body_segment_collation.hpp @@ -16,15 +16,15 @@ #pragma once -#include +#include namespace silkworm::db { -class HeaderSnapshotFreezer : public SnapshotFreezer { +class BodySegmentCollation : public SegmentCollation { public: - ~HeaderSnapshotFreezer() override = default; - void copy(ROTxn& txn, const FreezerCommand& command, snapshots::SnapshotFileWriter& file_writer) const override; - void cleanup(RWTxn& txn, BlockNumRange range) const override; + ~BodySegmentCollation() override = default; + void copy(ROTxn& txn, const SegmentCollationCommand& command, snapshots::SegmentFileWriter& file_writer) const override; + void prune(RWTxn& txn, BlockNumRange range) const override; }; } // namespace silkworm::db diff --git a/silkworm/db/blocks/bodies/body_txs_amount_query.cpp b/silkworm/db/blocks/bodies/body_txs_amount_query.cpp index 6b284a0aa8..819e5d95fe 100644 --- a/silkworm/db/blocks/bodies/body_txs_amount_query.cpp +++ b/silkworm/db/blocks/bodies/body_txs_amount_query.cpp @@ -18,17 +18,17 @@ #include -#include "body_snapshot.hpp" +#include "body_segment.hpp" namespace silkworm::snapshots { BodyTxsAmountQuery::Result BodyTxsAmountQuery::exec() { - size_t body_count = snapshot_.item_count(); + size_t body_count = segment_.item_count(); if (body_count == 0) { - throw std::runtime_error("BodyTxsAmountQuery empty body snapshot: " + snapshot_.path().path().string()); + throw std::runtime_error("BodyTxsAmountQuery empty body snapshot: " + segment_.path().path().string()); } - BodySnapshotReader reader{snapshot_}; + BodySegmentReader reader{segment_}; auto it = reader.begin(); uint64_t first_tx_id = it->base_txn_id; diff --git a/silkworm/db/blocks/bodies/body_txs_amount_query.hpp b/silkworm/db/blocks/bodies/body_txs_amount_query.hpp index e7169e5b7d..dc73ad505f 100644 --- a/silkworm/db/blocks/bodies/body_txs_amount_query.hpp +++ b/silkworm/db/blocks/bodies/body_txs_amount_query.hpp @@ -18,7 +18,7 @@ #include -#include +#include namespace silkworm::snapshots { @@ -29,12 +29,12 @@ class BodyTxsAmountQuery { uint64_t count{}; }; - explicit BodyTxsAmountQuery(const Snapshot& snapshot) : snapshot_(snapshot) {} + explicit BodyTxsAmountQuery(const SegmentFileReader& segment) : segment_(segment) {} Result exec(); private: - const Snapshot& snapshot_; + const SegmentFileReader& segment_; }; } // namespace silkworm::snapshots diff --git a/silkworm/db/blocks/bodies/body_txs_amount_query_test.cpp b/silkworm/db/blocks/bodies/body_txs_amount_query_test.cpp index a6fa2a77ca..0027f3f807 100644 --- a/silkworm/db/blocks/bodies/body_txs_amount_query_test.cpp +++ b/silkworm/db/blocks/bodies/body_txs_amount_query_test.cpp @@ -29,7 +29,7 @@ TEST_CASE("BodyTxsAmountQuery") { silkworm::test_util::SetLogVerbosityGuard guard{log::Level::kNone}; TemporaryDirectory tmp_dir; test_util::SampleBodySnapshotFile snapshot_file{tmp_dir.path()}; - Snapshot snapshot{snapshot_file.path()}; + SegmentFileReader snapshot{snapshot_file.path()}; snapshot.reopen_segment(); BodyTxsAmountQuery query{snapshot}; diff --git a/silkworm/db/blocks/headers/header_index.hpp b/silkworm/db/blocks/headers/header_index.hpp index 2310861364..072e48f49d 100644 --- a/silkworm/db/blocks/headers/header_index.hpp +++ b/silkworm/db/blocks/headers/header_index.hpp @@ -21,8 +21,8 @@ #include #include +#include #include -#include #include namespace silkworm::snapshots { diff --git a/silkworm/db/blocks/headers/header_queries.hpp b/silkworm/db/blocks/headers/header_queries.hpp index 8caa9c0c80..0bb7e0e610 100644 --- a/silkworm/db/blocks/headers/header_queries.hpp +++ b/silkworm/db/blocks/headers/header_queries.hpp @@ -18,11 +18,11 @@ #include -#include "header_snapshot.hpp" +#include "header_segment.hpp" namespace silkworm::snapshots { -using HeaderFindByBlockNumQuery = FindByIdQuery; -using HeaderFindByHashQuery = FindByHashQuery; +using HeaderFindByBlockNumQuery = FindByIdQuery; +using HeaderFindByHashQuery = FindByHashQuery; } // namespace silkworm::snapshots diff --git a/silkworm/db/blocks/headers/header_snapshot.cpp b/silkworm/db/blocks/headers/header_segment.cpp similarity index 98% rename from silkworm/db/blocks/headers/header_snapshot.cpp rename to silkworm/db/blocks/headers/header_segment.cpp index 92b1f1c5c4..2091a82469 100644 --- a/silkworm/db/blocks/headers/header_snapshot.cpp +++ b/silkworm/db/blocks/headers/header_segment.cpp @@ -14,7 +14,7 @@ limitations under the License. */ -#include "header_snapshot.hpp" +#include "header_segment.hpp" #include #include diff --git a/silkworm/db/blocks/headers/header_snapshot.hpp b/silkworm/db/blocks/headers/header_segment.hpp similarity index 66% rename from silkworm/db/blocks/headers/header_snapshot.hpp rename to silkworm/db/blocks/headers/header_segment.hpp index 9e6fcfd6eb..fa7a445aa9 100644 --- a/silkworm/db/blocks/headers/header_snapshot.hpp +++ b/silkworm/db/blocks/headers/header_segment.hpp @@ -18,9 +18,9 @@ #include #include -#include -#include -#include +#include +#include +#include namespace silkworm::snapshots { @@ -28,11 +28,11 @@ void encode_word_from_header(Bytes& word, const BlockHeader& header); void decode_word_into_header(ByteView word, BlockHeader& header); void check_sanity_of_header_with_metadata(const BlockHeader& header, BlockNumRange block_num_range); -struct HeaderSnapshotWordSerializer : public SnapshotWordSerializer { +struct HeaderSegmentWordEncoder : public Encoder { BlockHeader value; Bytes word; - ~HeaderSnapshotWordSerializer() override = default; + ~HeaderSegmentWordEncoder() override = default; ByteView encode_word() override { word.clear(); @@ -41,12 +41,12 @@ struct HeaderSnapshotWordSerializer : public SnapshotWordSerializer { } }; -static_assert(SnapshotWordSerializerConcept); +static_assert(EncoderConcept); -struct HeaderSnapshotWordDeserializer : public SnapshotWordDeserializer { +struct HeaderSegmentWordDecoder : public Decoder { BlockHeader value; - ~HeaderSnapshotWordDeserializer() override = default; + ~HeaderSegmentWordDecoder() override = default; void decode_word(ByteView word) override { decode_word_into_header(word, value); @@ -57,9 +57,9 @@ struct HeaderSnapshotWordDeserializer : public SnapshotWordDeserializer { } }; -static_assert(SnapshotWordDeserializerConcept); +static_assert(DecoderConcept); -using HeaderSnapshotReader = SnapshotReader; -using HeaderSnapshotWriter = SnapshotWriter; +using HeaderSegmentReader = SegmentReader; +using HeaderSegmentWriter = SegmentWriter; } // namespace silkworm::snapshots diff --git a/silkworm/db/blocks/headers/header_snapshot_freezer.cpp b/silkworm/db/blocks/headers/header_segment_collation.cpp similarity index 70% rename from silkworm/db/blocks/headers/header_snapshot_freezer.cpp rename to silkworm/db/blocks/headers/header_segment_collation.cpp index 8f2d2399c6..52d18aa128 100644 --- a/silkworm/db/blocks/headers/header_snapshot_freezer.cpp +++ b/silkworm/db/blocks/headers/header_segment_collation.cpp @@ -14,29 +14,29 @@ limitations under the License. */ -#include "header_snapshot_freezer.hpp" +#include "header_segment_collation.hpp" #include #include #include -#include "header_snapshot.hpp" +#include "header_segment.hpp" namespace silkworm::db { -void HeaderSnapshotFreezer::copy(ROTxn& txn, const FreezerCommand& command, snapshots::SnapshotFileWriter& file_writer) const { +void HeaderSegmentCollation::copy(ROTxn& txn, const SegmentCollationCommand& command, snapshots::SegmentFileWriter& file_writer) const { BlockNumRange range = command.range; - snapshots::HeaderSnapshotWriter writer{file_writer}; + snapshots::HeaderSegmentWriter writer{file_writer}; auto out = writer.out(); for (BlockNum i = range.start; i < range.end; ++i) { auto value_opt = read_canonical_header(txn, i); - if (!value_opt) throw std::runtime_error{"HeaderSnapshotFreezer::copy missing header for block " + std::to_string(i)}; + if (!value_opt) throw std::runtime_error{"HeaderSegmentCollation::copy missing header for block " + std::to_string(i)}; *out++ = *value_opt; } } -void HeaderSnapshotFreezer::cleanup(RWTxn& txn, BlockNumRange range) const { +void HeaderSegmentCollation::prune(RWTxn& txn, BlockNumRange range) const { for (BlockNum i = range.start, count = 1; i < range.end; ++i, ++count) { auto hash_opt = read_canonical_header_hash(txn, i); if (!hash_opt) continue; @@ -45,7 +45,7 @@ void HeaderSnapshotFreezer::cleanup(RWTxn& txn, BlockNumRange range) const { delete_header(txn, i, hash); if ((count > 10000) && ((count % 10000) == 0)) { - log::Debug("HeaderSnapshotFreezer") << "cleaned up until block " << i; + log::Debug("HeaderSegmentCollation") << "cleaned up until block " << i; } } } diff --git a/silkworm/db/transactions/txn_snapshot_freezer.hpp b/silkworm/db/blocks/headers/header_segment_collation.hpp similarity index 66% rename from silkworm/db/transactions/txn_snapshot_freezer.hpp rename to silkworm/db/blocks/headers/header_segment_collation.hpp index e03aa21676..59760fdfd4 100644 --- a/silkworm/db/transactions/txn_snapshot_freezer.hpp +++ b/silkworm/db/blocks/headers/header_segment_collation.hpp @@ -16,15 +16,15 @@ #pragma once -#include +#include namespace silkworm::db { -class TransactionSnapshotFreezer : public SnapshotFreezer { +class HeaderSegmentCollation : public SegmentCollation { public: - ~TransactionSnapshotFreezer() override = default; - void copy(ROTxn& txn, const FreezerCommand& command, snapshots::SnapshotFileWriter& file_writer) const override; - void cleanup(RWTxn& txn, BlockNumRange range) const override; + ~HeaderSegmentCollation() override = default; + void copy(ROTxn& txn, const SegmentCollationCommand& command, snapshots::SegmentFileWriter& file_writer) const override; + void prune(RWTxn& txn, BlockNumRange range) const override; }; } // namespace silkworm::db diff --git a/silkworm/db/datastore/snapshot_freezer.hpp b/silkworm/db/datastore/segment_collation.hpp similarity index 67% rename from silkworm/db/datastore/snapshot_freezer.hpp rename to silkworm/db/datastore/segment_collation.hpp index ae29e97515..234c7b5392 100644 --- a/silkworm/db/datastore/snapshot_freezer.hpp +++ b/silkworm/db/datastore/segment_collation.hpp @@ -22,34 +22,34 @@ #include "data_migration_command.hpp" #include "mdbx/mdbx.hpp" -#include "snapshots/snapshot_writer.hpp" +#include "snapshots/segment/segment_writer.hpp" namespace silkworm::db { -struct FreezerCommand : public DataMigrationCommand { +struct SegmentCollationCommand : public DataMigrationCommand { BlockNumRange range; uint64_t base_txn_id; - FreezerCommand(BlockNumRange range1, uint64_t base_txn_id1) + SegmentCollationCommand(BlockNumRange range1, uint64_t base_txn_id1) : range(range1), base_txn_id(base_txn_id1) {} - ~FreezerCommand() override = default; + ~SegmentCollationCommand() override = default; std::string description() const override { std::stringstream stream; - stream << "FreezerCommand " << range.to_string(); + stream << "SegmentCollationCommand " << range.to_string(); return stream.str(); } }; -struct SnapshotFreezer { - virtual ~SnapshotFreezer() = default; +struct SegmentCollation { + virtual ~SegmentCollation() = default; //! Copies data for a block range from db to the snapshot file. - virtual void copy(ROTxn& txn, const FreezerCommand& command, snapshots::SnapshotFileWriter& file_writer) const = 0; + virtual void copy(ROTxn& txn, const SegmentCollationCommand& command, snapshots::SegmentFileWriter& file_writer) const = 0; //! Cleans up data for a block range from db after it was copied to the snapshot file. - virtual void cleanup(RWTxn& txn, BlockNumRange range) const = 0; + virtual void prune(RWTxn& txn, BlockNumRange range) const = 0; }; } // namespace silkworm::db diff --git a/silkworm/db/datastore/snapshot_merger.cpp b/silkworm/db/datastore/snapshot_merger.cpp index 08938077b3..960f7badca 100644 --- a/silkworm/db/datastore/snapshot_merger.cpp +++ b/silkworm/db/datastore/snapshot_merger.cpp @@ -23,10 +23,10 @@ #include #include +#include "snapshots/common/snapshot_path.hpp" #include "snapshots/seg/compressor.hpp" +#include "snapshots/segment/segment_writer.hpp" #include "snapshots/snapshot_bundle.hpp" -#include "snapshots/snapshot_path.hpp" -#include "snapshots/snapshot_writer.hpp" namespace silkworm::db { @@ -78,9 +78,9 @@ std::unique_ptr SnapshotMerger::next_command() { return {}; } -struct RawSnapshotWordDeserializer : public SnapshotWordDeserializer { +struct RawDecoder : public Decoder { ByteView value; - ~RawSnapshotWordDeserializer() override = default; + ~RawDecoder() override = default; void decode_word(ByteView word) override { value = word; } @@ -91,14 +91,14 @@ std::shared_ptr SnapshotMerger::migrate(std::unique_ptr reader{bundle.snapshot(path.type())}; + SegmentReader reader{bundle.segment(path.type())}; std::copy(reader.begin(), reader.end(), compressor.add_word_iterator()); } diff --git a/silkworm/db/datastore/snapshots/CMakeLists.txt b/silkworm/db/datastore/snapshots/CMakeLists.txt index 056dfb9e4c..c2cac92399 100644 --- a/silkworm/db/datastore/snapshots/CMakeLists.txt +++ b/silkworm/db/datastore/snapshots/CMakeLists.txt @@ -16,31 +16,31 @@ include("${SILKWORM_MAIN_DIR}/cmake/common/targets.cmake") +add_subdirectory(bittorrent) add_subdirectory(seg) find_package(absl REQUIRED strings) find_package(Boost REQUIRED headers url) # headers for signals2 -find_package(GTest REQUIRED) -find_package(LibtorrentRasterbar REQUIRED) find_package(magic_enum REQUIRED) find_package(Microsoft.GSL REQUIRED) find_package(OpenSSL REQUIRED) +# cmake-format: off set(LIBS_PRIVATE absl::strings Boost::headers - Boost::url - LibtorrentRasterbar::torrent-rasterbar magic_enum::magic_enum OpenSSL::Crypto silkworm_snapshots_seg ) +# cmake-format: on # cmake-format: off set(LIBS_PUBLIC Microsoft.GSL::GSL silkworm_core silkworm_infra + silkworm_bittorrent ) # cmake-format: on @@ -50,4 +50,4 @@ silkworm_library( PRIVATE ${LIBS_PRIVATE} ) -target_link_libraries(silkworm_snapshots_test PRIVATE GTest::gmock silkworm_infra_test_util) +target_link_libraries(silkworm_snapshots_test PRIVATE silkworm_infra_test_util) diff --git a/silkworm/db/datastore/snapshots/basic_queries.hpp b/silkworm/db/datastore/snapshots/basic_queries.hpp index b7b39552d7..3bc538e136 100644 --- a/silkworm/db/datastore/snapshots/basic_queries.hpp +++ b/silkworm/db/datastore/snapshots/basic_queries.hpp @@ -21,30 +21,29 @@ #include -#include "index.hpp" -#include "snapshot_and_index.hpp" -#include "snapshot_reader.hpp" +#include "segment/segment_reader.hpp" +#include "segment_and_index.hpp" namespace silkworm::snapshots { -template +template class BasicQuery { public: explicit BasicQuery( - const SnapshotAndIndex snapshot_and_index) - : reader_{snapshot_and_index.snapshot}, - index_{snapshot_and_index.index} {} + const SegmentAndIndex segment_and_index) + : reader_{segment_and_index.segment}, + index_{segment_and_index.index} {} protected: - TSnapshotReader reader_; + TSegmentReader reader_; const Index& index_; }; -template -struct FindByIdQuery : public BasicQuery { - using BasicQuery::BasicQuery; +template +struct FindByIdQuery : public BasicQuery { + using BasicQuery::BasicQuery; - std::optional exec(uint64_t id) { + std::optional exec(uint64_t id) { auto offset = this->index_.lookup_by_data_id(id); if (!offset) { return std::nullopt; @@ -54,11 +53,11 @@ struct FindByIdQuery : public BasicQuery { } }; -template -struct FindByHashQuery : public BasicQuery { - using BasicQuery::BasicQuery; +template +struct FindByHashQuery : public BasicQuery { + using BasicQuery::BasicQuery; - std::optional exec(const Hash& hash) { + std::optional exec(const Hash& hash) { auto offset = this->index_.lookup_by_hash(hash); if (!offset) { return std::nullopt; @@ -75,11 +74,11 @@ struct FindByHashQuery : public BasicQuery { } }; -template -struct RangeFromIdQuery : public BasicQuery { - using BasicQuery::BasicQuery; +template +struct RangeFromIdQuery : public BasicQuery { + using BasicQuery::BasicQuery; - std::vector exec_into_vector(uint64_t first_id, uint64_t count) { + std::vector exec_into_vector(uint64_t first_id, uint64_t count) { auto offset = this->index_.lookup_by_data_id(first_id); if (!offset) { return {}; diff --git a/silkworm/db/datastore/snapshots/bittorrent/CMakeLists.txt b/silkworm/db/datastore/snapshots/bittorrent/CMakeLists.txt new file mode 100644 index 0000000000..34d36e92b2 --- /dev/null +++ b/silkworm/db/datastore/snapshots/bittorrent/CMakeLists.txt @@ -0,0 +1,48 @@ +#[[ + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +]] + +include("${SILKWORM_MAIN_DIR}/cmake/common/targets.cmake") + +find_package(absl REQUIRED strings) +find_package(Boost REQUIRED headers url) +find_package(GTest REQUIRED) +find_package(LibtorrentRasterbar REQUIRED) +find_package(magic_enum REQUIRED) + +# cmake-format: off +set(LIBS_PRIVATE + absl::strings + LibtorrentRasterbar::torrent-rasterbar + magic_enum::magic_enum +) +# cmake-format: on + +# cmake-format: off +set(LIBS_PUBLIC + Boost::headers + Boost::url + silkworm_core + silkworm_infra +) +# cmake-format: on + +silkworm_library( + silkworm_bittorrent + PUBLIC ${LIBS_PUBLIC} + PRIVATE ${LIBS_PRIVATE} +) + +target_link_libraries(silkworm_bittorrent_test PRIVATE GTest::gmock silkworm_infra_test_util) diff --git a/silkworm/db/datastore/snapshots/bittorrent/web_seed_client.cpp b/silkworm/db/datastore/snapshots/bittorrent/web_seed_client.cpp index 2f7d89ace2..6e1abb05f0 100644 --- a/silkworm/db/datastore/snapshots/bittorrent/web_seed_client.cpp +++ b/silkworm/db/datastore/snapshots/bittorrent/web_seed_client.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -61,14 +62,21 @@ static const std::map kCloudflareHeaders{ {"lsjdjwcush6jbnjj3jnjscoscisoc5s", "I%OSJDNFKE783DDHHJD873EFSIVNI7384R78SSJBJBCCJBC32JABBJCBJK45"}, }; -WebSeedClient::WebSeedClient(std::vector url_seeds, const PreverifiedList& preverified) - : WebSeedClient(std::make_unique(), std::move(url_seeds), preverified) {} - -WebSeedClient::WebSeedClient(std::unique_ptr web_session, - std::vector url_seeds, - const PreverifiedList& preverified) +WebSeedClient::WebSeedClient( + std::vector url_seeds, + Whitelist whitelist) + : WebSeedClient{ + std::make_unique(), + std::move(url_seeds), + std::move(whitelist), + } {} + +WebSeedClient::WebSeedClient( + std::unique_ptr web_session, + std::vector url_seeds, + Whitelist whitelist) : url_seeds_{std::move(url_seeds)}, - preverified_{preverified}, + whitelist_{std::move(whitelist)}, web_session_{std::move(web_session)} {} Task WebSeedClient::discover_torrents(bool fail_fast) { @@ -168,10 +176,6 @@ TorrentInfoPtr WebSeedClient::validate_torrent_file(const urls::url& provider_ur file_name.remove_suffix(kTorrentExtension.size()); if (!is_whitelisted(file_name, lt::aux::to_hex(torrent_hash))) { - if (WebSeedClient::is_caplin_segment(file_name)) { - SILK_TRACE << "WebSeedClient::validate_torrent_file skip Caplin torrent: " << file_name; - return {}; - } SILK_WARN << "WebSeedClient::validate_torrent_file torrent NOT whitelisted: " << file_name; if (throw_not_whitelisted_) { throw std::runtime_error{".torrent file " + std::string{file_name} + " is not whitelisted"}; @@ -183,17 +187,9 @@ TorrentInfoPtr WebSeedClient::validate_torrent_file(const urls::url& provider_ur } bool WebSeedClient::is_whitelisted(std::string_view file_name, std::string_view torrent_hash) { - SILK_TRACE << "WebSeedClient::is_whitelisted file_name: " << file_name << " torrent_hash: " << torrent_hash; - const auto it = std::find_if(preverified_.cbegin(), preverified_.cend(), [=](auto& preverified_file) { - const auto [preverified_file_name, preverified_hash] = preverified_file; - SILK_TRACE << "WebSeedClient::is_whitelisted preverified_file_name: " << preverified_file_name << " preverified_hash: " << preverified_hash; - return preverified_file_name == file_name && preverified_hash == torrent_hash; + return std::ranges::any_of(whitelist_, [&](const std::pair& entry) { + return (entry.first == file_name) && (entry.second == torrent_hash); }); - return it != preverified_.cend(); -} - -bool WebSeedClient::is_caplin_segment(std::string_view file_name) { - return file_name.ends_with("beaconblocks.seg") || file_name.ends_with("blobsidecars.seg"); } } // namespace silkworm::snapshots::bittorrent diff --git a/silkworm/db/datastore/snapshots/bittorrent/web_seed_client.hpp b/silkworm/db/datastore/snapshots/bittorrent/web_seed_client.hpp index 88d1626938..0da045bcec 100644 --- a/silkworm/db/datastore/snapshots/bittorrent/web_seed_client.hpp +++ b/silkworm/db/datastore/snapshots/bittorrent/web_seed_client.hpp @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include #include @@ -34,7 +36,6 @@ #include -#include "../config.hpp" #include "web_session.hpp" namespace silkworm::snapshots::bittorrent { @@ -47,19 +48,22 @@ inline auto torrent_info_compare = [](const TorrentInfoPtr& lhs, const TorrentIn return lhs->name() < rhs->name(); }; using TorrentInfoPtrList = std::set; +using Whitelist = std::vector>; class WebSeedClient { public: - WebSeedClient(std::vector url_seeds, const PreverifiedList& preverified); - WebSeedClient(std::unique_ptr web_session, - std::vector url_seeds, - const PreverifiedList& preverified); + WebSeedClient( + std::vector url_seeds, + Whitelist whitelist); + WebSeedClient( + std::unique_ptr web_session, + std::vector url_seeds, + Whitelist whitelist); Task discover_torrents(bool fail_fast = false); protected: - static bool is_caplin_segment(std::string_view file_name); - + WebSession& web_session() { return *web_session_; } Task build_list_of_torrents(bool fail_fast); Task build_list_of_torrents(std::string_view provider_url); Task download_and_filter_all_torrents(); @@ -73,7 +77,7 @@ class WebSeedClient { boost::asio::io_context io_ctx_; std::vector url_seeds_; - const PreverifiedList& preverified_; + Whitelist whitelist_; std::unique_ptr web_session_; TorrentsByProvider torrents_by_provider_; bool throw_not_whitelisted_{false}; diff --git a/silkworm/db/datastore/snapshots/bittorrent/web_seed_client_test.cpp b/silkworm/db/datastore/snapshots/bittorrent/web_seed_client_test.cpp index 99b8dc6c6e..8de6d9c1ba 100644 --- a/silkworm/db/datastore/snapshots/bittorrent/web_seed_client_test.cpp +++ b/silkworm/db/datastore/snapshots/bittorrent/web_seed_client_test.cpp @@ -23,8 +23,8 @@ #include #include -#include #include +#include namespace silkworm::snapshots::bittorrent { @@ -38,20 +38,12 @@ class WebSeedClientForTest : public WebSeedClient { using WebSeedClient::build_list_of_torrents; using WebSeedClient::download_and_filter_all_torrents; using WebSeedClient::download_from_provider; - using WebSeedClient::is_caplin_segment; using WebSeedClient::is_whitelisted; using WebSeedClient::validate_torrent_file; + using WebSeedClient::web_session; using WebSeedClient::WebSeedClient; // NOLINT(*-rvalue-reference-param-not-moved) }; -TEST_CASE("WebSeedClientForTest::is_caplin_segment", "[db][snapshot][bittorrent]") { - CHECK(!WebSeedClientForTest::is_caplin_segment("v1-000000-000500-bodies.seg")); - CHECK(!WebSeedClientForTest::is_caplin_segment("v1-000000-000500-headers.seg")); - CHECK(!WebSeedClientForTest::is_caplin_segment("v1-000000-000500-transactions.seg")); - - CHECK(WebSeedClientForTest::is_caplin_segment("v1-000000-000100-beaconblocks.seg")); -} - //! Content for manifest file containing one torrent file static constexpr std::string_view kValidManifestContent{ "v1-010000-010500-bodies.seg.torrent\n"sv}; @@ -88,36 +80,36 @@ static boost::urls::url make_e2_snapshots_provider_url() { return boost::urls::url{kErigon2Snapshots}; } -struct WebSeedClientTest : public test_util::ContextTestBase { - snapshots::Config known_config{snapshots::Config::lookup_known_config(/*chain_id=*/1)}; - std::unique_ptr session{std::make_unique()}; - WebSeedClientForTest client{{kErigon2Snapshots}, known_config.preverified_snapshots()}; -}; - -TEST_CASE("WebSeedClientForTest::WebSeedClientForTest", "[db][snapshot][bittorrent]") { - PreverifiedList preverified_torrent_list; - WebSeedClientForTest client{{}, preverified_torrent_list}; +TEST_CASE("WebSeedClient::WebSeedClient", "[db][snapshot][bittorrent]") { + WebSeedClientForTest client{{}, {}}; } -TEST_CASE_METHOD(WebSeedClientTest, "WebSeedClientForTest::discover_torrents", "[db][snapshot][bittorrent]") { +TEST_CASE("WebSeedClient::discover_torrents", "[db][snapshot][bittorrent]") { + test_util::TaskRunner task_runner; + static const Whitelist kWhitelist = {{"v1-010000-010500-bodies.seg", "542b3f77a2f3c4b9d8a4085d838bdd1b14043f3b"}}; + WebSeedClientForTest ws_client{std::make_unique(), {kErigon2Snapshots}, kWhitelist}; + auto& session = dynamic_cast(ws_client.web_session()); + SECTION("empty") { - EXPECT_CALL(*session, https_get(make_e2_snapshots_provider_url(), _, _)) + EXPECT_CALL(session, https_get(make_e2_snapshots_provider_url(), _, _)) .WillOnce(InvokeWithoutArgs([]() -> Task { co_return WebSession::StringResponse{}; })); - WebSeedClientForTest ws_client{std::move(session), {kErigon2Snapshots}, known_config.preverified_snapshots()}; - CHECK(spawn_and_wait(ws_client.discover_torrents()).empty()); + TorrentInfoPtrList torrent_info_set = task_runner.run(ws_client.discover_torrents()); + CHECK(torrent_info_set.empty()); } + SECTION("invalid manifest") { - EXPECT_CALL(*session, https_get(make_e2_snapshots_provider_url(), _, _)) + EXPECT_CALL(session, https_get(make_e2_snapshots_provider_url(), _, _)) .WillOnce(InvokeWithoutArgs([]() -> Task { WebSession::StringResponse rsp; rsp.body().assign("\000\001"); co_return rsp; })); - WebSeedClientForTest ws_client{std::move(session), {kErigon2Snapshots}, known_config.preverified_snapshots()}; - CHECK(spawn_and_wait(ws_client.discover_torrents()).empty()); + TorrentInfoPtrList torrent_info_set = task_runner.run(ws_client.discover_torrents()); + CHECK(torrent_info_set.empty()); } + SECTION("valid manifest") { - EXPECT_CALL(*session, https_get(make_e2_snapshots_provider_url(), _, _)) + EXPECT_CALL(session, https_get(make_e2_snapshots_provider_url(), _, _)) .WillOnce(InvokeWithoutArgs([]() -> Task { WebSession::StringResponse rsp; rsp.body().assign(kValidManifestContent); @@ -128,9 +120,7 @@ TEST_CASE_METHOD(WebSeedClientTest, "WebSeedClientForTest::discover_torrents", " rsp.body().assign(kValidTorrentContentAscii); co_return rsp; })); - WebSeedClientForTest ws_client{std::move(session), {kErigon2Snapshots}, known_config.preverified_snapshots()}; - TorrentInfoPtrList torrent_info_set; - CHECK_NOTHROW((torrent_info_set = spawn_and_wait(ws_client.discover_torrents()))); + TorrentInfoPtrList torrent_info_set = task_runner.run(ws_client.discover_torrents()); REQUIRE_FALSE(torrent_info_set.empty()); const TorrentInfoPtr torrent_info = *torrent_info_set.begin(); CHECK(torrent_info->name() == "v1-010000-010500-bodies.seg"); @@ -138,14 +128,22 @@ TEST_CASE_METHOD(WebSeedClientTest, "WebSeedClientForTest::discover_torrents", " } } -TEST_CASE_METHOD(WebSeedClientTest, "WebSeedClientForTest::validate_torrent_file", "[db][snapshot][bittorrent]") { +TEST_CASE("WebSeedClient::validate_torrent_file", "[db][snapshot][bittorrent]") { + WebSeedClientForTest client{{kErigon2Snapshots}, {{"v1-010000-010500-bodies.seg", "542b3f77a2f3c4b9d8a4085d838bdd1b14043f3b"}}}; CHECK(client.validate_torrent_file(make_e2_snapshots_provider_url(), "v1-010000-010500-bodies.seg.torrent", kValidTorrentContentAscii)); CHECK_THROWS_AS(client.validate_torrent_file(make_e2_snapshots_provider_url(), "v1-010000-010500-bodies.seg.torrent", ""), boost::system::system_error); CHECK_THROWS_AS(client.validate_torrent_file(make_e2_snapshots_provider_url(), "v1-010000-010500-bodies.seg.torrent", "AA"), boost::system::system_error); } -TEST_CASE_METHOD(WebSeedClientTest, "WebSeedClientForTest::is_whitelisted", "[db][snapshot][bittorrent]") { +TEST_CASE("WebSeedClient::is_whitelisted", "[db][snapshot][bittorrent]") { + static const Whitelist kWhitelist = { + {"v1-010000-010500-bodies.seg", "542b3f77a2f3c4b9d8a4085d838bdd1b14043f3b"}, + {"v1-010000-010500-headers.seg", "080d0cd1613831820c8f5e48715d68643f48054a"}, + {"v1-010000-010500-transactions.seg", "8151bbc8b6635465760af6ebcfd630c9679b31a5"}, + }; + WebSeedClientForTest client{{kErigon2Snapshots}, kWhitelist}; + CHECK(client.is_whitelisted("v1-010000-010500-bodies.seg", "542b3f77a2f3c4b9d8a4085d838bdd1b14043f3b")); CHECK(client.is_whitelisted("v1-010000-010500-headers.seg", "080d0cd1613831820c8f5e48715d68643f48054a")); CHECK(client.is_whitelisted("v1-010000-010500-transactions.seg", "8151bbc8b6635465760af6ebcfd630c9679b31a5")); diff --git a/silkworm/db/datastore/snapshots/index/bloom_filter.cpp b/silkworm/db/datastore/snapshots/bloom_filter/bloom_filter.cpp similarity index 88% rename from silkworm/db/datastore/snapshots/index/bloom_filter.cpp rename to silkworm/db/datastore/snapshots/bloom_filter/bloom_filter.cpp index d6d65e3838..8b990300d6 100644 --- a/silkworm/db/datastore/snapshots/index/bloom_filter.cpp +++ b/silkworm/db/datastore/snapshots/bloom_filter/bloom_filter.cpp @@ -16,9 +16,11 @@ #include "bloom_filter.hpp" +#include #include #include #include +#include #include #include @@ -27,10 +29,13 @@ #include #include -namespace silkworm::snapshots::index { +namespace silkworm::snapshots::bloom_filter { using namespace std::numbers; +//! The minimum Bloom filter bits count +static constexpr size_t kMinimumBitsCount = 2; + //! kRotation sets how much to rotate the hash on each filter iteration. //! This is somewhat randomly set to a prime on the lower segment of 64. static constexpr size_t kRotation = 17; @@ -44,8 +49,26 @@ uint64_t BloomFilter::optimal_bits_count(uint64_t max_key_count, double p) { return static_cast(std::ceil(-static_cast(max_key_count) * std::log(p) / (ln2 * ln2))); } -BloomFilter::BloomFilter(uint64_t bits_count) - : BloomFilter(bits_count, new_random_keys()) {} +BloomFilter::BloomFilter(std::filesystem::path path) + : BloomFilter{kMinimumBitsCount, new_random_keys()} { + if (!std::filesystem::exists(path)) { + throw std::runtime_error("index file " + path.filename().string() + " doesn't exist"); + } + if (std::filesystem::file_size(path) == 0) { + throw std::runtime_error("index file " + path.filename().string() + " is empty"); + } + std::ifstream file_stream{path, std::ios::in | std::ios::binary}; + file_stream.exceptions(std::ios::failbit | std::ios::badbit); + file_stream >> *this; + + path_ = std::move(path); +} + +BloomFilter::BloomFilter() + : BloomFilter{kMinimumBitsCount, new_random_keys()} {} + +BloomFilter::BloomFilter(uint64_t max_key_count, double p) + : BloomFilter{optimal_bits_count(max_key_count, p), new_random_keys()} {} BloomFilter::BloomFilter(uint64_t bits_count, KeyArray keys) : bits_count_(bits_count), @@ -54,9 +77,6 @@ BloomFilter::BloomFilter(uint64_t bits_count, KeyArray keys) ensure_min_bits_count(bits_count); } -BloomFilter::BloomFilter(uint64_t max_key_count, double p) - : BloomFilter(optimal_bits_count(max_key_count, p)) {} - void BloomFilter::add_hash(uint64_t hash) { for (size_t n = 0; n < kHardCodedK; ++n) { hash = ((hash << kRotation) | (hash >> kRotationOf64)) ^ keys_[n]; @@ -195,4 +215,4 @@ std::istream& operator>>(std::istream& is, BloomFilter& filter) { return is; } -} // namespace silkworm::snapshots::index +} // namespace silkworm::snapshots::bloom_filter diff --git a/silkworm/db/datastore/snapshots/index/bloom_filter.hpp b/silkworm/db/datastore/snapshots/bloom_filter/bloom_filter.hpp similarity index 84% rename from silkworm/db/datastore/snapshots/index/bloom_filter.hpp rename to silkworm/db/datastore/snapshots/bloom_filter/bloom_filter.hpp index d0db13710b..62e1fbd783 100644 --- a/silkworm/db/datastore/snapshots/index/bloom_filter.hpp +++ b/silkworm/db/datastore/snapshots/bloom_filter/bloom_filter.hpp @@ -19,34 +19,20 @@ #include #include #include -#include -#include -#include -#include +#include #include -#include -#include - -namespace silkworm::snapshots::index { +namespace silkworm::snapshots::bloom_filter { //! Bloom filter implementation (https://en.wikipedia.org/wiki/Bloom_filter) //! \remark Serialized binary format compatible with: https://github.com/holiman/bloomfilter class BloomFilter { public: - //! The minimum Bloom filter bits count - static constexpr size_t kMinimumBitsCount = 2; - - //! The fixed number of keys - static constexpr size_t kHardCodedK = 3; - - using KeyArray = std::array; - - static uint64_t optimal_bits_count(uint64_t max_key_count, double p); - - explicit BloomFilter(uint64_t bits_count = kMinimumBitsCount); + explicit BloomFilter(std::filesystem::path path); + BloomFilter(); BloomFilter(uint64_t max_key_count, double p); + const std::filesystem::path& path() const { return path_; } uint64_t bits_count() const { return bits_count_; } uint64_t key_count() const { return keys_.size(); } @@ -61,12 +47,22 @@ class BloomFilter { friend std::istream& operator>>(std::istream& is, BloomFilter& filter); + static uint64_t optimal_bits_count(uint64_t max_key_count, double p); + + //! The fixed number of keys + static constexpr size_t kHardCodedK = 3; + private: + using KeyArray = std::array; + static void ensure_min_bits_count(uint64_t bits_count); static KeyArray new_random_keys(); BloomFilter(uint64_t bits_count, KeyArray keys); + //! The index file path + std::filesystem::path path_; + //! The number of bits that the bitmap should be able to track uint64_t bits_count_; @@ -80,4 +76,4 @@ class BloomFilter { uint64_t inserted_count_{0}; }; -} // namespace silkworm::snapshots::index +} // namespace silkworm::snapshots::bloom_filter diff --git a/silkworm/db/datastore/snapshots/index/bloom_filter_test.cpp b/silkworm/db/datastore/snapshots/bloom_filter/bloom_filter_test.cpp similarity index 72% rename from silkworm/db/datastore/snapshots/index/bloom_filter_test.cpp rename to silkworm/db/datastore/snapshots/bloom_filter/bloom_filter_test.cpp index 0f275cf13c..69d0d20943 100644 --- a/silkworm/db/datastore/snapshots/index/bloom_filter_test.cpp +++ b/silkworm/db/datastore/snapshots/bloom_filter/bloom_filter_test.cpp @@ -24,22 +24,28 @@ #include #include +#include #include "../test_util/sample_bloom_filter_data.hpp" -namespace silkworm::snapshots::index { +namespace silkworm::snapshots::bloom_filter { TEST_CASE("BloomFilter", "[snapshot][index][bloom_filter]") { SECTION("empty") { CHECK_THROWS_AS(BloomFilter(0, 0.01), std::runtime_error); } + SECTION("empty file") { + TemporaryDirectory tmp_dir; + CHECK_THROWS_AS(BloomFilter{tmp_dir.get_unique_temporary_path()}, std::runtime_error); + } + SECTION("item present") { // Create PRNG to generate pseudo-random hash values static std::mt19937_64 rnd_generator{std::random_device{}()}; std::uniform_int_distribution u32_distribution; - BloomFilter filter{BloomFilter::optimal_bits_count(10'000'000, 0.01)}; + BloomFilter filter{10'000'000, 0.01}; CHECK(filter.key_count() == BloomFilter::kHardCodedK); CHECK(filter.bits_count() == BloomFilter::optimal_bits_count(10'000'000, 0.01)); @@ -60,6 +66,25 @@ TEST_CASE("BloomFilter", "[snapshot][index][bloom_filter]") { } } } + + SECTION("item present in file") { + // Create PRNG to generate pseudo-random hash values + static std::mt19937_64 rnd_generator{std::random_device{}()}; + std::uniform_int_distribution u32_distribution; + + // Create sample existence index + REQUIRE(!test_util::kValidBloomFilters.empty()); + silkworm::test_util::TemporaryFile sample_ei_file; + sample_ei_file.write(*from_hex(test_util::kValidBloomFilters[0])); + + BloomFilter existence_index{sample_ei_file.path()}; + CHECK(existence_index.path() == sample_ei_file.path()); + for (size_t i = 0; i < 100; ++i) { + const uint64_t h = u32_distribution(rnd_generator); + existence_index.add_hash(h); + CHECK(existence_index.contains_hash(h)); + } + } } TEST_CASE("BloomFilter: operator>>", "[snapshot][index][bloom_filter]") { @@ -85,4 +110,4 @@ TEST_CASE("BloomFilter: operator>>", "[snapshot][index][bloom_filter]") { } } -} // namespace silkworm::snapshots::index +} // namespace silkworm::snapshots::bloom_filter diff --git a/silkworm/db/datastore/snapshots/index/btree.cpp b/silkworm/db/datastore/snapshots/btree/btree.cpp similarity index 99% rename from silkworm/db/datastore/snapshots/index/btree.cpp rename to silkworm/db/datastore/snapshots/btree/btree.cpp index 5a3e3dd613..5a2095a6e5 100644 --- a/silkworm/db/datastore/snapshots/index/btree.cpp +++ b/silkworm/db/datastore/snapshots/btree/btree.cpp @@ -24,7 +24,7 @@ #include #include -namespace silkworm::snapshots::index { +namespace silkworm::snapshots::btree { //! Smallest shard available for scan instead of binary search static constexpr uint64_t kDefaultBtreeStartSkip{4}; @@ -213,4 +213,4 @@ BTree::BinarySearchResult BTree::binary_search_in_cache(ByteView key) { return {node, left_index, right_index}; } -} // namespace silkworm::snapshots::index +} // namespace silkworm::snapshots::btree diff --git a/silkworm/db/datastore/snapshots/index/btree.hpp b/silkworm/db/datastore/snapshots/btree/btree.hpp similarity index 97% rename from silkworm/db/datastore/snapshots/index/btree.hpp rename to silkworm/db/datastore/snapshots/btree/btree.hpp index 9c2412aea2..985c650dc0 100644 --- a/silkworm/db/datastore/snapshots/index/btree.hpp +++ b/silkworm/db/datastore/snapshots/btree/btree.hpp @@ -24,7 +24,7 @@ #include "../seg/decompressor.hpp" -namespace silkworm::snapshots::index { +namespace silkworm::snapshots::btree { class BTree { public: @@ -101,4 +101,4 @@ class BTree { bool check_encoded_keys_; }; -} // namespace silkworm::snapshots::index +} // namespace silkworm::snapshots::btree diff --git a/silkworm/db/datastore/snapshots/index/btree_index.cpp b/silkworm/db/datastore/snapshots/btree/btree_index.cpp similarity index 98% rename from silkworm/db/datastore/snapshots/index/btree_index.cpp rename to silkworm/db/datastore/snapshots/btree/btree_index.cpp index 47f4d5f9ad..5c37815fd5 100644 --- a/silkworm/db/datastore/snapshots/index/btree_index.cpp +++ b/silkworm/db/datastore/snapshots/btree/btree_index.cpp @@ -24,7 +24,7 @@ #include #include -namespace silkworm::snapshots::index { +namespace silkworm::snapshots::btree { BTreeIndex::BTreeIndex(seg::Decompressor& kv_decompressor, std::filesystem::path index_file_path, @@ -141,4 +141,4 @@ bool BTreeIndex::Cursor::to_next() { return true; } -} // namespace silkworm::snapshots::index +} // namespace silkworm::snapshots::btree diff --git a/silkworm/db/datastore/snapshots/index/btree_index.hpp b/silkworm/db/datastore/snapshots/btree/btree_index.hpp similarity index 94% rename from silkworm/db/datastore/snapshots/index/btree_index.hpp rename to silkworm/db/datastore/snapshots/btree/btree_index.hpp index 2252d4afe9..74ea178d29 100644 --- a/silkworm/db/datastore/snapshots/index/btree_index.hpp +++ b/silkworm/db/datastore/snapshots/btree/btree_index.hpp @@ -23,13 +23,11 @@ #include -#include "../common/encoding/elias_fano.hpp" +#include "../elias_fano/elias_fano.hpp" #include "../seg/decompressor.hpp" #include "btree.hpp" -namespace silkworm::snapshots::index { - -using encoding::EliasFanoList32; +namespace silkworm::snapshots::btree { class BTreeIndex { public: @@ -37,6 +35,7 @@ class BTreeIndex { using DataIndex = BTree::DataIndex; using DataIterator = BTree::DataIterator; + using EliasFanoList32 = elias_fano::EliasFanoList32; class Cursor { public: @@ -96,4 +95,4 @@ class BTreeIndex { std::unique_ptr btree_; }; -} // namespace silkworm::snapshots::index +} // namespace silkworm::snapshots::btree diff --git a/silkworm/db/datastore/snapshots/index/btree_index_test.cpp b/silkworm/db/datastore/snapshots/btree/btree_index_test.cpp similarity index 98% rename from silkworm/db/datastore/snapshots/index/btree_index_test.cpp rename to silkworm/db/datastore/snapshots/btree/btree_index_test.cpp index 1390d4d7e2..fff77aaa8a 100644 --- a/silkworm/db/datastore/snapshots/index/btree_index_test.cpp +++ b/silkworm/db/datastore/snapshots/btree/btree_index_test.cpp @@ -29,9 +29,10 @@ #include "../seg/compressor.hpp" #include "../seg/decompressor.hpp" -namespace silkworm::snapshots::index { +namespace silkworm::snapshots::btree { using namespace silkworm::test_util; +using elias_fano::EliasFanoList32; using KeyAndValue = std::pair; @@ -173,4 +174,4 @@ TEST_CASE("BTreeIndex", "[db]") { } } -} // namespace silkworm::snapshots::index +} // namespace silkworm::snapshots::btree diff --git a/silkworm/db/datastore/snapshots/snapshot_word_serializer.hpp b/silkworm/db/datastore/snapshots/common/codec.hpp similarity index 59% rename from silkworm/db/datastore/snapshots/snapshot_word_serializer.hpp rename to silkworm/db/datastore/snapshots/common/codec.hpp index 627db407b0..fb145bd4d4 100644 --- a/silkworm/db/datastore/snapshots/snapshot_word_serializer.hpp +++ b/silkworm/db/datastore/snapshots/common/codec.hpp @@ -23,23 +23,25 @@ namespace silkworm::snapshots { class SnapshotPath; -struct SnapshotWordSerializer { - virtual ~SnapshotWordSerializer() = default; +struct Encoder { + virtual ~Encoder() = default; virtual ByteView encode_word() = 0; }; -template -concept SnapshotWordSerializerConcept = std::derived_from && - requires(TWordSerializer serializer) { serializer.value; }; +template +concept EncoderConcept = + std::derived_from && + requires(TEncoder encoder) { encoder.value; }; -struct SnapshotWordDeserializer { - virtual ~SnapshotWordDeserializer() = default; +struct Decoder { + virtual ~Decoder() = default; virtual void decode_word(ByteView word) = 0; virtual void check_sanity_with_metadata(const SnapshotPath& /*path*/) {} }; -template -concept SnapshotWordDeserializerConcept = std::derived_from && - requires(TWordDeserializer deserializer) { deserializer.value; }; +template +concept DecoderConcept = + std::derived_from && + requires(TDecoder decoder) { decoder.value; }; } // namespace silkworm::snapshots diff --git a/silkworm/db/datastore/snapshots/snapshot_path.cpp b/silkworm/db/datastore/snapshots/common/snapshot_path.cpp similarity index 100% rename from silkworm/db/datastore/snapshots/snapshot_path.cpp rename to silkworm/db/datastore/snapshots/common/snapshot_path.cpp diff --git a/silkworm/db/datastore/snapshots/snapshot_path.hpp b/silkworm/db/datastore/snapshots/common/snapshot_path.hpp similarity index 100% rename from silkworm/db/datastore/snapshots/snapshot_path.hpp rename to silkworm/db/datastore/snapshots/common/snapshot_path.hpp diff --git a/silkworm/db/datastore/snapshots/snapshot_path_test.cpp b/silkworm/db/datastore/snapshots/common/snapshot_path_test.cpp similarity index 100% rename from silkworm/db/datastore/snapshots/snapshot_path_test.cpp rename to silkworm/db/datastore/snapshots/common/snapshot_path_test.cpp diff --git a/silkworm/db/datastore/snapshots/snapshot_type.hpp b/silkworm/db/datastore/snapshots/common/snapshot_type.hpp similarity index 100% rename from silkworm/db/datastore/snapshots/snapshot_type.hpp rename to silkworm/db/datastore/snapshots/common/snapshot_type.hpp diff --git a/silkworm/db/datastore/snapshots/step.hpp b/silkworm/db/datastore/snapshots/common/step.hpp similarity index 100% rename from silkworm/db/datastore/snapshots/step.hpp rename to silkworm/db/datastore/snapshots/common/step.hpp diff --git a/silkworm/db/datastore/snapshots/common/bitmask_operators.hpp b/silkworm/db/datastore/snapshots/common/util/bitmask_operators.hpp similarity index 100% rename from silkworm/db/datastore/snapshots/common/bitmask_operators.hpp rename to silkworm/db/datastore/snapshots/common/util/bitmask_operators.hpp diff --git a/silkworm/db/datastore/snapshots/common/iterator/iterator_read_into_vector.hpp b/silkworm/db/datastore/snapshots/common/util/iterator/iterator_read_into_vector.hpp similarity index 100% rename from silkworm/db/datastore/snapshots/common/iterator/iterator_read_into_vector.hpp rename to silkworm/db/datastore/snapshots/common/util/iterator/iterator_read_into_vector.hpp diff --git a/silkworm/db/datastore/snapshots/common/iterator/map_values_view.hpp b/silkworm/db/datastore/snapshots/common/util/iterator/map_values_view.hpp similarity index 100% rename from silkworm/db/datastore/snapshots/common/iterator/map_values_view.hpp rename to silkworm/db/datastore/snapshots/common/util/iterator/map_values_view.hpp diff --git a/silkworm/db/datastore/snapshots/config/amoy.hpp b/silkworm/db/datastore/snapshots/config/chains/amoy.hpp similarity index 100% rename from silkworm/db/datastore/snapshots/config/amoy.hpp rename to silkworm/db/datastore/snapshots/config/chains/amoy.hpp diff --git a/silkworm/db/datastore/snapshots/config/bor_mainnet.hpp b/silkworm/db/datastore/snapshots/config/chains/bor_mainnet.hpp similarity index 100% rename from silkworm/db/datastore/snapshots/config/bor_mainnet.hpp rename to silkworm/db/datastore/snapshots/config/chains/bor_mainnet.hpp diff --git a/silkworm/db/datastore/snapshots/config/holesky.hpp b/silkworm/db/datastore/snapshots/config/chains/holesky.hpp similarity index 100% rename from silkworm/db/datastore/snapshots/config/holesky.hpp rename to silkworm/db/datastore/snapshots/config/chains/holesky.hpp diff --git a/silkworm/db/datastore/snapshots/config/mainnet.hpp b/silkworm/db/datastore/snapshots/config/chains/mainnet.hpp similarity index 100% rename from silkworm/db/datastore/snapshots/config/mainnet.hpp rename to silkworm/db/datastore/snapshots/config/chains/mainnet.hpp diff --git a/silkworm/db/datastore/snapshots/config/sepolia.hpp b/silkworm/db/datastore/snapshots/config/chains/sepolia.hpp similarity index 100% rename from silkworm/db/datastore/snapshots/config/sepolia.hpp rename to silkworm/db/datastore/snapshots/config/chains/sepolia.hpp diff --git a/silkworm/db/datastore/snapshots/config.cpp b/silkworm/db/datastore/snapshots/config/config.cpp similarity index 88% rename from silkworm/db/datastore/snapshots/config.cpp rename to silkworm/db/datastore/snapshots/config/config.cpp index f71e57d5df..1c09db0a54 100644 --- a/silkworm/db/datastore/snapshots/config.cpp +++ b/silkworm/db/datastore/snapshots/config/config.cpp @@ -21,12 +21,11 @@ #include -#include "config/amoy.hpp" -#include "config/bor_mainnet.hpp" -#include "config/holesky.hpp" -#include "config/mainnet.hpp" -#include "config/sepolia.hpp" -#include "snapshot_path.hpp" +#include "chains/amoy.hpp" +#include "chains/bor_mainnet.hpp" +#include "chains/holesky.hpp" +#include "chains/mainnet.hpp" +#include "chains/sepolia.hpp" namespace silkworm::snapshots { @@ -80,6 +79,14 @@ PreverifiedList Config::remove_unsupported_entries(const PreverifiedList& entrie return results; } +PreverifiedListOfPairs Config::preverified_snapshots_as_pairs() const { + PreverifiedListOfPairs entries; + for (const Entry& entry : entries_) { + entries.emplace_back(entry.file_name, entry.torrent_hash); + } + return entries; +} + bool Config::contains_file_name(std::string_view file_name) const { return std::ranges::any_of(entries_, [&](const Entry& entry) { return entry.file_name == file_name; diff --git a/silkworm/db/datastore/snapshots/config.hpp b/silkworm/db/datastore/snapshots/config/config.hpp similarity index 89% rename from silkworm/db/datastore/snapshots/config.hpp rename to silkworm/db/datastore/snapshots/config/config.hpp index 460ca4b34c..6431126975 100644 --- a/silkworm/db/datastore/snapshots/config.hpp +++ b/silkworm/db/datastore/snapshots/config/config.hpp @@ -18,6 +18,7 @@ #include #include +#include #include #include @@ -28,6 +29,7 @@ namespace silkworm::snapshots { using PreverifiedList = std::vector; +using PreverifiedListOfPairs = std::vector>; class Config { public: @@ -39,6 +41,7 @@ class Config { : entries_(std::move(entries)) {} const PreverifiedList& preverified_snapshots() const { return entries_; } + PreverifiedListOfPairs preverified_snapshots_as_pairs() const; bool contains_file_name(std::string_view file_name) const; private: diff --git a/silkworm/db/datastore/snapshots/config_test.cpp b/silkworm/db/datastore/snapshots/config/config_test.cpp similarity index 98% rename from silkworm/db/datastore/snapshots/config_test.cpp rename to silkworm/db/datastore/snapshots/config/config_test.cpp index d79dc5138d..5c62318852 100644 --- a/silkworm/db/datastore/snapshots/config_test.cpp +++ b/silkworm/db/datastore/snapshots/config/config_test.cpp @@ -21,8 +21,6 @@ #include #include -#include "snapshot_path.hpp" - namespace silkworm::snapshots { TEST_CASE("Config::lookup_known_config", "[silkworm][snapshot][config]") { diff --git a/silkworm/db/datastore/snapshots/entry.hpp b/silkworm/db/datastore/snapshots/config/entry.hpp similarity index 100% rename from silkworm/db/datastore/snapshots/entry.hpp rename to silkworm/db/datastore/snapshots/config/entry.hpp diff --git a/silkworm/db/datastore/snapshots/common/encoding/elias_fano.hpp b/silkworm/db/datastore/snapshots/elias_fano/elias_fano.hpp similarity index 97% rename from silkworm/db/datastore/snapshots/common/encoding/elias_fano.hpp rename to silkworm/db/datastore/snapshots/elias_fano/elias_fano.hpp index 044613bde8..6dabc7932b 100644 --- a/silkworm/db/datastore/snapshots/common/encoding/elias_fano.hpp +++ b/silkworm/db/datastore/snapshots/elias_fano/elias_fano.hpp @@ -61,8 +61,8 @@ #include #include -#include "sequence.hpp" -#include "util.hpp" +#include "../common/encoding/sequence.hpp" +#include "../common/encoding/util.hpp" // Elias-Fano encoding is a high bits / low bits representation of a monotonically increasing sequence of N > 0 natural numbers x[i] // 0 <= x[0] <= x[1] <= ... <= x[N-2] <= x[N-1] <= U @@ -72,7 +72,7 @@ // P. Elias. Efficient storage and retrieval by content and address of static files. J. ACM, 21(2):246–260, 1974. // Partitioned Elias-Fano Indexes http://groups.di.unipi.it/~ottavian/files/elias_fano_sigir14.pdf -namespace silkworm::snapshots::encoding { +namespace silkworm::snapshots::elias_fano { //! Log2Q = Log2(Quantum) static constexpr uint64_t kLog2q = 8; @@ -106,6 +106,8 @@ static void set_bits(std::span bits, const uint64_t start, const uint } } +using silkworm::snapshots::encoding::Uint64Sequence; + //! 32-bit Elias-Fano (EF) list that can be used to encode one monotone non-decreasing sequence class EliasFanoList32 { public: @@ -190,7 +192,7 @@ class EliasFanoList32 { d -= static_cast(bit_count); } - const uint64_t sel = select64(window, d); + const uint64_t sel = encoding::select64(window, d); const auto value = ((current_word * 64 + sel - i) << l_ | (lower & lower_bits_mask_)); return value; } @@ -389,7 +391,7 @@ class DoubleEliasFanoList16 { window_cum_keys = upper_bits_cum_keys_[curr_word_cum_keys]; } lower >>= l_position_; - cum_keys_next = ((curr_word_cum_keys * 64 + static_cast(rho(window_cum_keys)) - i - 1) << l_cum_keys_ | (lower & lower_bits_mask_cum_keys_)) + cum_delta + cum_keys_min_delta_; + cum_keys_next = ((curr_word_cum_keys * 64 + static_cast(encoding::rho(window_cum_keys)) - i - 1) << l_cum_keys_ | (lower & lower_bits_mask_cum_keys_)) + cum_delta + cum_keys_min_delta_; } private: @@ -460,13 +462,13 @@ class DoubleEliasFanoList16 { delta_position -= static_cast(bit_count); } - select_cum_keys = select64(window_cum_keys, delta_cum_keys); + select_cum_keys = encoding::select64(window_cum_keys, delta_cum_keys); cum_delta = i * cum_keys_min_delta_; cum_keys = ((curr_word_cum_keys * 64 + select_cum_keys - i) << l_cum_keys_ | (lower & lower_bits_mask_cum_keys_)) + cum_delta; lower >>= l_cum_keys_; - const uint64_t select_position = select64(window_position, delta_position); + const uint64_t select_position = encoding::select64(window_position, delta_position); const uint64_t bit_delta = i * position_min_delta_; position = ((curr_word_position * 64 + select_position - i) << l_position_ | (lower & lower_bits_mask_position_)) + bit_delta; } @@ -554,8 +556,8 @@ class DoubleEliasFanoList16 { is.read(reinterpret_cast(uint64_buffer.data()), sizeof(uint64_t)); ef.position_min_delta_ = endian::load_big_u64(uint64_buffer.data()); - ef.l_position_ = ef.u_position_ / (ef.num_buckets_ + 1) == 0 ? 0 : static_cast(lambda(ef.u_position_ / (ef.num_buckets_ + 1))); - ef.l_cum_keys_ = ef.u_cum_keys_ / (ef.num_buckets_ + 1) == 0 ? 0 : static_cast(lambda(ef.u_cum_keys_ / (ef.num_buckets_ + 1))); + ef.l_position_ = ef.u_position_ / (ef.num_buckets_ + 1) == 0 ? 0 : static_cast(encoding::lambda(ef.u_position_ / (ef.num_buckets_ + 1))); + ef.l_cum_keys_ = ef.u_cum_keys_ / (ef.num_buckets_ + 1) == 0 ? 0 : static_cast(encoding::lambda(ef.u_cum_keys_ / (ef.num_buckets_ + 1))); SILKWORM_ASSERT(ef.l_cum_keys_ * 2 + ef.l_position_ <= 56); ef.lower_bits_mask_cum_keys_ = (1UL << ef.l_cum_keys_) - 1; @@ -576,4 +578,4 @@ class DoubleEliasFanoList16 { } }; -} // namespace silkworm::snapshots::encoding +} // namespace silkworm::snapshots::elias_fano diff --git a/silkworm/db/datastore/snapshots/common/encoding/elias_fano_test.cpp b/silkworm/db/datastore/snapshots/elias_fano/elias_fano_test.cpp similarity index 97% rename from silkworm/db/datastore/snapshots/common/encoding/elias_fano_test.cpp rename to silkworm/db/datastore/snapshots/elias_fano/elias_fano_test.cpp index 5bba2348fc..428a6e2a2a 100644 --- a/silkworm/db/datastore/snapshots/common/encoding/elias_fano_test.cpp +++ b/silkworm/db/datastore/snapshots/elias_fano/elias_fano_test.cpp @@ -26,7 +26,9 @@ #include #include -namespace silkworm::snapshots::encoding { +namespace silkworm::snapshots::elias_fano { + +using silkworm::snapshots::encoding::Uint64Sequence; struct EliasFanoList32Test { std::vector offsets; @@ -168,4 +170,4 @@ TEST_CASE("DoubleEliasFanoList16", "[silkworm][recsplit][elias_fano]") { "0000000000000000010000000000000000000000000000000000000000000000")); } -} // namespace silkworm::snapshots::encoding +} // namespace silkworm::snapshots::elias_fano diff --git a/silkworm/db/datastore/snapshots/index/existence_index.cpp b/silkworm/db/datastore/snapshots/index/existence_index.cpp deleted file mode 100644 index 748de1573f..0000000000 --- a/silkworm/db/datastore/snapshots/index/existence_index.cpp +++ /dev/null @@ -1,42 +0,0 @@ -/* - Copyright 2024 The Silkworm Authors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "existence_index.hpp" - -#include - -namespace silkworm::snapshots::index { - -ExistenceIndex::ExistenceIndex(std::filesystem::path index_file_path) - : index_file_path_(std::move(index_file_path)) { - if (std::filesystem::file_size(index_file_path_) == 0) { - throw std::runtime_error("index " + index_file_path_.filename().string() + " is empty"); - } - std::ifstream index_file_stream{index_file_path_, std::ios::in | std::ios::binary}; - index_file_stream.exceptions(std::ios::failbit | std::ios::badbit); - filter_ = std::make_unique(); - index_file_stream >> *filter_; -} - -void ExistenceIndex::add_hash(uint64_t hash) { - filter_->add_hash(hash); -} - -bool ExistenceIndex::contains_hash(uint64_t hash) { - return filter_->contains_hash(hash); -} - -} // namespace silkworm::snapshots::index diff --git a/silkworm/db/datastore/snapshots/index/existence_index.hpp b/silkworm/db/datastore/snapshots/index/existence_index.hpp deleted file mode 100644 index a2f54a8a5d..0000000000 --- a/silkworm/db/datastore/snapshots/index/existence_index.hpp +++ /dev/null @@ -1,54 +0,0 @@ -/* - Copyright 2024 The Silkworm Authors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#pragma once - -#include -#include -#include -#include - -#include "bloom_filter.hpp" - -namespace silkworm::snapshots::index { - -class BloomFilter; - -//! Key existence index based on a Bloom filter implementation -class ExistenceIndex { - public: - explicit ExistenceIndex(std::filesystem::path index_file_path); - - std::filesystem::path path() const { return index_file_path_; } - - //! Insert an already hashed item into the index - //! \param hash the hash value to add - void add_hash(uint64_t hash); - - //! Checks if index contains the give \p hash value - //! \param hash the hash value to check for presence - //! \return false means "definitely does not contain value", true means "probably contains value" - bool contains_hash(uint64_t hash); - - private: - //! The index file path - std::filesystem::path index_file_path_; - - //! The Bloom filter - std::unique_ptr filter_; -}; - -} // namespace silkworm::snapshots::index diff --git a/silkworm/db/datastore/snapshots/index/existence_index_test.cpp b/silkworm/db/datastore/snapshots/index/existence_index_test.cpp deleted file mode 100644 index ee6fe3eb8e..0000000000 --- a/silkworm/db/datastore/snapshots/index/existence_index_test.cpp +++ /dev/null @@ -1,61 +0,0 @@ -/* - Copyright 2024 The Silkworm Authors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "existence_index.hpp" - -#include -#include -#include - -#include - -#include -#include -#include - -#include "../test_util/sample_bloom_filter_data.hpp" - -namespace silkworm::snapshots::index { - -using silkworm::test_util::TemporaryFile; - -TEST_CASE("ExistenceIndex", "[snapshot][index][existence_index]") { - SECTION("empty") { - TemporaryDirectory tmp_dir; - CHECK_THROWS_AS(ExistenceIndex(tmp_dir.get_unique_temporary_path()), std::runtime_error); - } - - SECTION("item present") { - // Create PRNG to generate pseudo-random hash values - static std::mt19937_64 rnd_generator{std::random_device{}()}; - std::uniform_int_distribution u32_distribution; - - // Create sample existence index - REQUIRE(!test_util::kValidBloomFilters.empty()); - TemporaryFile sample_ei_file; - sample_ei_file.write(*from_hex(test_util::kValidBloomFilters[0])); - - ExistenceIndex existence_index{sample_ei_file.path()}; - CHECK(existence_index.path() == sample_ei_file.path()); - for (size_t i = 0; i < 100; ++i) { - const uint64_t h = u32_distribution(rnd_generator); - existence_index.add_hash(h); - CHECK(existence_index.contains_hash(h)); - } - } -} - -} // namespace silkworm::snapshots::index diff --git a/silkworm/db/datastore/snapshots/index_builder.cpp b/silkworm/db/datastore/snapshots/index_builder.cpp index d9453bff04..f1c61b220e 100644 --- a/silkworm/db/datastore/snapshots/index_builder.cpp +++ b/silkworm/db/datastore/snapshots/index_builder.cpp @@ -46,34 +46,34 @@ static IndexInputDataQuery::Iterator::value_type decompressor_index_query_entry( } IndexInputDataQuery::Iterator DecompressorIndexInputDataQuery::begin() { - auto decoder = std::make_shared(segment_path_.path(), segment_region_); - decoder->open(); + auto decompressor = std::make_shared(segment_path_.path(), segment_region_); + decompressor->open(); - auto impl_it = std::make_shared(IteratorImpl{decoder, decoder->begin()}); + auto impl_it = std::make_shared(IteratorImpl{decompressor, decompressor->begin()}); return IndexInputDataQuery::Iterator{this, impl_it, decompressor_index_query_entry(impl_it->it)}; } IndexInputDataQuery::Iterator DecompressorIndexInputDataQuery::end() { - auto decoder = std::make_shared(segment_path_.path(), segment_region_); + auto decompressor = std::make_shared(segment_path_.path(), segment_region_); - auto impl_it = std::make_shared(IteratorImpl{{}, decoder->end()}); + auto impl_it = std::make_shared(IteratorImpl{{}, decompressor->end()}); return IndexInputDataQuery::Iterator{this, impl_it, decompressor_index_query_entry(impl_it->it)}; } size_t DecompressorIndexInputDataQuery::keys_count() { - seg::Decompressor decoder{segment_path_.path(), segment_region_}; - decoder.open(); - return decoder.words_count(); + seg::Decompressor decompressor{segment_path_.path(), segment_region_}; + decompressor.open(); + return decompressor.words_count(); } std::pair, IndexInputDataQuery::Iterator::value_type> DecompressorIndexInputDataQuery::next_iterator(std::shared_ptr it_impl) { auto& it_impl_ref = *reinterpret_cast(it_impl.get()); // check if not already at the end - if (it_impl_ref.decoder) { + if (it_impl_ref.decompressor) { ++it_impl_ref.it; - if (it_impl_ref.it == it_impl_ref.decoder->end()) { - it_impl_ref.decoder.reset(); + if (it_impl_ref.it == it_impl_ref.decompressor->end()) { + it_impl_ref.decompressor.reset(); } } return {it_impl, decompressor_index_query_entry(it_impl_ref.it)}; @@ -84,8 +84,8 @@ bool DecompressorIndexInputDataQuery::equal_iterators( std::shared_ptr rhs_it_impl) const { auto lhs = reinterpret_cast(lhs_it_impl.get()); auto rhs = reinterpret_cast(rhs_it_impl.get()); - return (lhs->decoder == rhs->decoder) && - (!lhs->decoder || (lhs->it == rhs->it)); + return (lhs->decompressor == rhs->decompressor) && + (!lhs->decompressor || (lhs->it == rhs->it)); } void IndexBuilder::build() { diff --git a/silkworm/db/datastore/snapshots/index_builder.hpp b/silkworm/db/datastore/snapshots/index_builder.hpp index d04acb680a..002683bee6 100644 --- a/silkworm/db/datastore/snapshots/index_builder.hpp +++ b/silkworm/db/datastore/snapshots/index_builder.hpp @@ -26,8 +26,8 @@ #include #include +#include "common/snapshot_path.hpp" #include "seg/decompressor.hpp" -#include "snapshot_path.hpp" namespace silkworm::snapshots { @@ -104,7 +104,7 @@ class DecompressorIndexInputDataQuery : public IndexInputDataQuery { private: struct IteratorImpl { - std::shared_ptr decoder; + std::shared_ptr decompressor; seg::Decompressor::Iterator it; }; diff --git a/silkworm/db/datastore/snapshots/common/encoding/golomb_rice.hpp b/silkworm/db/datastore/snapshots/rec_split/golomb_rice.hpp similarity index 92% rename from silkworm/db/datastore/snapshots/common/encoding/golomb_rice.hpp rename to silkworm/db/datastore/snapshots/rec_split/golomb_rice.hpp index 213786b1cd..e21756f786 100644 --- a/silkworm/db/datastore/snapshots/common/encoding/golomb_rice.hpp +++ b/silkworm/db/datastore/snapshots/rec_split/golomb_rice.hpp @@ -52,14 +52,17 @@ #include #include -#include "sequence.hpp" -#include "util.hpp" +#include "../common/encoding/sequence.hpp" +#include "../common/encoding/util.hpp" -namespace silkworm::snapshots::encoding { +namespace silkworm::snapshots::rec_split { //! Storage for Golomb-Rice codes of a RecSplit bucket. class GolombRiceVector { public: + using Uint32Sequence = encoding::Uint32Sequence; + using Uint64Sequence = encoding::Uint64Sequence; + class Builder { public: static constexpr size_t kDefaultAllocatedWords{16}; @@ -186,7 +189,7 @@ class GolombRiceVector { } } - const auto pos = static_cast(rho(curr_window_unary_)); + const auto pos = static_cast(encoding::rho(curr_window_unary_)); curr_window_unary_ >>= pos; curr_window_unary_ >>= 1; @@ -209,12 +212,12 @@ class GolombRiceVector { void skip_subtree(const size_t nodes, const size_t fixed_len) { SILKWORM_ASSERT(nodes > 0); size_t missing = nodes, cnt = 0; - while ((cnt = static_cast(nu(curr_window_unary_))) < missing) { + while ((cnt = static_cast(encoding::nu(curr_window_unary_))) < missing) { curr_window_unary_ = *(curr_ptr_unary_++); missing -= cnt; valid_lower_bits_unary_ = 64; } - cnt = select64(curr_window_unary_, missing - 1); + cnt = encoding::select64(curr_window_unary_, missing - 1); curr_window_unary_ >>= cnt; curr_window_unary_ >>= 1; valid_lower_bits_unary_ -= cnt + 1; @@ -241,17 +244,19 @@ class GolombRiceVector { Reader reader() const { return Reader{data_}; } private: - Uint64Sequence data_; + encoding::Uint64Sequence data_; friend std::ostream& operator<<(std::ostream& os, const GolombRiceVector& rbv) { + using namespace encoding; os << rbv.data_; return os; } friend std::istream& operator>>(std::istream& is, GolombRiceVector& rbv) { + using namespace encoding; is >> rbv.data_; return is; } }; -} // namespace silkworm::snapshots::encoding +} // namespace silkworm::snapshots::rec_split diff --git a/silkworm/db/datastore/snapshots/common/encoding/golomb_rice_test.cpp b/silkworm/db/datastore/snapshots/rec_split/golomb_rice_test.cpp similarity index 93% rename from silkworm/db/datastore/snapshots/common/encoding/golomb_rice_test.cpp rename to silkworm/db/datastore/snapshots/rec_split/golomb_rice_test.cpp index 8e9efce9cf..0e07c0769e 100644 --- a/silkworm/db/datastore/snapshots/common/encoding/golomb_rice_test.cpp +++ b/silkworm/db/datastore/snapshots/rec_split/golomb_rice_test.cpp @@ -23,7 +23,10 @@ #include #include -namespace silkworm::snapshots::encoding { +namespace silkworm::snapshots::rec_split { + +using silkworm::snapshots::encoding::Uint32Sequence; +using silkworm::snapshots::encoding::Uint64Sequence; static const size_t kGolombRiceTestNumKeys{128}; static const size_t kGolombRiceTestNumTrees{1'000}; @@ -85,4 +88,4 @@ TEST_CASE("GolombRiceVector", "[silkworm][recsplit][golomb_rice]") { } } -} // namespace silkworm::snapshots::encoding +} // namespace silkworm::snapshots::rec_split diff --git a/silkworm/db/datastore/snapshots/rec_split/rec_split.hpp b/silkworm/db/datastore/snapshots/rec_split/rec_split.hpp index 1e4b477b01..bc4962031d 100644 --- a/silkworm/db/datastore/snapshots/rec_split/rec_split.hpp +++ b/silkworm/db/datastore/snapshots/rec_split/rec_split.hpp @@ -75,9 +75,9 @@ #include #include -#include "../common/bitmask_operators.hpp" -#include "../common/encoding/elias_fano.hpp" -#include "../common/encoding/golomb_rice.hpp" +#include "../common/util/bitmask_operators.hpp" +#include "../elias_fano/elias_fano.hpp" +#include "golomb_rice.hpp" #include "murmur_hash3.hpp" #pragma GCC diagnostic push @@ -211,9 +211,9 @@ template class RecSplit { public: using SplitStrategy = SplittingStrategy; - using GolombRiceBuilder = encoding::GolombRiceVector::Builder; - using EliasFano = encoding::EliasFanoList32; - using DoubleEliasFano = encoding::DoubleEliasFanoList16; + using GolombRiceBuilder = GolombRiceVector::Builder; + using EliasFano = elias_fano::EliasFanoList32; + using DoubleEliasFano = elias_fano::DoubleEliasFanoList16; //! The base class for RecSplit building strategies struct BuildingStrategy { @@ -222,8 +222,12 @@ class RecSplit { virtual void setup(const RecSplitSettings& settings, size_t bucket_count) = 0; virtual void add_key(uint64_t bucket_id, uint64_t bucket_key, uint64_t offset) = 0; - virtual bool build_mph_index(std::ofstream& index_output_stream, encoding::GolombRiceVector& golomb_rice_codes, - uint16_t& golomb_param_max_index, DoubleEliasFano& double_ef_index, uint8_t bytes_per_record) = 0; + virtual bool build_mph_index( + std::ofstream& index_output_stream, + GolombRiceVector& golomb_rice_codes, + uint16_t& golomb_param_max_index, + DoubleEliasFano& double_ef_index, + uint8_t bytes_per_record) = 0; virtual void build_enum_index(std::unique_ptr& ef_offsets) = 0; virtual void clear() = 0; @@ -958,7 +962,7 @@ class RecSplit { size_t bucket_count_; //! The Golomb-Rice (GR) codes of splitting and bijection indices - encoding::GolombRiceVector golomb_rice_codes_; + GolombRiceVector golomb_rice_codes_; //! Double Elias-Fano (EF) index for bucket cumulative keys and bit positions DoubleEliasFano double_ef_index_; diff --git a/silkworm/db/datastore/snapshots/rec_split/rec_split_par.hpp b/silkworm/db/datastore/snapshots/rec_split/rec_split_par.hpp index 034ad62206..bb265c4787 100644 --- a/silkworm/db/datastore/snapshots/rec_split/rec_split_par.hpp +++ b/silkworm/db/datastore/snapshots/rec_split/rec_split_par.hpp @@ -116,7 +116,7 @@ struct RecSplit::ParallelBuildingStrategy : public BuildingStrategy { std::vector values_; // mike: current_bucket_offsets_; -> values_ //! Helper to build GR codes of splitting and bijection indices, local to current bucket - encoding::GolombRiceVector::LazyBuilder gr_builder_; + GolombRiceVector::LazyBuilder gr_builder_; //! The local max index used in Golomb parameter array uint16_t golomb_param_max_index_{0}; @@ -167,8 +167,12 @@ struct RecSplit::ParallelBuildingStrategy : public BuildingStrategy { ++keys_added_; } - bool build_mph_index(std::ofstream& index_output_stream, encoding::GolombRiceVector& golomb_rice_codes, uint16_t& golomb_param_max_index, - DoubleEliasFano& double_ef_index, uint8_t bytes_per_record) override { + bool build_mph_index( + std::ofstream& index_output_stream, + GolombRiceVector& golomb_rice_codes, + uint16_t& golomb_param_max_index, + DoubleEliasFano& double_ef_index, + uint8_t bytes_per_record) override { // Find splitting trees for each bucket std::atomic_bool collision{false}; for (auto& bucket : buckets_) { diff --git a/silkworm/db/datastore/snapshots/rec_split/rec_split_seq.hpp b/silkworm/db/datastore/snapshots/rec_split/rec_split_seq.hpp index cd334161d8..4e60559126 100644 --- a/silkworm/db/datastore/snapshots/rec_split/rec_split_seq.hpp +++ b/silkworm/db/datastore/snapshots/rec_split/rec_split_seq.hpp @@ -106,8 +106,12 @@ struct RecSplit::SequentialBuildingStrategy : public BuildingStrategy ++keys_added_; } - bool build_mph_index(std::ofstream& index_output_stream, encoding::GolombRiceVector& golomb_rice_codes, uint16_t& golomb_param_max_index, - DoubleEliasFano& double_ef_index, uint8_t bytes_per_record) override { + bool build_mph_index( + std::ofstream& index_output_stream, + GolombRiceVector& golomb_rice_codes, + uint16_t& golomb_param_max_index, + DoubleEliasFano& double_ef_index, + uint8_t bytes_per_record) override { current_bucket_id_ = std::numeric_limits::max(); // To make sure 0 bucket is detected [[maybe_unused]] auto _ = gsl::finally([&]() { bucket_collector_->clear(); }); diff --git a/silkworm/db/datastore/snapshots/index.cpp b/silkworm/db/datastore/snapshots/rec_split_index/index.cpp similarity index 100% rename from silkworm/db/datastore/snapshots/index.cpp rename to silkworm/db/datastore/snapshots/rec_split_index/index.cpp diff --git a/silkworm/db/datastore/snapshots/index.hpp b/silkworm/db/datastore/snapshots/rec_split_index/index.hpp similarity index 96% rename from silkworm/db/datastore/snapshots/index.hpp rename to silkworm/db/datastore/snapshots/rec_split_index/index.hpp index 84d4420033..d02d90358a 100644 --- a/silkworm/db/datastore/snapshots/index.hpp +++ b/silkworm/db/datastore/snapshots/rec_split_index/index.hpp @@ -23,8 +23,8 @@ #include #include -#include "rec_split/rec_split.hpp" -#include "snapshot_path.hpp" +#include "../common/snapshot_path.hpp" +#include "../rec_split/rec_split.hpp" namespace silkworm::snapshots { diff --git a/silkworm/db/datastore/snapshots/seg/decompressor.hpp b/silkworm/db/datastore/snapshots/seg/decompressor.hpp index 9dc2ef8e13..82a1425f94 100644 --- a/silkworm/db/datastore/snapshots/seg/decompressor.hpp +++ b/silkworm/db/datastore/snapshots/seg/decompressor.hpp @@ -29,7 +29,7 @@ #include #include -#include "../common/bitmask_operators.hpp" +#include "../common/util/bitmask_operators.hpp" namespace silkworm::snapshots::seg { diff --git a/silkworm/db/datastore/snapshots/segment/segment_reader.cpp b/silkworm/db/datastore/snapshots/segment/segment_reader.cpp new file mode 100644 index 0000000000..155b74ee57 --- /dev/null +++ b/silkworm/db/datastore/snapshots/segment/segment_reader.cpp @@ -0,0 +1,115 @@ +/* + Copyright 2024 The Silkworm Authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "segment_reader.hpp" + +#include + +#include +#include + +namespace silkworm::snapshots { + +SegmentFileReader::SegmentFileReader( + SnapshotPath path, + std::optional segment_region) + : path_(std::move(path)), + decompressor_{path_.path(), segment_region} {} + +SegmentFileReader::~SegmentFileReader() { + close(); +} + +MemoryMappedRegion SegmentFileReader::memory_file_region() const { + const auto memory_file{decompressor_.memory_file()}; + if (!memory_file) return MemoryMappedRegion{}; + return memory_file->region(); +} + +void SegmentFileReader::reopen_segment() { + close(); + + // Open decompressor that opens the mapped file in turns + decompressor_.open(); +} + +SegmentFileReader::Iterator& SegmentFileReader::Iterator::operator++() { + bool has_next = it_.has_next(); + ++it_; + + if (has_next) { + decoder_->decode_word(*it_); + decoder_->check_sanity_with_metadata(path_); + } else { + decoder_.reset(); + } + return *this; +} + +SegmentFileReader::Iterator& SegmentFileReader::Iterator::operator+=(size_t count) { + while ((count > 1) && it_.has_next()) { + it_.skip(); + --count; + } + if (count > 0) { + ++*this; + } + return *this; +} + +bool operator==(const SegmentFileReader::Iterator& lhs, const SegmentFileReader::Iterator& rhs) { + return (lhs.decoder_ == rhs.decoder_) && + (!lhs.decoder_ || (lhs.it_ == rhs.it_)); +} + +SegmentFileReader::Iterator SegmentFileReader::begin(std::shared_ptr decoder) const { + auto it = decompressor_.begin(); + if (it == decompressor_.end()) { + return end(); + } + decoder->decode_word(*it); + decoder->check_sanity_with_metadata(path_); + return SegmentFileReader::Iterator{std::move(it), std::move(decoder), path()}; +} + +SegmentFileReader::Iterator SegmentFileReader::end() const { + return SegmentFileReader::Iterator{decompressor_.end(), {}, path()}; +} + +seg::Decompressor::Iterator SegmentFileReader::seek_decompressor(uint64_t offset, std::optional hash_prefix) const { + return decompressor_.seek(offset, hash_prefix ? ByteView{hash_prefix->bytes, 1} : ByteView{}); +} + +SegmentFileReader::Iterator SegmentFileReader::seek(uint64_t offset, std::optional hash_prefix, std::shared_ptr decoder) const { + auto it = seek_decompressor(offset, hash_prefix); + if (it == decompressor_.end()) { + return end(); + } + try { + decoder->decode_word(*it); + } catch (...) { + return end(); + } + decoder->check_sanity_with_metadata(path_); + return SegmentFileReader::Iterator{std::move(it), std::move(decoder), path()}; +} + +void SegmentFileReader::close() { + // Close decompressor that closes the mapped file in turns + decompressor_.close(); +} + +} // namespace silkworm::snapshots diff --git a/silkworm/db/datastore/snapshots/snapshot_reader.hpp b/silkworm/db/datastore/snapshots/segment/segment_reader.hpp similarity index 59% rename from silkworm/db/datastore/snapshots/snapshot_reader.hpp rename to silkworm/db/datastore/snapshots/segment/segment_reader.hpp index 6c37667b74..e8e31154d9 100644 --- a/silkworm/db/datastore/snapshots/snapshot_reader.hpp +++ b/silkworm/db/datastore/snapshots/segment/segment_reader.hpp @@ -31,23 +31,28 @@ #include #include -#include "common/iterator/iterator_read_into_vector.hpp" -#include "seg/decompressor.hpp" -#include "snapshot_path.hpp" -#include "snapshot_word_serializer.hpp" +#include "../common/codec.hpp" +#include "../common/snapshot_path.hpp" +#include "../common/util/iterator/iterator_read_into_vector.hpp" +#include "../seg/decompressor.hpp" namespace silkworm::snapshots { -//! \brief Generic snapshot containing data points for a specific block interval [block_from, block_to). -//! \warning The snapshot segment can also be externally managed. This means that the memory-mapping can happen -//! outside of this class and a \code Snapshot instance can be created by specifying the \code MemoryMappedRegion -//! segment containing the information about the memory region already mapped. This must be taken into account -//! because we must avoid to memory-map it again. -class Snapshot { +/** + * SegmentFileReader is a type-safe wrapper on top of a seg::Decompressor. + * + * The type-safe mechanism is based on Decoder interface. + * SegmentFileReader can be bound with any Decoder. + * SegmentFileReader is a template-free counterpart of SegmentReader. + * Use a SegmentReader for simple type-safe access to the data. + * SegmentFileReader can work with an externally owned MemoryMappedRegion if provided, + * otherwise the internal seg::Decompressor owns the memory mapped file. + */ +class SegmentFileReader { public: class Iterator { public: - using value_type = std::shared_ptr; + using value_type = std::shared_ptr; using iterator_category [[maybe_unused]] = std::input_iterator_tag; using difference_type = std::ptrdiff_t; using pointer = value_type*; @@ -55,11 +60,11 @@ class Snapshot { Iterator( seg::Decompressor::Iterator it, - std::shared_ptr deserializer, + std::shared_ptr decoder, SnapshotPath path) - : it_(std::move(it)), deserializer_(std::move(deserializer)), path_(std::move(path)) {} + : it_(std::move(it)), decoder_(std::move(decoder)), path_(std::move(path)) {} - value_type operator*() const { return deserializer_; } + value_type operator*() const { return decoder_; } Iterator operator++(int) { return std::exchange(*this, ++Iterator{*this}); } Iterator& operator++(); @@ -71,7 +76,7 @@ class Snapshot { private: seg::Decompressor::Iterator it_; - std::shared_ptr deserializer_; + std::shared_ptr decoder_; SnapshotPath path_; }; @@ -79,51 +84,51 @@ class Snapshot { static inline const auto kPageSize{os::page_size()}; - explicit Snapshot( + explicit SegmentFileReader( SnapshotPath path, std::optional segment_region = std::nullopt); - ~Snapshot(); + ~SegmentFileReader(); - Snapshot(Snapshot&&) = default; - Snapshot& operator=(Snapshot&&) = default; + SegmentFileReader(SegmentFileReader&&) = default; + SegmentFileReader& operator=(SegmentFileReader&&) = default; const SnapshotPath& path() const { return path_; } std::filesystem::path fs_path() const { return path_.path(); } bool empty() const { return item_count() == 0; } - size_t item_count() const { return decoder_.words_count(); } + size_t item_count() const { return decompressor_.words_count(); } MemoryMappedRegion memory_file_region() const; void reopen_segment(); void close(); - Iterator begin(std::shared_ptr deserializer) const; + Iterator begin(std::shared_ptr decoder) const; Iterator end() const; - Iterator seek(uint64_t offset, std::optional hash_prefix, std::shared_ptr deserializer) const; + Iterator seek(uint64_t offset, std::optional hash_prefix, std::shared_ptr decoder) const; private: - seg::Decompressor::Iterator seek_decoder(uint64_t offset, std::optional hash_prefix) const; + seg::Decompressor::Iterator seek_decompressor(uint64_t offset, std::optional hash_prefix) const; //! The path of the segment file for this snapshot SnapshotPath path_; - seg::Decompressor decoder_; + seg::Decompressor decompressor_; }; -template -class SnapshotReader { +template +class SegmentReader { public: class Iterator { public: - using value_type = decltype(TWordDeserializer::value); + using value_type = decltype(TDecoder::value); using iterator_category [[maybe_unused]] = std::input_iterator_tag; using difference_type = std::ptrdiff_t; using pointer = value_type*; using reference = value_type&; - explicit Iterator(Snapshot::Iterator it) + explicit Iterator(SegmentFileReader::Iterator it) : it_(std::move(it)) {} reference operator*() const { return value(); } @@ -145,31 +150,31 @@ class SnapshotReader { private: value_type& value() const { - SnapshotWordDeserializer& base_deserializer = **it_; - // dynamic_cast is safe because TWordDeserializer was used when creating the Iterator - auto& s = dynamic_cast(base_deserializer); - return s.value; + Decoder& base_decoder = **it_; + // dynamic_cast is safe because TDecoder was used when creating the Iterator + auto& decoder = dynamic_cast(base_decoder); + return decoder.value; } - Snapshot::Iterator it_; + SegmentFileReader::Iterator it_; }; static_assert(std::input_iterator); - using WordDeserializer = TWordDeserializer; + using DecoderType = TDecoder; - explicit SnapshotReader(const Snapshot& snapshot) : snapshot_(snapshot) {} + explicit SegmentReader(const SegmentFileReader& reader) : reader_(reader) {} Iterator begin() const { - return Iterator{snapshot_.begin(std::make_shared())}; + return Iterator{reader_.begin(std::make_shared())}; } Iterator end() const { - return Iterator{snapshot_.end()}; + return Iterator{reader_.end()}; } Iterator seek(uint64_t offset, std::optional hash_prefix = std::nullopt) const { - return Iterator{snapshot_.seek(offset, hash_prefix, std::make_shared())}; + return Iterator{reader_.seek(offset, hash_prefix, std::make_shared())}; } std::optional seek_one(uint64_t offset, std::optional hash_prefix = std::nullopt) const { @@ -180,19 +185,20 @@ class SnapshotReader { std::vector read_into_vector(uint64_t offset, size_t count) const { auto it = seek(offset); if (it == end()) { - throw std::runtime_error("SnapshotReader::read_into_vector: bad offset " + std::to_string(offset)); + throw std::runtime_error("SegmentReader::read_into_vector: bad offset " + std::to_string(offset)); } return iterator_read_into_vector(std::move(it), count); } - const SnapshotPath& path() const { return snapshot_.path(); } + const SnapshotPath& path() const { return reader_.path(); } private: - const Snapshot& snapshot_; + const SegmentFileReader& reader_; }; -template -concept SnapshotReaderConcept = std::same_as> || - std::derived_from>; +template +concept SegmentReaderConcept = + std::same_as> || + std::derived_from>; } // namespace silkworm::snapshots diff --git a/silkworm/db/datastore/snapshots/snapshot_writer.cpp b/silkworm/db/datastore/snapshots/segment/segment_writer.cpp similarity index 67% rename from silkworm/db/datastore/snapshots/snapshot_writer.cpp rename to silkworm/db/datastore/snapshots/segment/segment_writer.cpp index fd52b6da26..70f8cf3ddb 100644 --- a/silkworm/db/datastore/snapshots/snapshot_writer.cpp +++ b/silkworm/db/datastore/snapshots/segment/segment_writer.cpp @@ -14,27 +14,27 @@ limitations under the License. */ -#include "snapshot_writer.hpp" +#include "segment_writer.hpp" namespace silkworm::snapshots { -SnapshotFileWriter::SnapshotFileWriter( +SegmentFileWriter::SegmentFileWriter( SnapshotPath path, const std::filesystem::path& tmp_dir_path) : path_(std::move(path)), compressor_(path_.path(), tmp_dir_path) { } -SnapshotFileWriter::Iterator& SnapshotFileWriter::Iterator::operator=(const SnapshotFileWriter::Iterator::value_type& value) { +SegmentFileWriter::Iterator& SegmentFileWriter::Iterator::operator=(const SegmentFileWriter::Iterator::value_type& value) { *it_ = value->encode_word(); return *this; } -SnapshotFileWriter::Iterator SnapshotFileWriter::out(std::shared_ptr serializer) { - return SnapshotFileWriter::Iterator{compressor_.add_word_iterator(), std::move(serializer)}; +SegmentFileWriter::Iterator SegmentFileWriter::out(std::shared_ptr encoder) { + return SegmentFileWriter::Iterator{compressor_.add_word_iterator(), std::move(encoder)}; } -void SnapshotFileWriter::flush(SnapshotFileWriter writer) { +void SegmentFileWriter::flush(SegmentFileWriter writer) { seg::Compressor::compress(std::move(writer.compressor_)); } diff --git a/silkworm/db/datastore/snapshots/snapshot_writer.hpp b/silkworm/db/datastore/snapshots/segment/segment_writer.hpp similarity index 56% rename from silkworm/db/datastore/snapshots/snapshot_writer.hpp rename to silkworm/db/datastore/snapshots/segment/segment_writer.hpp index 218d0a66c0..17b7e7841e 100644 --- a/silkworm/db/datastore/snapshots/snapshot_writer.hpp +++ b/silkworm/db/datastore/snapshots/segment/segment_writer.hpp @@ -22,17 +22,17 @@ #include #include -#include "seg/compressor.hpp" -#include "snapshot_path.hpp" -#include "snapshot_word_serializer.hpp" +#include "../common/codec.hpp" +#include "../common/snapshot_path.hpp" +#include "../seg/compressor.hpp" namespace silkworm::snapshots { -class SnapshotFileWriter { +class SegmentFileWriter { public: class Iterator { public: - using value_type = std::shared_ptr; + using value_type = std::shared_ptr; using iterator_category [[maybe_unused]] = std::output_iterator_tag; using difference_type = std::ptrdiff_t; using pointer = void; @@ -40,8 +40,8 @@ class SnapshotFileWriter { Iterator( seg::Compressor::Iterator it, - std::shared_ptr serializer) - : it_(it), serializer_(std::move(serializer)) {} + std::shared_ptr encoder) + : it_(it), encoder_(std::move(encoder)) {} Iterator& operator*() { return *this; } @@ -53,45 +53,45 @@ class SnapshotFileWriter { Iterator& operator=(const value_type& value); - std::shared_ptr serializer() const { return serializer_; } + std::shared_ptr encoder() const { return encoder_; } private: seg::Compressor::Iterator it_; - std::shared_ptr serializer_; + std::shared_ptr encoder_; }; static_assert(std::output_iterator); - explicit SnapshotFileWriter( + explicit SegmentFileWriter( SnapshotPath path, const std::filesystem::path& tmp_dir_path); - SnapshotFileWriter(SnapshotFileWriter&&) = default; - SnapshotFileWriter& operator=(SnapshotFileWriter&&) = default; + SegmentFileWriter(SegmentFileWriter&&) = default; + SegmentFileWriter& operator=(SegmentFileWriter&&) = default; SnapshotPath path() const { return path_; } - Iterator out(std::shared_ptr serializer); + Iterator out(std::shared_ptr encoder); - static void flush(SnapshotFileWriter writer); + static void flush(SegmentFileWriter writer); private: SnapshotPath path_; seg::Compressor compressor_; }; -template -class SnapshotWriter { +template +class SegmentWriter { public: class Iterator { public: - using value_type = decltype(TWordSerializer::value); + using value_type = decltype(TEncoder::value); using iterator_category [[maybe_unused]] = std::output_iterator_tag; using difference_type = std::ptrdiff_t; using pointer = void; using reference = void; - explicit Iterator(SnapshotFileWriter::Iterator it) + explicit Iterator(SegmentFileWriter::Iterator it) : it_(std::move(it)) {} Iterator& operator*() { return *this; } @@ -108,33 +108,34 @@ class SnapshotWriter { } private: - SnapshotFileWriter::Iterator::value_type set_value(value_type value) { - SnapshotWordSerializer& base_serializer = *it_.serializer(); - // dynamic_cast is safe because TWordSerializer was used when creating the Iterator - auto& s = dynamic_cast(base_serializer); - s.value = std::move(value); - return it_.serializer(); + SegmentFileWriter::Iterator::value_type set_value(value_type value) { + Encoder& base_encoder = *it_.encoder(); + // dynamic_cast is safe because TEncoder was used when creating the Iterator + auto& encoder = dynamic_cast(base_encoder); + encoder.value = std::move(value); + return it_.encoder(); } - SnapshotFileWriter::Iterator it_; + SegmentFileWriter::Iterator it_; }; static_assert(std::output_iterator); - using WordDeserializer = TWordSerializer; + using EncoderType = TEncoder; - explicit SnapshotWriter(SnapshotFileWriter& snapshot) : snapshot_(snapshot) {} + explicit SegmentWriter(SegmentFileWriter& writer) : writer_(writer) {} Iterator out() { - return Iterator{snapshot_.out(std::make_shared())}; + return Iterator{writer_.out(std::make_shared())}; } private: - SnapshotFileWriter& snapshot_; + SegmentFileWriter& writer_; }; -template -concept SnapshotWriterConcept = std::same_as> || - std::derived_from>; +template +concept SegmentWriterConcept = + std::same_as> || + std::derived_from>; } // namespace silkworm::snapshots diff --git a/silkworm/db/datastore/snapshots/snapshot_and_index.hpp b/silkworm/db/datastore/snapshots/segment_and_index.hpp similarity index 83% rename from silkworm/db/datastore/snapshots/snapshot_and_index.hpp rename to silkworm/db/datastore/snapshots/segment_and_index.hpp index 7a9f047b48..2d64cb7191 100644 --- a/silkworm/db/datastore/snapshots/snapshot_and_index.hpp +++ b/silkworm/db/datastore/snapshots/segment_and_index.hpp @@ -16,13 +16,13 @@ #pragma once -#include "index.hpp" -#include "snapshot_reader.hpp" +#include "rec_split_index/index.hpp" +#include "segment/segment_reader.hpp" namespace silkworm::snapshots { -struct SnapshotAndIndex { - const Snapshot& snapshot; +struct SegmentAndIndex { + const SegmentFileReader& segment; const Index& index; }; diff --git a/silkworm/db/datastore/snapshots/snapshot_bundle.cpp b/silkworm/db/datastore/snapshots/snapshot_bundle.cpp index 3a4ec2f385..f647da87bb 100644 --- a/silkworm/db/datastore/snapshots/snapshot_bundle.cpp +++ b/silkworm/db/datastore/snapshots/snapshot_bundle.cpp @@ -25,10 +25,10 @@ SnapshotBundle::~SnapshotBundle() { } void SnapshotBundle::reopen() { - for (auto& snapshot_ref : snapshots()) { - snapshot_ref.get().reopen_segment(); - ensure(!snapshot_ref.get().empty(), [&]() { - return "invalid empty snapshot " + snapshot_ref.get().fs_path().string(); + for (auto& segment_ref : segments()) { + segment_ref.get().reopen_segment(); + ensure(!segment_ref.get().empty(), [&]() { + return "invalid empty snapshot " + segment_ref.get().fs_path().string(); }); } for (auto& index_ref : indexes()) { @@ -40,8 +40,8 @@ void SnapshotBundle::close() { for (auto& index_ref : indexes()) { index_ref.get().close_index(); } - for (auto& snapshot_ref : snapshots()) { - snapshot_ref.get().close(); + for (auto& segment_ref : segments()) { + segment_ref.get().close(); } if (on_close_callback_) { on_close_callback_(*this); @@ -52,8 +52,8 @@ std::vector SnapshotBundle::files() { std::vector files; files.reserve(kSnapshotsCount + kIndexesCount); - for (auto& snapshot_ref : snapshots()) { - files.push_back(snapshot_ref.get().path().path()); + for (auto& segment_ref : segments()) { + files.push_back(segment_ref.get().path().path()); } for (auto& index_ref : indexes()) { files.push_back(index_ref.get().path().path()); @@ -65,8 +65,8 @@ std::vector SnapshotBundle::snapshot_paths() { std::vector paths; paths.reserve(kSnapshotsCount); - for (auto& snapshot_ref : snapshots()) { - paths.push_back(snapshot_ref.get().path()); + for (auto& segment_ref : segments()) { + paths.push_back(segment_ref.get().path()); } return paths; } diff --git a/silkworm/db/datastore/snapshots/snapshot_bundle.hpp b/silkworm/db/datastore/snapshots/snapshot_bundle.hpp index c0acc3dc82..6fca9ae6c4 100644 --- a/silkworm/db/datastore/snapshots/snapshot_bundle.hpp +++ b/silkworm/db/datastore/snapshots/snapshot_bundle.hpp @@ -24,23 +24,23 @@ #include #include -#include "index.hpp" -#include "snapshot_and_index.hpp" -#include "snapshot_path.hpp" -#include "snapshot_reader.hpp" +#include "common/snapshot_path.hpp" +#include "rec_split_index/index.hpp" +#include "segment/segment_reader.hpp" +#include "segment_and_index.hpp" namespace silkworm::snapshots { struct SnapshotBundleData { - Snapshot header_snapshot; + SegmentFileReader header_segment; //! Index header_hash -> block_num -> headers_segment_offset Index idx_header_hash; - Snapshot body_snapshot; + SegmentFileReader body_segment; //! Index block_num -> bodies_segment_offset Index idx_body_number; - Snapshot txn_snapshot; + SegmentFileReader txn_segment; //! Index transaction_hash -> txn_id -> transactions_segment_offset Index idx_txn_hash; //! Index transaction_hash -> block_num @@ -57,11 +57,11 @@ struct SnapshotBundle : public SnapshotBundleData { SnapshotBundle(SnapshotBundle&&) = default; SnapshotBundle& operator=(SnapshotBundle&&) noexcept = default; - std::array, kSnapshotsCount> snapshots() { + std::array, kSnapshotsCount> segments() { return { - header_snapshot, - body_snapshot, - txn_snapshot, + header_segment, + body_segment, + txn_segment, }; } @@ -91,18 +91,18 @@ struct SnapshotBundle : public SnapshotBundleData { }; } - const Snapshot& snapshot(SnapshotType type) const { + const SegmentFileReader& segment(SnapshotType type) const { switch (type) { case headers: - return header_snapshot; + return header_segment; case bodies: - return body_snapshot; + return body_segment; case transactions: case transactions_to_block: - return txn_snapshot; + return txn_segment; } SILKWORM_ASSERT(false); - return header_snapshot; + return header_segment; } const Index& index(SnapshotType type) const { @@ -120,12 +120,12 @@ struct SnapshotBundle : public SnapshotBundleData { return idx_header_hash; } - SnapshotAndIndex snapshot_and_index(SnapshotType type) const { - return {snapshot(type), index(type)}; + SegmentAndIndex segment_and_index(SnapshotType type) const { + return {segment(type), index(type)}; } // assume that all snapshots have the same block range, and use one of them - BlockNumRange block_range() const { return header_snapshot.path().step_range().to_block_num_range(); } + BlockNumRange block_range() const { return header_segment.path().step_range().to_block_num_range(); } size_t block_count() const { return block_range().size(); } std::vector files(); diff --git a/silkworm/db/datastore/snapshots/snapshot_bundle_factory.hpp b/silkworm/db/datastore/snapshots/snapshot_bundle_factory.hpp index 37d09a1d05..b86b7ec9f2 100644 --- a/silkworm/db/datastore/snapshots/snapshot_bundle_factory.hpp +++ b/silkworm/db/datastore/snapshots/snapshot_bundle_factory.hpp @@ -23,10 +23,9 @@ #include +#include "common/snapshot_path.hpp" #include "index_builder.hpp" #include "snapshot_bundle.hpp" -#include "snapshot_path.hpp" -#include "snapshot_type.hpp" namespace silkworm::snapshots { @@ -37,8 +36,8 @@ struct SnapshotBundleFactory { virtual SnapshotBundle make(PathByTypeProvider snapshot_path, PathByTypeProvider index_path) const = 0; virtual SnapshotBundle make(const std::filesystem::path& dir_path, BlockNumRange range) const = 0; - virtual std::vector> index_builders(const SnapshotPath& seg_file) const = 0; - virtual std::vector> index_builders(const SnapshotPathList& snapshot_paths) const = 0; + virtual std::vector> index_builders(const SnapshotPath& segment_path) const = 0; + virtual std::vector> index_builders(const SnapshotPathList& segment_paths) const = 0; }; } // namespace silkworm::snapshots diff --git a/silkworm/db/datastore/snapshots/snapshot_reader.cpp b/silkworm/db/datastore/snapshots/snapshot_reader.cpp deleted file mode 100644 index 7b24e4888f..0000000000 --- a/silkworm/db/datastore/snapshots/snapshot_reader.cpp +++ /dev/null @@ -1,115 +0,0 @@ -/* - Copyright 2024 The Silkworm Authors - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "snapshot_reader.hpp" - -#include - -#include -#include - -namespace silkworm::snapshots { - -Snapshot::Snapshot( - SnapshotPath path, - std::optional segment_region) - : path_(std::move(path)), - decoder_{path_.path(), segment_region} {} - -Snapshot::~Snapshot() { - close(); -} - -MemoryMappedRegion Snapshot::memory_file_region() const { - const auto memory_file{decoder_.memory_file()}; - if (!memory_file) return MemoryMappedRegion{}; - return memory_file->region(); -} - -void Snapshot::reopen_segment() { - close(); - - // Open decompressor that opens the mapped file in turns - decoder_.open(); -} - -Snapshot::Iterator& Snapshot::Iterator::operator++() { - bool has_next = it_.has_next(); - ++it_; - - if (has_next) { - deserializer_->decode_word(*it_); - deserializer_->check_sanity_with_metadata(path_); - } else { - deserializer_.reset(); - } - return *this; -} - -Snapshot::Iterator& Snapshot::Iterator::operator+=(size_t count) { - while ((count > 1) && it_.has_next()) { - it_.skip(); - --count; - } - if (count > 0) { - ++*this; - } - return *this; -} - -bool operator==(const Snapshot::Iterator& lhs, const Snapshot::Iterator& rhs) { - return (lhs.deserializer_ == rhs.deserializer_) && - (!lhs.deserializer_ || (lhs.it_ == rhs.it_)); -} - -Snapshot::Iterator Snapshot::begin(std::shared_ptr deserializer) const { - auto it = decoder_.begin(); - if (it == decoder_.end()) { - return end(); - } - deserializer->decode_word(*it); - deserializer->check_sanity_with_metadata(path_); - return Snapshot::Iterator{std::move(it), std::move(deserializer), path()}; -} - -Snapshot::Iterator Snapshot::end() const { - return Snapshot::Iterator{decoder_.end(), {}, path()}; -} - -seg::Decompressor::Iterator Snapshot::seek_decoder(uint64_t offset, std::optional hash_prefix) const { - return decoder_.seek(offset, hash_prefix ? ByteView{hash_prefix->bytes, 1} : ByteView{}); -} - -Snapshot::Iterator Snapshot::seek(uint64_t offset, std::optional hash_prefix, std::shared_ptr deserializer) const { - auto it = seek_decoder(offset, hash_prefix); - if (it == decoder_.end()) { - return end(); - } - try { - deserializer->decode_word(*it); - } catch (...) { - return end(); - } - deserializer->check_sanity_with_metadata(path_); - return Snapshot::Iterator{std::move(it), std::move(deserializer), path()}; -} - -void Snapshot::close() { - // Close decompressor that closes the mapped file in turns - decoder_.close(); -} - -} // namespace silkworm::snapshots diff --git a/silkworm/db/datastore/snapshots/snapshot_repository.cpp b/silkworm/db/datastore/snapshots/snapshot_repository.cpp index c766875f01..ebc610a669 100644 --- a/silkworm/db/datastore/snapshots/snapshot_repository.cpp +++ b/silkworm/db/datastore/snapshots/snapshot_repository.cpp @@ -82,10 +82,10 @@ BlockNum SnapshotRepository::max_block_available() const { return (block_num_range.size() > 0) ? block_num_range.end - 1 : block_num_range.start; } -std::pair, std::shared_ptr> SnapshotRepository::find_segment(SnapshotType type, BlockNum number) const { +std::pair, std::shared_ptr> SnapshotRepository::find_segment(SnapshotType type, BlockNum number) const { auto bundle = find_bundle(number); if (bundle) { - return {bundle->snapshot_and_index(type), bundle}; + return {bundle->segment_and_index(type), bundle}; } return {std::nullopt, {}}; } @@ -157,7 +157,7 @@ void SnapshotRepository::reopen_folder() { lock.unlock(); SILK_INFO << "Total reopened bundles: " << bundles_count() - << " snapshots: " << total_snapshots_count() + << " segments: " << total_segments_count() << " indexes: " << total_indexes_count() << " max block available: " << max_block_available(); } diff --git a/silkworm/db/datastore/snapshots/snapshot_repository.hpp b/silkworm/db/datastore/snapshots/snapshot_repository.hpp index eb1d77107a..b3b2e5c4b4 100644 --- a/silkworm/db/datastore/snapshots/snapshot_repository.hpp +++ b/silkworm/db/datastore/snapshots/snapshot_repository.hpp @@ -29,12 +29,12 @@ #include -#include "common/iterator/map_values_view.hpp" +#include "common/snapshot_path.hpp" +#include "common/util/iterator/map_values_view.hpp" #include "index_builder.hpp" -#include "snapshot_and_index.hpp" +#include "segment_and_index.hpp" #include "snapshot_bundle.hpp" #include "snapshot_bundle_factory.hpp" -#include "snapshot_path.hpp" #include "snapshot_settings.hpp" namespace silkworm::snapshots { @@ -67,7 +67,7 @@ class SnapshotRepository { void replace_snapshot_bundles(SnapshotBundle bundle); size_t bundles_count() const; - size_t total_snapshots_count() const { return bundles_count() * SnapshotBundle::kSnapshotsCount; } + size_t total_segments_count() const { return bundles_count() * SnapshotBundle::kSnapshotsCount; } size_t total_indexes_count() const { return bundles_count() * SnapshotBundle::kIndexesCount; } //! All types of .seg and .idx files are available up to this block number @@ -106,7 +106,7 @@ class SnapshotRepository { return BundlesView{std::ranges::reverse_view(make_map_values_view(*bundles_)), bundles_}; } - std::pair, std::shared_ptr> find_segment(SnapshotType type, BlockNum number) const; + std::pair, std::shared_ptr> find_segment(SnapshotType type, BlockNum number) const; std::shared_ptr find_bundle(BlockNum number) const; std::vector> bundles_in_range(BlockNumRange range) const; diff --git a/silkworm/db/freezer.cpp b/silkworm/db/freezer.cpp index 7eef41f34b..db098ac446 100644 --- a/silkworm/db/freezer.cpp +++ b/silkworm/db/freezer.cpp @@ -26,14 +26,14 @@ #include #include "access_layer.hpp" -#include "blocks/bodies/body_snapshot_freezer.hpp" -#include "blocks/headers/header_snapshot_freezer.hpp" -#include "datastore/snapshot_freezer.hpp" +#include "blocks/bodies/body_segment_collation.hpp" +#include "blocks/headers/header_segment_collation.hpp" +#include "datastore/segment_collation.hpp" +#include "datastore/snapshots/common/snapshot_path.hpp" +#include "datastore/snapshots/segment/segment_writer.hpp" #include "datastore/snapshots/snapshot_bundle.hpp" -#include "datastore/snapshots/snapshot_path.hpp" -#include "datastore/snapshots/snapshot_writer.hpp" #include "prune_mode.hpp" -#include "transactions/txn_snapshot_freezer.hpp" +#include "transactions/txn_segment_collation.hpp" namespace silkworm::db { @@ -81,23 +81,23 @@ std::unique_ptr Freezer::next_command() { }(); if (end + kFullImmutabilityThreshold <= tip) { - return std::make_unique(FreezerCommand{{start, end}, base_txn_id}); + return std::make_unique(SegmentCollationCommand{{start, end}, base_txn_id}); } return {}; } -static const SnapshotFreezer& get_snapshot_freezer(SnapshotType type) { - static HeaderSnapshotFreezer header_snapshot_freezer; - static BodySnapshotFreezer body_snapshot_freezer; - static TransactionSnapshotFreezer txn_snapshot_freezer; +static const SegmentCollation& get_collation(SnapshotType type) { + static HeaderSegmentCollation header_collation; + static BodySegmentCollation body_collation; + static TransactionSegmentCollation txn_collation; switch (type) { case SnapshotType::headers: - return header_snapshot_freezer; + return header_collation; case SnapshotType::bodies: - return body_snapshot_freezer; + return body_collation; case SnapshotType::transactions: - return txn_snapshot_freezer; + return txn_collation; default: SILKWORM_ASSERT(false); throw std::runtime_error("invalid type"); @@ -105,19 +105,19 @@ static const SnapshotFreezer& get_snapshot_freezer(SnapshotType type) { } std::shared_ptr Freezer::migrate(std::unique_ptr command) { - auto& freezer_command = dynamic_cast(*command); + auto& freezer_command = dynamic_cast(*command); auto range = freezer_command.range; auto bundle = snapshots_.bundle_factory().make(tmp_dir_path_, range); - for (auto& snapshot_ref : bundle.snapshots()) { - auto path = snapshot_ref.get().path(); - SnapshotFileWriter file_writer{path, tmp_dir_path_}; + for (auto& segment_ref : bundle.segments()) { + auto path = segment_ref.get().path(); + SegmentFileWriter file_writer{path, tmp_dir_path_}; { auto db_tx = db_access_.start_ro_tx(); - auto& freezer = get_snapshot_freezer(path.type()); + auto& freezer = get_collation(path.type()); freezer.copy(db_tx, freezer_command, file_writer); } - SnapshotFileWriter::flush(std::move(file_writer)); + SegmentFileWriter::flush(std::move(file_writer)); } return std::make_shared(std::move(bundle)); @@ -162,14 +162,14 @@ Task Freezer::cleanup() { } co_await stage_scheduler_.schedule([this, range](RWTxn& db_tx) { - this->cleanup(db_tx, range); + this->prune_collations(db_tx, range); }); } -void Freezer::cleanup(RWTxn& db_tx, BlockNumRange range) const { - get_snapshot_freezer(SnapshotType::transactions).cleanup(db_tx, range); - get_snapshot_freezer(SnapshotType::bodies).cleanup(db_tx, range); - get_snapshot_freezer(SnapshotType::headers).cleanup(db_tx, range); +void Freezer::prune_collations(RWTxn& db_tx, BlockNumRange range) const { + get_collation(SnapshotType::transactions).prune(db_tx, range); + get_collation(SnapshotType::bodies).prune(db_tx, range); + get_collation(SnapshotType::headers).prune(db_tx, range); } } // namespace silkworm::db diff --git a/silkworm/db/freezer.hpp b/silkworm/db/freezer.hpp index ca158603fe..3276f6700c 100644 --- a/silkworm/db/freezer.hpp +++ b/silkworm/db/freezer.hpp @@ -47,7 +47,7 @@ class Freezer : public DataMigration { void commit(std::shared_ptr result) override; Task cleanup() override; BlockNumRange cleanup_range(); - void cleanup(RWTxn& db_tx, BlockNumRange range) const; + void prune_collations(RWTxn& db_tx, BlockNumRange range) const; db::ROAccess db_access_; snapshots::SnapshotRepository& snapshots_; diff --git a/silkworm/db/snapshot_benchmark.cpp b/silkworm/db/snapshot_benchmark.cpp index 747a609b14..410e12edf5 100644 --- a/silkworm/db/snapshot_benchmark.cpp +++ b/silkworm/db/snapshot_benchmark.cpp @@ -80,13 +80,13 @@ static void build_header_index(benchmark::State& state) { // These sample snapshot files just contain data for block range [1'500'012, 1'500'013], hence current snapshot // file name format is not sufficient to support them (see checks commented out below) - test::SampleHeaderSnapshotFile header_snapshot{tmp_dir.path()}; - test::SampleBodySnapshotFile body_snapshot{tmp_dir.path()}; - test::SampleTransactionSnapshotFile txn_snapshot{tmp_dir.path()}; + test::SampleHeaderSnapshotFile header_segment{tmp_dir.path()}; + test::SampleBodySnapshotFile body_segment{tmp_dir.path()}; + test::SampleTransactionSnapshotFile txn_segment{tmp_dir.path()}; for ([[maybe_unused]] auto _ : state) { - auto header_index = HeaderIndex::make(header_snapshot.path()); - header_index.set_base_data_id(header_snapshot.block_num_range().start); + auto header_index = HeaderIndex::make(header_segment.path()); + header_index.set_base_data_id(header_segment.block_num_range().start); header_index.build(); } } @@ -99,11 +99,11 @@ static void build_body_index(benchmark::State& state) { // These sample snapshot files just contain data for block range [1'500'012, 1'500'013], hence current snapshot // file name format is not sufficient to support them (see checks commented out below) - test::SampleBodySnapshotFile body_snapshot{tmp_dir.path()}; + test::SampleBodySnapshotFile body_segment{tmp_dir.path()}; for ([[maybe_unused]] auto _ : state) { - auto body_index = BodyIndex::make(body_snapshot.path()); - body_index.set_base_data_id(body_snapshot.block_num_range().start); + auto body_index = BodyIndex::make(body_segment.path()); + body_index.set_base_data_id(body_segment.block_num_range().start); body_index.build(); } } @@ -116,19 +116,19 @@ static void build_tx_index(benchmark::State& state) { // These sample snapshot files just contain data for block range [1'500'012, 1'500'013], hence current snapshot // file name format is not sufficient to support them (see checks commented out below) - test::SampleBodySnapshotFile body_snapshot{tmp_dir.path()}; - test::SampleTransactionSnapshotFile txn_snapshot{tmp_dir.path()}; + test::SampleBodySnapshotFile body_segment{tmp_dir.path()}; + test::SampleTransactionSnapshotFile txn_segment{tmp_dir.path()}; for ([[maybe_unused]] auto _ : state) { - auto& body_snapshot_path = body_snapshot.path(); - auto body_index = snapshots::BodyIndex::make(body_snapshot_path); - body_index.set_base_data_id(body_snapshot.block_num_range().start); + auto& body_segment_path = body_segment.path(); + auto body_index = snapshots::BodyIndex::make(body_segment_path); + body_index.set_base_data_id(body_segment.block_num_range().start); body_index.build(); - auto& txn_snapshot_path = txn_snapshot.path(); - auto tx_index = TransactionIndex::make(body_snapshot_path, txn_snapshot_path); + auto& txn_segment_path = txn_segment.path(); + auto tx_index = TransactionIndex::make(body_segment_path, txn_segment_path); tx_index.build(); - auto tx_index_hash_to_block = TransactionToBlockIndex::make(body_snapshot_path, txn_snapshot_path, txn_snapshot.block_num_range().start); + auto tx_index_hash_to_block = TransactionToBlockIndex::make(body_segment_path, txn_segment_path, txn_segment.block_num_range().start); tx_index_hash_to_block.build(); } } @@ -142,23 +142,23 @@ static void reopen_folder(benchmark::State& state) { // These sample snapshot files just contain data for block range [1'500'012, 1'500'013], hence current snapshot // file name format is not sufficient to support them (see checks commented out below) - test::SampleHeaderSnapshotFile header_snapshot{tmp_dir.path()}; - test::SampleBodySnapshotFile body_snapshot{tmp_dir.path()}; - test::SampleTransactionSnapshotFile txn_snapshot{tmp_dir.path()}; + test::SampleHeaderSnapshotFile header_segment{tmp_dir.path()}; + test::SampleBodySnapshotFile body_segment{tmp_dir.path()}; + test::SampleTransactionSnapshotFile txn_segment{tmp_dir.path()}; - auto header_index = HeaderIndex::make(header_snapshot.path()); - header_index.set_base_data_id(header_snapshot.block_num_range().start); + auto header_index = HeaderIndex::make(header_segment.path()); + header_index.set_base_data_id(header_segment.block_num_range().start); header_index.build(); - auto& body_snapshot_path = body_snapshot.path(); - auto body_index = BodyIndex::make(body_snapshot_path); - body_index.set_base_data_id(body_snapshot.block_num_range().start); + auto& body_segment_path = body_segment.path(); + auto body_index = BodyIndex::make(body_segment_path); + body_index.set_base_data_id(body_segment.block_num_range().start); body_index.build(); - auto& txn_snapshot_path = txn_snapshot.path(); - auto tx_index = TransactionIndex::make(body_snapshot_path, txn_snapshot_path); + auto& txn_segment_path = txn_segment.path(); + auto tx_index = TransactionIndex::make(body_segment_path, txn_segment_path); tx_index.build(); - auto tx_index_hash_to_block = TransactionToBlockIndex::make(body_snapshot_path, txn_snapshot_path, txn_snapshot.block_num_range().start); + auto tx_index_hash_to_block = TransactionToBlockIndex::make(body_segment_path, txn_segment_path, txn_segment.block_num_range().start); tx_index_hash_to_block.build(); for ([[maybe_unused]] auto _ : state) { diff --git a/silkworm/db/snapshot_bundle_factory_impl.cpp b/silkworm/db/snapshot_bundle_factory_impl.cpp index deb92aa159..0471c4d9a5 100644 --- a/silkworm/db/snapshot_bundle_factory_impl.cpp +++ b/silkworm/db/snapshot_bundle_factory_impl.cpp @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include @@ -29,13 +29,13 @@ using namespace snapshots; SnapshotBundle SnapshotBundleFactoryImpl::make(PathByTypeProvider snapshot_path, PathByTypeProvider index_path) const { return SnapshotBundle{{ - .header_snapshot = Snapshot(snapshot_path(SnapshotType::headers)), + .header_segment = SegmentFileReader(snapshot_path(SnapshotType::headers)), .idx_header_hash = Index(index_path(SnapshotType::headers)), - .body_snapshot = Snapshot(snapshot_path(SnapshotType::bodies)), + .body_segment = SegmentFileReader(snapshot_path(SnapshotType::bodies)), .idx_body_number = Index(index_path(SnapshotType::bodies)), - .txn_snapshot = Snapshot(snapshot_path(SnapshotType::transactions)), + .txn_segment = SegmentFileReader(snapshot_path(SnapshotType::transactions)), .idx_txn_hash = Index(index_path(SnapshotType::transactions)), .idx_txn_hash_2_block = Index(index_path(SnapshotType::transactions_to_block)), }}; @@ -52,18 +52,18 @@ SnapshotBundle SnapshotBundleFactoryImpl::make(const std::filesystem::path& dir_ return make(std::move(snapshot_path), std::move(index_path)); } -std::vector> SnapshotBundleFactoryImpl::index_builders(const SnapshotPath& seg_file) const { - switch (seg_file.type()) { +std::vector> SnapshotBundleFactoryImpl::index_builders(const SnapshotPath& segment_path) const { + switch (segment_path.type()) { case SnapshotType::headers: - return {std::make_shared(HeaderIndex::make(seg_file))}; + return {std::make_shared(HeaderIndex::make(segment_path))}; case SnapshotType::bodies: - return {std::make_shared(BodyIndex::make(seg_file))}; + return {std::make_shared(BodyIndex::make(segment_path))}; case SnapshotType::transactions: { - auto bodies_segment_path = seg_file.related_path(SnapshotType::bodies, kSegmentExtension); + auto bodies_segment_path = segment_path.related_path(SnapshotType::bodies, kSegmentExtension); if (!bodies_segment_path.exists()) return {}; return { - std::make_shared(TransactionIndex::make(bodies_segment_path, seg_file)), - std::make_shared(TransactionToBlockIndex::make(bodies_segment_path, seg_file)), + std::make_shared(TransactionIndex::make(bodies_segment_path, segment_path)), + std::make_shared(TransactionToBlockIndex::make(bodies_segment_path, segment_path)), }; } default: @@ -72,9 +72,9 @@ std::vector> SnapshotBundleFactoryImpl::index_buil } } -std::vector> SnapshotBundleFactoryImpl::index_builders(const SnapshotPathList& snapshot_paths) const { +std::vector> SnapshotBundleFactoryImpl::index_builders(const SnapshotPathList& segment_paths) const { std::vector> all_builders; - for (const auto& path : snapshot_paths) { + for (const auto& path : segment_paths) { auto builders = index_builders(path); all_builders.insert(all_builders.end(), builders.begin(), builders.end()); } diff --git a/silkworm/db/snapshot_bundle_factory_impl.hpp b/silkworm/db/snapshot_bundle_factory_impl.hpp index c537387b4b..dee710f773 100644 --- a/silkworm/db/snapshot_bundle_factory_impl.hpp +++ b/silkworm/db/snapshot_bundle_factory_impl.hpp @@ -25,8 +25,8 @@ struct SnapshotBundleFactoryImpl : public snapshots::SnapshotBundleFactory { snapshots::SnapshotBundle make(PathByTypeProvider snapshot_path, PathByTypeProvider index_path) const override; snapshots::SnapshotBundle make(const std::filesystem::path& dir_path, BlockNumRange range) const override; - std::vector> index_builders(const snapshots::SnapshotPath& seg_file) const override; - std::vector> index_builders(const snapshots::SnapshotPathList& snapshot_paths) const override; + std::vector> index_builders(const snapshots::SnapshotPath& segment_path) const override; + std::vector> index_builders(const snapshots::SnapshotPathList& segment_paths) const override; }; } // namespace silkworm::db diff --git a/silkworm/db/snapshot_index_builder_test.cpp b/silkworm/db/snapshot_index_builder_test.cpp index e72c9a270f..11c41faa12 100644 --- a/silkworm/db/snapshot_index_builder_test.cpp +++ b/silkworm/db/snapshot_index_builder_test.cpp @@ -45,9 +45,9 @@ TEST_CASE("Index::Index", "[silkworm][snapshot][index]") { TEST_CASE("BodyIndex::build OK", "[silkworm][snapshot][index]") { SetLogVerbosityGuard guard{log::Level::kNone}; TemporaryDirectory tmp_dir; - test::SampleBodySnapshotFile body_snapshot_file{tmp_dir.path()}; - auto body_index = BodyIndex::make(body_snapshot_file.path()); - body_index.set_base_data_id(body_snapshot_file.block_num_range().start); + test::SampleBodySnapshotFile body_segment_file{tmp_dir.path()}; + auto body_index = BodyIndex::make(body_segment_file.path()); + body_index.set_base_data_id(body_segment_file.block_num_range().start); CHECK_NOTHROW(body_index.build()); } @@ -58,14 +58,14 @@ TEST_CASE("TransactionIndex::build KO: empty snapshot", "[silkworm][snapshot][in constexpr const char* kTransactionsSnapshotFileName{"v1-014500-015000-transactions.seg"}; SECTION("KO: empty body snapshot", "[.]") { - test::TemporarySnapshotFile bodies_snapshot_file{tmp_dir.path(), kBodiesSnapshotFileName}; - test::TemporarySnapshotFile txs_snapshot_file{tmp_dir.path(), kTransactionsSnapshotFileName}; + test::TemporarySnapshotFile body_segment_file{tmp_dir.path(), kBodiesSnapshotFileName}; + test::TemporarySnapshotFile txn_segment_file{tmp_dir.path(), kTransactionsSnapshotFileName}; - auto& txs_snapshot_path = txs_snapshot_file.path(); - auto& bodies_snapshot_path = bodies_snapshot_file.path(); + auto& txn_segment_path = txn_segment_file.path(); + auto& body_segment_path = body_segment_file.path(); - CHECK_THROWS_WITH(TransactionIndex::make(bodies_snapshot_path, txs_snapshot_path).build(), ContainsSubstring("empty body snapshot")); - CHECK_THROWS_WITH(TransactionToBlockIndex::make(bodies_snapshot_path, txs_snapshot_path).build(), ContainsSubstring("empty body snapshot")); + CHECK_THROWS_WITH(TransactionIndex::make(body_segment_path, txn_segment_path).build(), ContainsSubstring("empty body snapshot")); + CHECK_THROWS_WITH(TransactionToBlockIndex::make(body_segment_path, txn_segment_path).build(), ContainsSubstring("empty body snapshot")); } } @@ -75,7 +75,7 @@ TEST_CASE("TransactionIndex::build KO: invalid snapshot", "[silkworm][snapshot][ constexpr const char* kTransactionsSnapshotFileName{"v1-015000-015500-transactions.seg"}; SECTION("KO: invalid zero word length") { - test::TemporarySnapshotFile bodies_snapshot_file{ + test::TemporarySnapshotFile body_segment_file{ tmp_dir.path(), "v1-015000-015500-bodies.seg", test::SnapshotHeader{ @@ -85,83 +85,83 @@ TEST_CASE("TransactionIndex::build KO: invalid snapshot", "[silkworm][snapshot][ .positions = {}}, test::SnapshotBody{ *from_hex("0000000000000000")}}; - test::TemporarySnapshotFile txs_snapshot_file{tmp_dir.path(), kTransactionsSnapshotFileName}; + test::TemporarySnapshotFile txn_segment_file{tmp_dir.path(), kTransactionsSnapshotFileName}; - auto& txs_snapshot_path = txs_snapshot_file.path(); - auto& bodies_snapshot_path = bodies_snapshot_file.path(); + auto& txn_segment_path = txn_segment_file.path(); + auto& body_segment_path = body_segment_file.path(); - CHECK_THROWS_WITH(TransactionIndex::make(bodies_snapshot_path, txs_snapshot_path).build(), StartsWith("invalid zero word length")); - CHECK_THROWS_WITH(TransactionToBlockIndex::make(bodies_snapshot_path, txs_snapshot_path).build(), StartsWith("invalid zero word length")); + CHECK_THROWS_WITH(TransactionIndex::make(body_segment_path, txn_segment_path).build(), StartsWith("invalid zero word length")); + CHECK_THROWS_WITH(TransactionToBlockIndex::make(body_segment_path, txn_segment_path).build(), StartsWith("invalid zero word length")); } SECTION("KO: invalid position depth") { - test::SampleBodySnapshotFile bodies_snapshot_file{ + test::SampleBodySnapshotFile body_segment_file{ tmp_dir.path(), "000000000000000e000000000000000000000000000000000000000000000004" "c100010801c6837004d980c001c6837004d980c001c6837004d980c001c68370" // {c1, 00} <- c1 instead of 01 "04d980c001c6837004d980c001c6837004d980c001c6837004d980c001c68370" "04d980c001c6837004d980c001c6837004d980c001c6837004d980c001c68370" "04d980c001c6837004d980c001c6837004d901c0"}; - auto& bodies_snapshot_path = bodies_snapshot_file.path(); - test::SampleTransactionSnapshotFile txs_snapshot_file{tmp_dir.path()}; - auto& txs_snapshot_path = txs_snapshot_file.path(); + auto& body_segment_path = body_segment_file.path(); + test::SampleTransactionSnapshotFile txn_segment_file{tmp_dir.path()}; + auto& txn_segment_path = txn_segment_file.path(); - CHECK_THROWS_WITH(TransactionIndex::make(bodies_snapshot_path, txs_snapshot_path).build(), ContainsSubstring("invalid: position depth")); - CHECK_THROWS_WITH(TransactionToBlockIndex::make(bodies_snapshot_path, txs_snapshot_path, txs_snapshot_file.block_num_range().start).build(), ContainsSubstring("invalid: position depth")); + CHECK_THROWS_WITH(TransactionIndex::make(body_segment_path, txn_segment_path).build(), ContainsSubstring("invalid: position depth")); + CHECK_THROWS_WITH(TransactionToBlockIndex::make(body_segment_path, txn_segment_path, txn_segment_file.block_num_range().start).build(), ContainsSubstring("invalid: position depth")); } SECTION("KO: invalid position value") { - test::SampleBodySnapshotFile bodies_snapshot_file{ + test::SampleBodySnapshotFile body_segment_file{ tmp_dir.path(), "000000000000000e000000000000000000000000000000000000000000000004" "01ff010801c6837004d980c001c6837004d980c001c6837004d980c001c68370" // {01, ff} <- ff instead of 00 "04d980c001c6837004d980c001c6837004d980c001c6837004d980c001c68370" "04d980c001c6837004d980c001c6837004d980c001c6837004d980c001c68370" "04d980c001c6837004d980c001c6837004d901c0"}; - auto& bodies_snapshot_path = bodies_snapshot_file.path(); - test::SampleTransactionSnapshotFile txs_snapshot_file{tmp_dir.path()}; - auto& txs_snapshot_path = txs_snapshot_file.path(); + auto& body_segment_path = body_segment_file.path(); + test::SampleTransactionSnapshotFile txn_segment_file{tmp_dir.path()}; + auto& txn_segment_path = txn_segment_file.path(); - CHECK_THROWS_WITH(TransactionIndex::make(bodies_snapshot_path, txs_snapshot_path).build(), ContainsSubstring("invalid: position read")); - CHECK_THROWS_WITH(TransactionToBlockIndex::make(bodies_snapshot_path, txs_snapshot_path, txs_snapshot_file.block_num_range().start).build(), ContainsSubstring("invalid: position read")); + CHECK_THROWS_WITH(TransactionIndex::make(body_segment_path, txn_segment_path).build(), ContainsSubstring("invalid: position read")); + CHECK_THROWS_WITH(TransactionToBlockIndex::make(body_segment_path, txn_segment_path, txn_segment_file.block_num_range().start).build(), ContainsSubstring("invalid: position read")); } SECTION("KO: invalid positions count") { - test::SampleBodySnapshotFile bodies_snapshot_file{ + test::SampleBodySnapshotFile body_segment_file{ tmp_dir.path(), "000000000000000e000000000000000000000000000000000000000000000005" // POSITIONS=5 <- 5 instead of 4 "0100010801c6837004d980c001c6837004d980c001c6837004d980c001c68370" "04d980c001c6837004d980c001c6837004d980c001c6837004d980c001c68370" "04d980c001c6837004d980c001c6837004d980c001c6837004d980c001c68370" "04d980c001c6837004d980c001c6837004d901c0"}; - auto& bodies_snapshot_path = bodies_snapshot_file.path(); - test::SampleTransactionSnapshotFile txs_snapshot_file{tmp_dir.path()}; - auto& txs_snapshot_path = txs_snapshot_file.path(); + auto& body_segment_path = body_segment_file.path(); + test::SampleTransactionSnapshotFile txn_segment_file{tmp_dir.path()}; + auto& txn_segment_path = txn_segment_file.path(); - CHECK_THROWS_WITH(TransactionIndex::make(bodies_snapshot_path, txs_snapshot_path).build(), ContainsSubstring("invalid: position read")); - CHECK_THROWS_WITH(TransactionToBlockIndex::make(bodies_snapshot_path, txs_snapshot_path, txs_snapshot_file.block_num_range().start).build(), ContainsSubstring("invalid: position read")); + CHECK_THROWS_WITH(TransactionIndex::make(body_segment_path, txn_segment_path).build(), ContainsSubstring("invalid: position read")); + CHECK_THROWS_WITH(TransactionToBlockIndex::make(body_segment_path, txn_segment_path, txn_segment_file.block_num_range().start).build(), ContainsSubstring("invalid: position read")); } SECTION("KO: invalid RLP") { - test::SampleBodySnapshotFile bodies_snapshot_file{ + test::SampleBodySnapshotFile body_segment_file{ tmp_dir.path(), "000000000000000e000000000000000000000000000000000000000000000004" "0100010801c6837004d980c001c6837004d980c001c6837004d980c001c68370" "04d980c001c6837004d980c001c6837004d980c001c6837004d980c001c68370" "04d980c001c6837004d980c001c6837004d980c001c6837004d980c001c68370" "04d980c001c6837004d980c001c7837004d901c0"}; // {01, c7837004d980c0} <- c7 instead of c6 - auto& bodies_snapshot_path = bodies_snapshot_file.path(); - test::SampleTransactionSnapshotFile txs_snapshot_file{tmp_dir.path()}; - auto& txs_snapshot_path = txs_snapshot_file.path(); + auto& body_segment_path = body_segment_file.path(); + test::SampleTransactionSnapshotFile txn_segment_file{tmp_dir.path()}; + auto& txn_segment_path = txn_segment_file.path(); - CHECK_THROWS_AS(TransactionIndex::make(bodies_snapshot_path, txs_snapshot_path).build(), DecodingException); - CHECK_THROWS_AS(TransactionToBlockIndex::make(bodies_snapshot_path, txs_snapshot_path, txs_snapshot_file.block_num_range().start).build(), DecodingException); + CHECK_THROWS_AS(TransactionIndex::make(body_segment_path, txn_segment_path).build(), DecodingException); + CHECK_THROWS_AS(TransactionToBlockIndex::make(body_segment_path, txn_segment_path, txn_segment_file.block_num_range().start).build(), DecodingException); } SECTION("KO: unexpected tx amount") { - test::SampleBodySnapshotFile bodies_snapshot_file{tmp_dir.path()}; - auto& bodies_snapshot_path = bodies_snapshot_file.path(); - test::SampleTransactionSnapshotFile txs_snapshot_file{ + test::SampleBodySnapshotFile body_segment_file{tmp_dir.path()}; + auto& body_segment_path = body_segment_file.path(); + test::SampleTransactionSnapshotFile txn_segment_file{ tmp_dir.path(), "000000000000000C" // WC = 12 "0000000000000004" // EWC = 4 @@ -176,11 +176,11 @@ TEST_CASE("TransactionIndex::build KO: invalid snapshot", "[silkworm][snapshot][ "59DE97C1" // Txn position 0 block 1'500'012 END // 11 txs missing here... }; - auto& txs_snapshot_path = txs_snapshot_file.path(); + auto& txn_segment_path = txn_segment_file.path(); - auto tx_index = TransactionIndex::make(bodies_snapshot_path, txs_snapshot_path); + auto tx_index = TransactionIndex::make(body_segment_path, txn_segment_path); CHECK_THROWS_WITH(tx_index.build(), StartsWith("keys expected")); - auto tx_index_hash_to_block = TransactionToBlockIndex::make(bodies_snapshot_path, txs_snapshot_path, txs_snapshot_file.block_num_range().start); + auto tx_index_hash_to_block = TransactionToBlockIndex::make(body_segment_path, txn_segment_path, txn_segment_file.block_num_range().start); CHECK_THROWS_WITH(tx_index_hash_to_block.build(), ContainsSubstring("tx count mismatch")); } } @@ -188,14 +188,14 @@ TEST_CASE("TransactionIndex::build KO: invalid snapshot", "[silkworm][snapshot][ TEST_CASE("TransactionIndex::build OK", "[silkworm][snapshot][index]") { SetLogVerbosityGuard guard{log::Level::kNone}; TemporaryDirectory tmp_dir; - test::SampleBodySnapshotFile bodies_snapshot_file{tmp_dir.path()}; - auto& bodies_snapshot_path = bodies_snapshot_file.path(); - test::SampleTransactionSnapshotFile txs_snapshot_file{tmp_dir.path()}; - auto& txs_snapshot_path = txs_snapshot_file.path(); + test::SampleBodySnapshotFile body_segment_file{tmp_dir.path()}; + auto& body_segment_path = body_segment_file.path(); + test::SampleTransactionSnapshotFile txn_segment_file{tmp_dir.path()}; + auto& txn_segment_path = txn_segment_file.path(); - auto tx_index = TransactionIndex::make(bodies_snapshot_path, txs_snapshot_path); + auto tx_index = TransactionIndex::make(body_segment_path, txn_segment_path); tx_index.build(); - auto tx_index_hash_to_block = TransactionToBlockIndex::make(bodies_snapshot_path, txs_snapshot_path, txs_snapshot_file.block_num_range().start); + auto tx_index_hash_to_block = TransactionToBlockIndex::make(body_segment_path, txn_segment_path, txn_segment_file.block_num_range().start); tx_index_hash_to_block.build(); } diff --git a/silkworm/db/snapshot_recompress.cpp b/silkworm/db/snapshot_recompress.cpp index b1ea09f6de..ff24d64974 100644 --- a/silkworm/db/snapshot_recompress.cpp +++ b/silkworm/db/snapshot_recompress.cpp @@ -19,17 +19,17 @@ #include -#include "blocks/bodies/body_snapshot.hpp" -#include "blocks/headers/header_snapshot.hpp" -#include "datastore/snapshots/snapshot_path.hpp" -#include "transactions/txn_snapshot.hpp" +#include "blocks/bodies/body_segment.hpp" +#include "blocks/headers/header_segment.hpp" +#include "datastore/snapshots/common/snapshot_path.hpp" +#include "transactions/txn_segment.hpp" namespace silkworm::snapshots { -template -void copy_reader_to_writer(const Snapshot& file_reader, SnapshotFileWriter& file_writer) { - TSnapshotReader reader{file_reader}; - TSnapshotWriter writer{file_writer}; +template +void copy_reader_to_writer(const SegmentFileReader& file_reader, SegmentFileWriter& file_writer) { + TSegmentReader reader{file_reader}; + TSegmentWriter writer{file_writer}; std::copy(reader.begin(), reader.end(), writer.out()); } @@ -37,29 +37,29 @@ void snapshot_file_recompress(const std::filesystem::path& path) { auto path_opt = SnapshotPath::parse(path); if (!path_opt) throw std::runtime_error{"bad snapshot path"}; - Snapshot file_reader{*path_opt}; + SegmentFileReader file_reader{*path_opt}; file_reader.reopen_segment(); auto out_path = path; out_path.replace_extension("seg2"); TemporaryDirectory tmp_dir; - SnapshotFileWriter file_writer{*SnapshotPath::parse(out_path), tmp_dir.path()}; + SegmentFileWriter file_writer{*SnapshotPath::parse(out_path), tmp_dir.path()}; switch (path_opt->type()) { case SnapshotType::headers: - copy_reader_to_writer(file_reader, file_writer); + copy_reader_to_writer(file_reader, file_writer); break; case SnapshotType::bodies: - copy_reader_to_writer(file_reader, file_writer); + copy_reader_to_writer(file_reader, file_writer); break; case SnapshotType::transactions: - copy_reader_to_writer(file_reader, file_writer); + copy_reader_to_writer(file_reader, file_writer); break; default: throw std::runtime_error{"invalid snapshot type"}; } - SnapshotFileWriter::flush(std::move(file_writer)); + SegmentFileWriter::flush(std::move(file_writer)); } } // namespace silkworm::snapshots diff --git a/silkworm/db/snapshot_repository_test.cpp b/silkworm/db/snapshot_repository_test.cpp index c7f5f06dcd..f994a7ab7b 100644 --- a/silkworm/db/snapshot_repository_test.cpp +++ b/silkworm/db/snapshot_repository_test.cpp @@ -147,9 +147,9 @@ TEST_CASE("SnapshotRepository::find_segment", "[silkworm][node][snapshot]") { // These sample snapshot files just contain data for block range [1'500'012, 1'500'013], hence current snapshot // file name format is not sufficient to support them (see checks commented out below) - test::SampleHeaderSnapshotFile header_snapshot{tmp_dir.path()}; - test::SampleBodySnapshotFile body_snapshot{tmp_dir.path()}; - test::SampleTransactionSnapshotFile txn_snapshot{tmp_dir.path()}; + test::SampleHeaderSnapshotFile header_segment{tmp_dir.path()}; + test::SampleBodySnapshotFile body_segment{tmp_dir.path()}; + test::SampleTransactionSnapshotFile txn_segment{tmp_dir.path()}; SECTION("header w/o index") { CHECK_FALSE_FIRST(repository.find_segment(SnapshotType::headers, 1'500'011)); @@ -170,16 +170,16 @@ TEST_CASE("SnapshotRepository::find_segment", "[silkworm][node][snapshot]") { CHECK_FALSE_FIRST(repository.find_segment(SnapshotType::transactions, 1'500'014)); } - auto header_index = HeaderIndex::make(header_snapshot.path()); - header_index.set_base_data_id(header_snapshot.block_num_range().start); + auto header_index = HeaderIndex::make(header_segment.path()); + header_index.set_base_data_id(header_segment.block_num_range().start); REQUIRE_NOTHROW(header_index.build()); - auto& body_snapshot_path = body_snapshot.path(); - auto body_index = BodyIndex::make(body_snapshot_path); - body_index.set_base_data_id(body_snapshot.block_num_range().start); + auto& body_segment_path = body_segment.path(); + auto body_index = BodyIndex::make(body_segment_path); + body_index.set_base_data_id(body_segment.block_num_range().start); REQUIRE_NOTHROW(body_index.build()); - auto& txn_snapshot_path = txn_snapshot.path(); - REQUIRE_NOTHROW(TransactionIndex::make(body_snapshot_path, txn_snapshot_path).build()); - REQUIRE_NOTHROW(TransactionToBlockIndex::make(body_snapshot_path, txn_snapshot_path, txn_snapshot.block_num_range().start).build()); + auto& txn_segment_path = txn_segment.path(); + REQUIRE_NOTHROW(TransactionIndex::make(body_segment_path, txn_segment_path).build()); + REQUIRE_NOTHROW(TransactionToBlockIndex::make(body_segment_path, txn_segment_path, txn_segment.block_num_range().start).build()); REQUIRE_NOTHROW(repository.reopen_folder()); @@ -214,20 +214,20 @@ TEST_CASE("SnapshotRepository::find_block_number", "[silkworm][node][snapshot]") // These sample snapshot files just contain data for block range [1'500'012, 1'500'013], hence current snapshot // file name format is not sufficient to support them (see checks commented out below) - test::SampleHeaderSnapshotFile header_snapshot{tmp_dir.path()}; - test::SampleBodySnapshotFile body_snapshot{tmp_dir.path()}; - test::SampleTransactionSnapshotFile txn_snapshot{tmp_dir.path()}; + test::SampleHeaderSnapshotFile header_segment{tmp_dir.path()}; + test::SampleBodySnapshotFile body_segment{tmp_dir.path()}; + test::SampleTransactionSnapshotFile txn_segment{tmp_dir.path()}; - auto header_index = HeaderIndex::make(header_snapshot.path()); - header_index.set_base_data_id(header_snapshot.block_num_range().start); + auto header_index = HeaderIndex::make(header_segment.path()); + header_index.set_base_data_id(header_segment.block_num_range().start); REQUIRE_NOTHROW(header_index.build()); - auto& body_snapshot_path = body_snapshot.path(); - auto body_index = BodyIndex::make(body_snapshot_path); - body_index.set_base_data_id(body_snapshot.block_num_range().start); + auto& body_segment_path = body_segment.path(); + auto body_index = BodyIndex::make(body_segment_path); + body_index.set_base_data_id(body_segment.block_num_range().start); REQUIRE_NOTHROW(body_index.build()); - auto& txn_snapshot_path = txn_snapshot.path(); - REQUIRE_NOTHROW(TransactionIndex::make(body_snapshot_path, txn_snapshot_path).build()); - REQUIRE_NOTHROW(TransactionToBlockIndex::make(body_snapshot_path, txn_snapshot_path, txn_snapshot.block_num_range().start).build()); + auto& txn_segment_path = txn_segment.path(); + REQUIRE_NOTHROW(TransactionIndex::make(body_segment_path, txn_segment_path).build()); + REQUIRE_NOTHROW(TransactionToBlockIndex::make(body_segment_path, txn_segment_path, txn_segment.block_num_range().start).build()); REQUIRE_NOTHROW(repository.reopen_folder()); @@ -263,12 +263,12 @@ TEST_CASE("SnapshotRepository::remove_stale_indexes", "[silkworm][node][snapshot SnapshotRepository repository{settings, bundle_factory()}; // create a snapshot file - test::SampleHeaderSnapshotFile header_snapshot_file{tmp_dir.path()}; - auto& header_snapshot_path = header_snapshot_file.path(); + test::SampleHeaderSnapshotFile header_segment_file{tmp_dir.path()}; + auto& header_segment_path = header_segment_file.path(); // build an index - auto index_builder = HeaderIndex::make(header_snapshot_path); - index_builder.set_base_data_id(header_snapshot_file.block_num_range().start); + auto index_builder = HeaderIndex::make(header_segment_path); + index_builder.set_base_data_id(header_segment_file.block_num_range().start); REQUIRE_NOTHROW(index_builder.build()); auto index_path = index_builder.path().path(); @@ -277,7 +277,7 @@ TEST_CASE("SnapshotRepository::remove_stale_indexes", "[silkworm][node][snapshot CHECK(std::filesystem::exists(index_path)); // move the snapshot last write time 1 hour to the future to make its index "stale" - const auto last_write_time_diff = move_last_write_time(header_snapshot_path.path(), 1h); + const auto last_write_time_diff = move_last_write_time(header_segment_path.path(), 1h); CHECK((last_write_time_diff.count() > 0)); // the index is stale diff --git a/silkworm/db/snapshot_sync.cpp b/silkworm/db/snapshot_sync.cpp index c5cc7a0d61..d548a3aaa9 100644 --- a/silkworm/db/snapshot_sync.cpp +++ b/silkworm/db/snapshot_sync.cpp @@ -26,12 +26,6 @@ #include #include -#include -#include -#include -#include -#include -#include #include #include #include @@ -39,6 +33,13 @@ #include #include +#include "blocks/headers/header_segment.hpp" +#include "datastore/mdbx/etl_mdbx_collector.hpp" +#include "datastore/snapshots/bittorrent/torrent_file.hpp" +#include "datastore/snapshots/common/snapshot_path.hpp" +#include "snapshot_bundle_factory_impl.hpp" +#include "stages.hpp" + namespace silkworm::db { using namespace silkworm::snapshots; @@ -171,8 +172,8 @@ Task SnapshotSync::download_snapshots() { const size_t num_snapshots = snapshot_config.preverified_snapshots().size(); SILK_INFO << "SnapshotSync: download started: [0/" << num_snapshots << "]"; - auto log_added = [](const std::filesystem::path& snapshot_file) { - SILK_TRACE << "SnapshotSync: download started for: " << snapshot_file.filename().string(); + auto log_added = [](const std::filesystem::path& path) { + SILK_TRACE << "SnapshotSync: download started for: " << path.filename().string(); }; boost::signals2::scoped_connection added_subscription{client_.added_subscription.connect(log_added)}; @@ -201,8 +202,8 @@ Task SnapshotSync::download_snapshots() { auto executor = co_await boost::asio::this_coro::executor; // make the buffer bigger so that try_send always succeeds in case of duplicate files (see snapshot_set below) concurrency::Channel completed_channel{executor, num_snapshots * 2}; - auto log_completed = [&](const std::filesystem::path& snapshot_file) { - completed_channel.try_send(snapshot_file); + auto log_completed = [&](const std::filesystem::path& path) { + completed_channel.try_send(path); }; boost::signals2::scoped_connection completed_subscription{client_.completed_subscription.connect(log_completed)}; @@ -275,7 +276,7 @@ void SnapshotSync::seed_frozen_local_snapshots() { for (auto& bundle_ptr : repository_.view_bundles()) { auto& bundle = *bundle_ptr; bool is_frozen = bundle.block_range().size() >= kMaxMergerSnapshotSize; - const auto first_snapshot = bundle.snapshots()[0]; + const auto first_snapshot = bundle.segments()[0]; // assume that if one snapshot in the bundle is preverified, then all of them are bool is_preverified = snapshots_config_.contains_file_name(first_snapshot.get().path().filename()); if (is_frozen && !is_preverified) { @@ -333,7 +334,7 @@ void SnapshotSync::update_block_headers(RWTxn& txn, BlockNum max_block_available for (const auto& bundle_ptr : repository_.view_bundles()) { const auto& bundle = *bundle_ptr; - for (const BlockHeader& header : HeaderSnapshotReader{bundle.header_snapshot}) { + for (const BlockHeader& header : HeaderSegmentReader{bundle.header_segment}) { SILK_TRACE << "SnapshotSync: header number=" << header.number << " hash=" << Hash{header.hash()}.to_hex(); const auto block_number = header.number; if (block_number > max_block_available) continue; @@ -386,9 +387,9 @@ void SnapshotSync::update_block_bodies(RWTxn& txn, BlockNum max_block_available) } // Reset sequence for kBlockTransactions table - const auto [tx_snapshot, _] = repository_.find_segment(SnapshotType::transactions, max_block_available); - ensure(tx_snapshot.has_value(), "SnapshotSync: snapshots max block not found in any snapshot"); - const auto last_tx_id = tx_snapshot->index.base_data_id() + tx_snapshot->snapshot.item_count(); + const auto [txn_segment, _] = repository_.find_segment(SnapshotType::transactions, max_block_available); + ensure(txn_segment.has_value(), "SnapshotSync: snapshots max block not found in any snapshot"); + const auto last_tx_id = txn_segment->index.base_data_id() + txn_segment->segment.item_count(); reset_map_sequence(txn, table::kBlockTransactions.name, last_tx_id + 1); SILK_INFO << "SnapshotSync: database table BlockTransactions sequence reset"; diff --git a/silkworm/db/snapshot_sync.hpp b/silkworm/db/snapshot_sync.hpp index b615a9766c..995f62b0b9 100644 --- a/silkworm/db/snapshot_sync.hpp +++ b/silkworm/db/snapshot_sync.hpp @@ -28,20 +28,20 @@ #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include +#include "access_layer.hpp" +#include "datastore/mdbx/mdbx.hpp" #include "datastore/snapshot_merger.hpp" +#include "datastore/snapshots/bittorrent/client.hpp" +#include "datastore/snapshots/common/snapshot_path.hpp" +#include "datastore/snapshots/config/config.hpp" +#include "datastore/snapshots/snapshot_bundle.hpp" +#include "datastore/snapshots/snapshot_repository.hpp" +#include "datastore/snapshots/snapshot_settings.hpp" #include "datastore/stage_scheduler.hpp" +#include "freezer.hpp" namespace silkworm::db { diff --git a/silkworm/db/snapshot_sync_test.cpp b/silkworm/db/snapshot_sync_test.cpp index e18e0e5f7a..b36f4793dd 100644 --- a/silkworm/db/snapshot_sync_test.cpp +++ b/silkworm/db/snapshot_sync_test.cpp @@ -114,41 +114,41 @@ TEST_CASE("SnapshotSync::update_block_headers", "[db][snapshot][sync]") { auto tmp_dir_path = test.tmp_dir.path(); // Create a sample Header snapshot+index - snapshots::test_util::SampleHeaderSnapshotFile header_snapshot_file{tmp_dir_path}; - auto& header_snapshot_path = header_snapshot_file.path(); - Snapshot header_snapshot{header_snapshot_path}; - auto header_index_builder = HeaderIndex::make(header_snapshot_path); - header_index_builder.set_base_data_id(header_snapshot_file.block_num_range().start); + snapshots::test_util::SampleHeaderSnapshotFile header_segment_file{tmp_dir_path}; + auto& header_segment_path = header_segment_file.path(); + SegmentFileReader header_segment{header_segment_path}; + auto header_index_builder = HeaderIndex::make(header_segment_path); + header_index_builder.set_base_data_id(header_segment_file.block_num_range().start); REQUIRE_NOTHROW(header_index_builder.build()); - Index idx_header_hash{header_snapshot_path.index_file()}; + Index idx_header_hash{header_segment_path.index_file()}; // Create a sample Body snapshot+index - snapshots::test_util::SampleBodySnapshotFile body_snapshot_file{tmp_dir_path}; - auto& body_snapshot_path = body_snapshot_file.path(); - Snapshot body_snapshot{body_snapshot_path}; - auto body_index_builder = BodyIndex::make(body_snapshot_path); - body_index_builder.set_base_data_id(body_snapshot_file.block_num_range().start); + snapshots::test_util::SampleBodySnapshotFile body_segment_file{tmp_dir_path}; + auto& body_segment_path = body_segment_file.path(); + SegmentFileReader body_segment{body_segment_path}; + auto body_index_builder = BodyIndex::make(body_segment_path); + body_index_builder.set_base_data_id(body_segment_file.block_num_range().start); REQUIRE_NOTHROW(body_index_builder.build()); - Index idx_body_number{body_snapshot_path.index_file()}; + Index idx_body_number{body_segment_path.index_file()}; // Create a sample Transaction snapshot+indexes - snapshots::test_util::SampleTransactionSnapshotFile txn_snapshot_file{tmp_dir_path}; - auto& txn_snapshot_path = txn_snapshot_file.path(); - Snapshot txn_snapshot{txn_snapshot_path}; - REQUIRE_NOTHROW(TransactionIndex::make(body_snapshot_path, txn_snapshot_path).build()); - REQUIRE_NOTHROW(TransactionToBlockIndex::make(body_snapshot_path, txn_snapshot_path, txn_snapshot_file.block_num_range().start).build()); - Index idx_txn_hash{txn_snapshot_path.related_path(SnapshotType::transactions, kIdxExtension)}; - Index idx_txn_hash_2_block{txn_snapshot_path.related_path(SnapshotType::transactions_to_block, kIdxExtension)}; + snapshots::test_util::SampleTransactionSnapshotFile txn_segment_file{tmp_dir_path}; + auto& txn_segment_path = txn_segment_file.path(); + SegmentFileReader txn_segment{txn_segment_path}; + REQUIRE_NOTHROW(TransactionIndex::make(body_segment_path, txn_segment_path).build()); + REQUIRE_NOTHROW(TransactionToBlockIndex::make(body_segment_path, txn_segment_path, txn_segment_file.block_num_range().start).build()); + Index idx_txn_hash{txn_segment_path.related_path(SnapshotType::transactions, kIdxExtension)}; + Index idx_txn_hash_2_block{txn_segment_path.related_path(SnapshotType::transactions_to_block, kIdxExtension)}; // Add a sample Snapshot bundle to the repository SnapshotBundle bundle{{ - .header_snapshot = std::move(header_snapshot), + .header_segment = std::move(header_segment), .idx_header_hash = std::move(idx_header_hash), - .body_snapshot = std::move(body_snapshot), + .body_segment = std::move(body_segment), .idx_body_number = std::move(idx_body_number), - .txn_snapshot = std::move(txn_snapshot), + .txn_segment = std::move(txn_segment), .idx_txn_hash = std::move(idx_txn_hash), .idx_txn_hash_2_block = std::move(idx_txn_hash_2_block), }}; @@ -157,7 +157,7 @@ TEST_CASE("SnapshotSync::update_block_headers", "[db][snapshot][sync]") { // Update the block headers in the database according to the repository content auto& tmp_db = test.context; - BlockNum max_block_available = header_snapshot_file.block_num_range().end - 1; + BlockNum max_block_available = header_segment_file.block_num_range().end - 1; auto is_stopping = [] { return false; }; CHECK_NOTHROW(snapshot_sync.update_block_headers(tmp_db.rw_txn(), max_block_available, is_stopping)); diff --git a/silkworm/db/snapshot_test.cpp b/silkworm/db/snapshot_test.cpp index c90f59e784..5ff110120d 100644 --- a/silkworm/db/snapshot_test.cpp +++ b/silkworm/db/snapshot_test.cpp @@ -19,21 +19,22 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include +#include "blocks/bodies/body_index.hpp" +#include "blocks/bodies/body_queries.hpp" +#include "blocks/headers/header_index.hpp" +#include "blocks/headers/header_queries.hpp" +#include "datastore/snapshots/index_builder.hpp" +#include "datastore/snapshots/segment/segment_reader.hpp" +#include "test_util/temp_snapshots.hpp" +#include "transactions/txn_index.hpp" +#include "transactions/txn_queries.hpp" +#include "transactions/txn_segment_word_codec.hpp" +#include "transactions/txn_to_block_index.hpp" + namespace silkworm::snapshots { namespace test = test_util; @@ -53,11 +54,11 @@ class SnapshotPathForTest : public SnapshotPath { } {} }; -class SnapshotForTest : public Snapshot { +class SnapshotForTest : public SegmentFileReader { public: - explicit SnapshotForTest(SnapshotPath path) : Snapshot(std::move(path)) {} + explicit SnapshotForTest(SnapshotPath path) : SegmentFileReader(std::move(path)) {} SnapshotForTest(const std::filesystem::path& tmp_dir, StepRange step_range) - : Snapshot{SnapshotPathForTest{tmp_dir, step_range}} {} + : SegmentFileReader{SnapshotPathForTest{tmp_dir, step_range}} {} }; TEST_CASE("Snapshot::Snapshot", "[silkworm][node][snapshot][snapshot]") { @@ -121,18 +122,18 @@ TEST_CASE("Snapshot::close", "[silkworm][node][snapshot][snapshot]") { TEST_CASE("HeaderSnapshot::header_by_number OK", "[silkworm][node][snapshot][index]") { SetLogVerbosityGuard guard{log::Level::kNone}; TemporaryDirectory tmp_dir; - test::SampleHeaderSnapshotFile header_snapshot_file{tmp_dir.path()}; // contains headers for [1'500'012, 1'500'013] - auto& header_snapshot_path = header_snapshot_file.path(); - auto header_index = HeaderIndex::make(header_snapshot_path); - header_index.set_base_data_id(header_snapshot_file.block_num_range().start); + test::SampleHeaderSnapshotFile header_segment_file{tmp_dir.path()}; // contains headers for [1'500'012, 1'500'013] + auto& header_segment_path = header_segment_file.path(); + auto header_index = HeaderIndex::make(header_segment_path); + header_index.set_base_data_id(header_segment_file.block_num_range().start); REQUIRE_NOTHROW(header_index.build()); - Snapshot header_snapshot{header_snapshot_path}; - header_snapshot.reopen_segment(); + SegmentFileReader header_segment{header_segment_path}; + header_segment.reopen_segment(); - Index idx_header_hash{header_snapshot_path.index_file()}; + Index idx_header_hash{header_segment_path.index_file()}; idx_header_hash.reopen_index(); - HeaderFindByBlockNumQuery header_by_number{{header_snapshot, idx_header_hash}}; + HeaderFindByBlockNumQuery header_by_number{{header_segment, idx_header_hash}}; CHECK(!header_by_number.exec(1'500'011)); CHECK(header_by_number.exec(1'500'012)); @@ -164,18 +165,18 @@ TEST_CASE("HeaderSnapshot::header_by_number OK", "[silkworm][node][snapshot][ind TEST_CASE("BodySnapshot::body_by_number OK", "[silkworm][node][snapshot][index]") { SetLogVerbosityGuard guard{log::Level::kNone}; TemporaryDirectory tmp_dir; - test::SampleBodySnapshotFile body_snapshot_file{tmp_dir.path()}; // contains bodies for [1'500'012, 1'500'013] - auto& body_snapshot_path = body_snapshot_file.path(); - auto body_index = BodyIndex::make(body_snapshot_path); - body_index.set_base_data_id(body_snapshot_file.block_num_range().start); + test::SampleBodySnapshotFile body_segment_file{tmp_dir.path()}; // contains bodies for [1'500'012, 1'500'013] + auto& body_segment_path = body_segment_file.path(); + auto body_index = BodyIndex::make(body_segment_path); + body_index.set_base_data_id(body_segment_file.block_num_range().start); REQUIRE_NOTHROW(body_index.build()); - Snapshot body_snapshot{body_snapshot_path}; - body_snapshot.reopen_segment(); + SegmentFileReader body_segment{body_segment_path}; + body_segment.reopen_segment(); - Index idx_body_number{body_snapshot_path.index_file()}; + Index idx_body_number{body_segment_path.index_file()}; idx_body_number.reopen_index(); - BodyFindByBlockNumQuery body_by_number{{body_snapshot, idx_body_number}}; + BodyFindByBlockNumQuery body_by_number{{body_segment, idx_body_number}}; CHECK(!body_by_number.exec(1'500'011)); CHECK(body_by_number.exec(1'500'012)); @@ -185,26 +186,26 @@ TEST_CASE("BodySnapshot::body_by_number OK", "[silkworm][node][snapshot][index]" CHECK(body_for_storage->base_txn_id == 7'341'271); CHECK(body_for_storage->txn_count == 2 + 1); // 2 system txs + 1 tx } - // CHECK(!body_snapshot.body_by_number(1'500'014)); // TODO(canepat) assert in EF, should return std::nullopt instead + // CHECK(!body_segment.body_by_number(1'500'014)); // TODO(canepat) assert in EF, should return std::nullopt instead } // https://etherscan.io/block/1500013 TEST_CASE("TransactionSnapshot::txn_by_id OK", "[silkworm][node][snapshot][index]") { SetLogVerbosityGuard guard{log::Level::kNone}; TemporaryDirectory tmp_dir; - test::SampleBodySnapshotFile body_snapshot{tmp_dir.path()}; - auto& body_snapshot_path = body_snapshot.path(); - test::SampleTransactionSnapshotFile tx_snapshot_file{tmp_dir.path()}; // contains txs for [1'500'012, 1'500'013] - auto& tx_snapshot_path = tx_snapshot_file.path(); - auto tx_index = TransactionIndex::make(body_snapshot_path, tx_snapshot_path); + test::SampleBodySnapshotFile body_segment{tmp_dir.path()}; + auto& body_segment_path = body_segment.path(); + test::SampleTransactionSnapshotFile txn_segment_file{tmp_dir.path()}; // contains txs for [1'500'012, 1'500'013] + auto& txn_segment_path = txn_segment_file.path(); + auto tx_index = TransactionIndex::make(body_segment_path, txn_segment_path); CHECK_NOTHROW(tx_index.build()); - Snapshot tx_snapshot{tx_snapshot_path}; - tx_snapshot.reopen_segment(); + SegmentFileReader txn_segment{txn_segment_path}; + txn_segment.reopen_segment(); - Index idx_txn_hash{tx_snapshot_path.index_file()}; + Index idx_txn_hash{txn_segment_path.index_file()}; idx_txn_hash.reopen_index(); - TransactionFindByIdQuery txn_by_id{{tx_snapshot, idx_txn_hash}}; + TransactionFindByIdQuery txn_by_id{{txn_segment, idx_txn_hash}}; const auto transaction = txn_by_id.exec(7'341'272); CHECK(transaction.has_value()); @@ -219,25 +220,25 @@ TEST_CASE("TransactionSnapshot::txn_by_id OK", "[silkworm][node][snapshot][index TEST_CASE("TransactionSnapshot::block_num_by_txn_hash OK", "[silkworm][node][snapshot][index]") { SetLogVerbosityGuard guard{log::Level::kNone}; TemporaryDirectory tmp_dir; - test::SampleBodySnapshotFile body_snapshot{tmp_dir.path()}; - auto& body_snapshot_path = body_snapshot.path(); - test::SampleTransactionSnapshotFile tx_snapshot_file{tmp_dir.path()}; // contains txs for [1'500'012, 1'500'013] - auto& tx_snapshot_path = tx_snapshot_file.path(); - auto tx_index = TransactionIndex::make(body_snapshot_path, tx_snapshot_path); + test::SampleBodySnapshotFile body_segment{tmp_dir.path()}; + auto& body_segment_path = body_segment.path(); + test::SampleTransactionSnapshotFile txn_segment_file{tmp_dir.path()}; // contains txs for [1'500'012, 1'500'013] + auto& txn_segment_path = txn_segment_file.path(); + auto tx_index = TransactionIndex::make(body_segment_path, txn_segment_path); REQUIRE_NOTHROW(tx_index.build()); - auto tx_index_hash_to_block = TransactionToBlockIndex::make(body_snapshot_path, tx_snapshot_path, tx_snapshot_file.block_num_range().start); + auto tx_index_hash_to_block = TransactionToBlockIndex::make(body_segment_path, txn_segment_path, txn_segment_file.block_num_range().start); REQUIRE_NOTHROW(tx_index_hash_to_block.build()); - Snapshot tx_snapshot{tx_snapshot_path}; - tx_snapshot.reopen_segment(); + SegmentFileReader txn_segment{txn_segment_path}; + txn_segment.reopen_segment(); - Index idx_txn_hash{tx_snapshot_path.index_file()}; + Index idx_txn_hash{txn_segment_path.index_file()}; idx_txn_hash.reopen_index(); - TransactionFindByIdQuery txn_by_id{{tx_snapshot, idx_txn_hash}}; + TransactionFindByIdQuery txn_by_id{{txn_segment, idx_txn_hash}}; - Index idx_txn_hash_2_block{tx_snapshot_path.related_path(SnapshotType::transactions_to_block, kIdxExtension)}; + Index idx_txn_hash_2_block{txn_segment_path.related_path(SnapshotType::transactions_to_block, kIdxExtension)}; idx_txn_hash_2_block.reopen_index(); - TransactionBlockNumByTxnHashQuery block_num_by_txn_hash{idx_txn_hash_2_block, TransactionFindByHashQuery{{tx_snapshot, idx_txn_hash}}}; + TransactionBlockNumByTxnHashQuery block_num_by_txn_hash{idx_txn_hash_2_block, TransactionFindByHashQuery{{txn_segment, idx_txn_hash}}}; // block 1'500'012: base_txn_id is 7'341'263, txn_count is 7 auto transaction = txn_by_id.exec(7'341'269); // known txn id in block 1'500'012 @@ -263,19 +264,19 @@ TEST_CASE("TransactionSnapshot::block_num_by_txn_hash OK", "[silkworm][node][sna TEST_CASE("TransactionSnapshot::txn_range OK", "[silkworm][node][snapshot][index]") { SetLogVerbosityGuard guard{log::Level::kNone}; TemporaryDirectory tmp_dir; - test::SampleBodySnapshotFile body_snapshot{tmp_dir.path()}; - auto& body_snapshot_path = body_snapshot.path(); - test::SampleTransactionSnapshotFile tx_snapshot_file{tmp_dir.path()}; // contains txs for [1'500'012, 1'500'013] - auto& tx_snapshot_path = tx_snapshot_file.path(); - auto tx_index = TransactionIndex::make(body_snapshot_path, tx_snapshot_path); + test::SampleBodySnapshotFile body_segment{tmp_dir.path()}; + auto& body_segment_path = body_segment.path(); + test::SampleTransactionSnapshotFile txn_segment_file{tmp_dir.path()}; // contains txs for [1'500'012, 1'500'013] + auto& txn_segment_path = txn_segment_file.path(); + auto tx_index = TransactionIndex::make(body_segment_path, txn_segment_path); REQUIRE_NOTHROW(tx_index.build()); - Snapshot tx_snapshot{tx_snapshot_path}; - tx_snapshot.reopen_segment(); + SegmentFileReader txn_segment{txn_segment_path}; + txn_segment.reopen_segment(); - Index idx_txn_hash{tx_snapshot_path.index_file()}; + Index idx_txn_hash{txn_segment_path.index_file()}; idx_txn_hash.reopen_index(); - TransactionRangeFromIdQuery query{{tx_snapshot, idx_txn_hash}}; + TransactionRangeFromIdQuery query{{txn_segment, idx_txn_hash}}; // block 1'500'012: base_txn_id is 7'341'263, txn_count is 7 CHECK(query.exec_into_vector(7'341'263, 0).empty()); @@ -295,19 +296,19 @@ TEST_CASE("TransactionSnapshot::txn_range OK", "[silkworm][node][snapshot][index TEST_CASE("TransactionSnapshot::txn_rlp_range OK", "[silkworm][node][snapshot][index]") { SetLogVerbosityGuard guard{log::Level::kNone}; TemporaryDirectory tmp_dir; - test::SampleBodySnapshotFile body_snapshot{tmp_dir.path()}; - auto& body_snapshot_path = body_snapshot.path(); - test::SampleTransactionSnapshotFile tx_snapshot_file{tmp_dir.path()}; // contains txs for [1'500'012, 1'500'013] - auto& tx_snapshot_path = tx_snapshot_file.path(); - auto tx_index = TransactionIndex::make(body_snapshot_path, tx_snapshot_path); + test::SampleBodySnapshotFile body_segment{tmp_dir.path()}; + auto& body_segment_path = body_segment.path(); + test::SampleTransactionSnapshotFile txn_segment_file{tmp_dir.path()}; // contains txs for [1'500'012, 1'500'013] + auto& txn_segment_path = txn_segment_file.path(); + auto tx_index = TransactionIndex::make(body_segment_path, txn_segment_path); REQUIRE_NOTHROW(tx_index.build()); - Snapshot tx_snapshot{tx_snapshot_path}; - tx_snapshot.reopen_segment(); + SegmentFileReader txn_segment{txn_segment_path}; + txn_segment.reopen_segment(); - Index idx_txn_hash{tx_snapshot_path.index_file()}; + Index idx_txn_hash{txn_segment_path.index_file()}; idx_txn_hash.reopen_index(); - TransactionPayloadRlpRangeFromIdQuery query{{tx_snapshot, idx_txn_hash}}; + TransactionPayloadRlpRangeFromIdQuery query{{txn_segment, idx_txn_hash}}; // block 1'500'012: base_txn_id is 7'341'263, txn_count is 7 CHECK(query.exec_into_vector(7'341'263, 0).empty()); diff --git a/silkworm/db/transactions/txn_index.cpp b/silkworm/db/transactions/txn_index.cpp index 0b23256586..a22dec4d69 100644 --- a/silkworm/db/transactions/txn_index.cpp +++ b/silkworm/db/transactions/txn_index.cpp @@ -17,9 +17,9 @@ #include "txn_index.hpp" #include -#include +#include -#include "txn_snapshot_word_serializer.hpp" +#include "txn_segment_word_codec.hpp" namespace silkworm::snapshots { @@ -30,9 +30,9 @@ Bytes TransactionKeyFactory::make(ByteView key_data, uint64_t i) { std::pair TransactionIndex::compute_txs_amount( SnapshotPath bodies_segment_path, std::optional bodies_segment_region) { - Snapshot bodies_snapshot{std::move(bodies_segment_path), bodies_segment_region}; - bodies_snapshot.reopen_segment(); - auto result = BodyTxsAmountQuery{bodies_snapshot}.exec(); + SegmentFileReader body_segment{std::move(bodies_segment_path), bodies_segment_region}; + body_segment.reopen_segment(); + auto result = BodyTxsAmountQuery{body_segment}.exec(); return {result.first_tx_id, result.count}; } diff --git a/silkworm/db/transactions/txn_index.hpp b/silkworm/db/transactions/txn_index.hpp index 37d526c7d9..b0ebad5624 100644 --- a/silkworm/db/transactions/txn_index.hpp +++ b/silkworm/db/transactions/txn_index.hpp @@ -22,8 +22,8 @@ #include #include +#include #include -#include #include namespace silkworm::snapshots { diff --git a/silkworm/db/transactions/txn_queries.hpp b/silkworm/db/transactions/txn_queries.hpp index da192c02a6..21cb5e8352 100644 --- a/silkworm/db/transactions/txn_queries.hpp +++ b/silkworm/db/transactions/txn_queries.hpp @@ -22,14 +22,14 @@ #include #include -#include "txn_snapshot.hpp" +#include "txn_segment.hpp" namespace silkworm::snapshots { -using TransactionFindByIdQuery = FindByIdQuery; -using TransactionFindByHashQuery = FindByHashQuery; -using TransactionRangeFromIdQuery = RangeFromIdQuery; -using TransactionPayloadRlpRangeFromIdQuery = RangeFromIdQuery>; +using TransactionFindByIdQuery = FindByIdQuery; +using TransactionFindByHashQuery = FindByHashQuery; +using TransactionRangeFromIdQuery = RangeFromIdQuery; +using TransactionPayloadRlpRangeFromIdQuery = RangeFromIdQuery>; class TransactionBlockNumByTxnHashQuery { public: @@ -60,11 +60,11 @@ class TransactionBlockNumByTxnHashRepoQuery { std::optional exec(const Hash& hash) { for (const TBundle& bundle_ptr : bundles_) { const auto& bundle = *bundle_ptr; - const Snapshot& snapshot = bundle.txn_snapshot; + const SegmentFileReader& segment = bundle.txn_segment; const Index& idx_txn_hash = bundle.idx_txn_hash; const Index& idx_txn_hash_2_block = bundle.idx_txn_hash_2_block; - TransactionFindByHashQuery cross_check_query{{snapshot, idx_txn_hash}}; + TransactionFindByHashQuery cross_check_query{{segment, idx_txn_hash}}; TransactionBlockNumByTxnHashQuery query{idx_txn_hash_2_block, cross_check_query}; auto block_num = query.exec(hash); if (block_num) { diff --git a/silkworm/db/transactions/txn_snapshot.hpp b/silkworm/db/transactions/txn_segment.hpp similarity index 61% rename from silkworm/db/transactions/txn_snapshot.hpp rename to silkworm/db/transactions/txn_segment.hpp index 663e19aff1..12c7a5553d 100644 --- a/silkworm/db/transactions/txn_snapshot.hpp +++ b/silkworm/db/transactions/txn_segment.hpp @@ -16,17 +16,17 @@ #pragma once -#include -#include +#include +#include -#include "txn_snapshot_word_serializer.hpp" +#include "txn_segment_word_codec.hpp" namespace silkworm::snapshots { -using TransactionSnapshotReader = SnapshotReader; -using TransactionSnapshotWriter = SnapshotWriter; +using TransactionSegmentReader = SegmentReader; +using TransactionSegmentWriter = SegmentWriter; template -using TransactionSnapshotPayloadRlpReader = SnapshotReader>; +using TransactionSegmentPayloadRlpReader = SegmentReader>; } // namespace silkworm::snapshots diff --git a/silkworm/db/transactions/txn_snapshot_freezer.cpp b/silkworm/db/transactions/txn_segment_collation.cpp similarity index 75% rename from silkworm/db/transactions/txn_snapshot_freezer.cpp rename to silkworm/db/transactions/txn_segment_collation.cpp index f4e5533760..307bb6f9be 100644 --- a/silkworm/db/transactions/txn_snapshot_freezer.cpp +++ b/silkworm/db/transactions/txn_segment_collation.cpp @@ -14,27 +14,27 @@ limitations under the License. */ -#include "txn_snapshot_freezer.hpp" +#include "txn_segment_collation.hpp" #include #include #include -#include "txn_snapshot.hpp" +#include "txn_segment.hpp" namespace silkworm::db { -void TransactionSnapshotFreezer::copy(ROTxn& txn, const FreezerCommand& command, snapshots::SnapshotFileWriter& file_writer) const { +void TransactionSegmentCollation::copy(ROTxn& txn, const SegmentCollationCommand& command, snapshots::SegmentFileWriter& file_writer) const { BlockNumRange range = command.range; - snapshots::TransactionSnapshotWriter writer{file_writer}; + snapshots::TransactionSegmentWriter writer{file_writer}; auto out = writer.out(); auto system_tx = snapshots::empty_system_tx(); for (BlockNum i = range.start; i < range.end; ++i) { BlockBody body; bool found = read_canonical_body(txn, i, /* read_senders = */ true, body); - if (!found) throw std::runtime_error{"TransactionSnapshotFreezer::copy missing body for block " + std::to_string(i)}; + if (!found) throw std::runtime_error{"TransactionSegmentCollation::copy missing body for block " + std::to_string(i)}; *out++ = system_tx; for (auto& value : body.transactions) { @@ -44,7 +44,7 @@ void TransactionSnapshotFreezer::copy(ROTxn& txn, const FreezerCommand& command, } } -void TransactionSnapshotFreezer::cleanup(RWTxn& txn, BlockNumRange range) const { +void TransactionSegmentCollation::prune(RWTxn& txn, BlockNumRange range) const { for (BlockNum i = range.start, count = 1; i < range.end; ++i, ++count) { auto hash_opt = read_canonical_header_hash(txn, i); if (!hash_opt) continue; @@ -59,7 +59,7 @@ void TransactionSnapshotFreezer::cleanup(RWTxn& txn, BlockNumRange range) const } if ((count > 10000) && ((count % 10000) == 0)) { - log::Debug("TransactionSnapshotFreezer") << "cleaned up until block " << i; + log::Debug("TransactionSegmentCollation") << "cleaned up until block " << i; } } } diff --git a/silkworm/db/blocks/bodies/body_snapshot_freezer.hpp b/silkworm/db/transactions/txn_segment_collation.hpp similarity index 65% rename from silkworm/db/blocks/bodies/body_snapshot_freezer.hpp rename to silkworm/db/transactions/txn_segment_collation.hpp index e8955bab3b..884eaf66d8 100644 --- a/silkworm/db/blocks/bodies/body_snapshot_freezer.hpp +++ b/silkworm/db/transactions/txn_segment_collation.hpp @@ -16,15 +16,15 @@ #pragma once -#include +#include namespace silkworm::db { -class BodySnapshotFreezer : public SnapshotFreezer { +class TransactionSegmentCollation : public SegmentCollation { public: - ~BodySnapshotFreezer() override = default; - void copy(ROTxn& txn, const FreezerCommand& command, snapshots::SnapshotFileWriter& file_writer) const override; - void cleanup(RWTxn& txn, BlockNumRange range) const override; + ~TransactionSegmentCollation() override = default; + void copy(ROTxn& txn, const SegmentCollationCommand& command, snapshots::SegmentFileWriter& file_writer) const override; + void prune(RWTxn& txn, BlockNumRange range) const override; }; } // namespace silkworm::db diff --git a/silkworm/db/transactions/txn_snapshot_word_serializer.cpp b/silkworm/db/transactions/txn_segment_word_codec.cpp similarity index 97% rename from silkworm/db/transactions/txn_snapshot_word_serializer.cpp rename to silkworm/db/transactions/txn_segment_word_codec.cpp index c53a212f23..a205cdf75e 100644 --- a/silkworm/db/transactions/txn_snapshot_word_serializer.cpp +++ b/silkworm/db/transactions/txn_segment_word_codec.cpp @@ -14,7 +14,7 @@ limitations under the License. */ -#include "txn_snapshot_word_serializer.hpp" +#include "txn_segment_word_codec.hpp" #include #include @@ -32,7 +32,7 @@ namespace silkworm::snapshots { -TransactionSnapshotWord slice_tx_data(ByteView buffer) { +TransactionSegmentWord slice_tx_data(ByteView buffer) { // Skip first byte of tx hash plus sender address length for transaction decoding constexpr int kTxRlpDataOffset{1 + kAddressLength}; @@ -46,7 +46,7 @@ TransactionSnapshotWord slice_tx_data(ByteView buffer) { ByteView senders_data = buffer.substr(1, kAddressLength); ByteView tx_rlp = buffer.substr(kTxRlpDataOffset); - return TransactionSnapshotWord{ + return TransactionSegmentWord{ first_hash_byte, senders_data, tx_rlp, diff --git a/silkworm/db/transactions/txn_snapshot_word_serializer.hpp b/silkworm/db/transactions/txn_segment_word_codec.hpp similarity index 70% rename from silkworm/db/transactions/txn_snapshot_word_serializer.hpp rename to silkworm/db/transactions/txn_segment_word_codec.hpp index dccf50ee5a..0c25879bc3 100644 --- a/silkworm/db/transactions/txn_snapshot_word_serializer.hpp +++ b/silkworm/db/transactions/txn_segment_word_codec.hpp @@ -22,17 +22,17 @@ #include #include #include -#include +#include namespace silkworm::snapshots { -struct TransactionSnapshotWord { +struct TransactionSegmentWord { uint8_t first_hash_byte{}; ByteView senders_data; ByteView tx_rlp; }; -TransactionSnapshotWord slice_tx_data(ByteView buffer); +TransactionSegmentWord slice_tx_data(ByteView buffer); ByteView slice_tx_payload(ByteView tx_rlp); @@ -46,11 +46,11 @@ void decode_word_into_tx(ByteView word, Transaction& tx); Transaction empty_system_tx(); -struct TransactionSnapshotWordSerializer : public SnapshotWordSerializer { +struct TransactionSegmentWordEncoder : public Encoder { Transaction value; Bytes word; - ~TransactionSnapshotWordSerializer() override = default; + ~TransactionSegmentWordEncoder() override = default; ByteView encode_word() override { word.clear(); @@ -59,28 +59,28 @@ struct TransactionSnapshotWordSerializer : public SnapshotWordSerializer { } }; -static_assert(SnapshotWordSerializerConcept); +static_assert(EncoderConcept); -struct TransactionSnapshotWordDeserializer : public SnapshotWordDeserializer { +struct TransactionSegmentWordDecoder : public Decoder { Transaction value; - ~TransactionSnapshotWordDeserializer() override = default; + ~TransactionSegmentWordDecoder() override = default; void decode_word(ByteView word) override { decode_word_into_tx(word, value); } }; -static_assert(SnapshotWordDeserializerConcept); +static_assert(DecoderConcept); template concept BytesOrByteView = std::same_as || std::same_as; template -struct TransactionSnapshotWordPayloadRlpDeserializer : public SnapshotWordDeserializer { +struct TransactionSegmentWordPayloadRlpDecoder : public Decoder { TBytes value; - ~TransactionSnapshotWordPayloadRlpDeserializer() override = default; + ~TransactionSegmentWordPayloadRlpDecoder() override = default; void decode_word(ByteView word) override { if (word.empty()) { @@ -93,6 +93,6 @@ struct TransactionSnapshotWordPayloadRlpDeserializer : public SnapshotWordDeseri } }; -static_assert(SnapshotWordDeserializerConcept>); +static_assert(DecoderConcept>); } // namespace silkworm::snapshots diff --git a/silkworm/db/transactions/txn_to_block_index.hpp b/silkworm/db/transactions/txn_to_block_index.hpp index b64368a700..9521bf878e 100644 --- a/silkworm/db/transactions/txn_to_block_index.hpp +++ b/silkworm/db/transactions/txn_to_block_index.hpp @@ -21,8 +21,8 @@ #include #include +#include #include -#include #include #include "txn_index.hpp" diff --git a/silkworm/db/transactions/txs_and_bodies_query.hpp b/silkworm/db/transactions/txs_and_bodies_query.hpp index 176596638e..1eb08135e7 100644 --- a/silkworm/db/transactions/txs_and_bodies_query.hpp +++ b/silkworm/db/transactions/txs_and_bodies_query.hpp @@ -26,8 +26,8 @@ #include #include #include +#include #include -#include #include namespace silkworm::snapshots {