From 68b672d55bd7296fa334b6257d29381c8ffa1afa Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Tue, 22 Mar 2022 00:45:44 +0100 Subject: [PATCH 1/2] Add acquire_node_ref() member function to HnswGraph. --- .../tensor/hnsw_saver/hnsw_save_load_test.cpp | 10 ++--- .../src/vespa/searchlib/tensor/hnsw_graph.cpp | 16 ++++---- .../src/vespa/searchlib/tensor/hnsw_graph.h | 26 ++++++++++--- .../src/vespa/searchlib/tensor/hnsw_index.cpp | 37 +++++++++++-------- .../searchlib/tensor/hnsw_index_saver.cpp | 6 +-- 5 files changed, 58 insertions(+), 37 deletions(-) diff --git a/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp b/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp index 43ff6f5b83b9..add7801e03a9 100644 --- a/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp +++ b/searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp @@ -89,13 +89,13 @@ class CopyGraphTest : public ::testing::Test { HnswGraph copy; void expect_empty_d(uint32_t docid) const { - EXPECT_FALSE(copy.node_refs[docid].load_acquire().valid()); + EXPECT_FALSE(copy.acquire_node_ref(docid).valid()); } void expect_level_0(uint32_t docid, const V& exp_links) const { - auto levels = copy.get_level_array(docid); + auto levels = copy.acquire_level_array(docid); EXPECT_GE(levels.size(), 1); - auto links = copy.get_link_array(docid, 0); + auto links = copy.acquire_link_array(docid, 0); EXPECT_EQ(exp_links.size(), links.size()); for (size_t i = 0; i < exp_links.size() && i < links.size(); ++i) { EXPECT_EQ(exp_links[i], links[i]); @@ -103,9 +103,9 @@ class CopyGraphTest : public ::testing::Test { } void expect_level_1(uint32_t docid, const V& exp_links) const { - auto levels = copy.get_level_array(docid); + auto levels = copy.acquire_level_array(docid); EXPECT_EQ(2, levels.size()); - auto links = copy.get_link_array(docid, 1); + auto links = copy.acquire_link_array(docid, 1); EXPECT_EQ(exp_links.size(), links.size()); for (size_t i = 0; i < exp_links.size() && i < links.size(); ++i) { EXPECT_EQ(exp_links[i], links[i]); diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp index 8beb111ca591..3049b6437095 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp @@ -26,7 +26,7 @@ HnswGraph::make_node_for_document(uint32_t docid, uint32_t num_levels) { node_refs.ensure_size(docid + 1, AtomicEntryRef()); // A document cannot be added twice. - assert(!node_refs[docid].load_acquire().valid()); + assert(!get_node_ref(docid).valid()); // Note: The level array instance lives as long as the document is present in the index. vespalib::Array levels(num_levels, AtomicEntryRef()); auto node_ref = nodes.add(levels); @@ -40,7 +40,7 @@ HnswGraph::make_node_for_document(uint32_t docid, uint32_t num_levels) void HnswGraph::remove_node_for_document(uint32_t docid) { - auto node_ref = node_refs[docid].load_acquire(); + auto node_ref = get_node_ref(docid); assert(node_ref.valid()); auto levels = nodes.get(node_ref); vespalib::datastore::EntryRef invalid; @@ -48,7 +48,7 @@ HnswGraph::remove_node_for_document(uint32_t docid) // Ensure data referenced through the old ref can be recycled: nodes.remove(node_ref); for (size_t i = 0; i < levels.size(); ++i) { - auto old_links_ref = levels[i].load_acquire(); + auto old_links_ref = levels[i].load_relaxed(); links.remove(old_links_ref); } if (docid + 1 == node_refs_size.load(std::memory_order_relaxed)) { @@ -60,7 +60,7 @@ void HnswGraph::trim_node_refs_size() { uint32_t check_doc_id = node_refs_size.load(std::memory_order_relaxed) - 1; - while (check_doc_id > 0u && !node_refs[check_doc_id].load_relaxed().valid()) { + while (check_doc_id > 0u && !get_node_ref(check_doc_id).valid()) { --check_doc_id; } node_refs_size.store(check_doc_id + 1, std::memory_order_release); @@ -70,11 +70,11 @@ void HnswGraph::set_link_array(uint32_t docid, uint32_t level, const LinkArrayRef& new_links) { auto new_links_ref = links.add(new_links); - auto node_ref = node_refs[docid].load_acquire(); + auto node_ref = get_node_ref(docid); assert(node_ref.valid()); auto levels = nodes.get_writable(node_ref); assert(level < levels.size()); - auto old_links_ref = levels[level].load_acquire(); + auto old_links_ref = levels[level].load_relaxed(); levels[level].store_release(new_links_ref); links.remove(old_links_ref); } @@ -83,9 +83,9 @@ HnswGraph::Histograms HnswGraph::histograms() const { Histograms result; - size_t num_nodes = node_refs.size(); + size_t num_nodes = node_refs_size.load(std::memory_order_acquire); for (size_t i = 0; i < num_nodes; ++i) { - auto node_ref = node_refs[i].load_acquire(); + auto node_ref = acquire_node_ref(i); if (node_ref.valid()) { uint32_t levels = 0; uint32_t l0links = 0; diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h index 5d6cff731024..e601caddb0ab 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h @@ -56,11 +56,15 @@ struct HnswGraph { void trim_node_refs_size(); NodeRef get_node_ref(uint32_t docid) const { - return node_refs[docid].load_acquire(); + return node_refs[docid].load_relaxed(); + } + + NodeRef acquire_node_ref(uint32_t docid) const { + return node_refs.acquire_elem_ref(docid).load_acquire(); } bool still_valid(uint32_t docid, NodeRef node_ref) const { - return node_ref.valid() && (get_node_ref(docid) == node_ref); + return node_ref.valid() && (acquire_node_ref(docid) == node_ref); } LevelArrayRef get_level_array(NodeRef node_ref) const { @@ -70,11 +74,16 @@ struct HnswGraph { return LevelArrayRef(); } - LevelArrayRef get_level_array(uint32_t docid) const { + LevelArrayRef get_level_array(uint32_t docid) { auto node_ref = get_node_ref(docid); return get_level_array(node_ref); } + LevelArrayRef acquire_level_array(uint32_t docid) const { + auto node_ref = acquire_node_ref(docid); + return get_level_array(node_ref); + } + LinkArrayRef get_link_array(LevelArrayRef levels, uint32_t level) const { if (level < levels.size()) { auto links_ref = levels[level].load_acquire(); @@ -85,11 +94,16 @@ struct HnswGraph { return LinkArrayRef(); } - LinkArrayRef get_link_array(uint32_t docid, uint32_t level) const { + LinkArrayRef get_link_array(uint32_t docid, uint32_t level) { auto levels = get_level_array(docid); return get_link_array(levels, level); } + LinkArrayRef acquire_link_array(uint32_t docid, uint32_t level) const { + auto levels = acquire_level_array(docid); + return get_link_array(levels, level); + } + LinkArrayRef get_link_array(NodeRef node_ref, uint32_t level) const { auto levels = get_level_array(node_ref); return get_link_array(levels, level); @@ -124,7 +138,7 @@ struct HnswGraph { while (true) { uint64_t value = get_entry_atomic(); entry.docid = (uint32_t)value; - entry.node_ref = get_node_ref(entry.docid); + entry.node_ref = acquire_node_ref(entry.docid); entry.level = (int32_t)(value >> 32); if ((entry.docid == 0) && (entry.level == -1) @@ -144,7 +158,7 @@ struct HnswGraph { } } - size_t size() const { return node_refs.size(); } + size_t size() const { return node_refs_size.load(std::memory_order_acquire); } struct Histograms { std::vector level_histogram; diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp index c99e059815bc..5ae26757b0db 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp @@ -245,7 +245,7 @@ HnswIndex::find_nearest_in_layer(const TypedCells& input, const HnswCandidate& e while (keep_searching) { keep_searching = false; for (uint32_t neighbor_docid : _graph.get_link_array(nearest.node_ref, level)) { - auto neighbor_ref = _graph.get_node_ref(neighbor_docid); + auto neighbor_ref = _graph.acquire_node_ref(neighbor_docid); double dist = calc_distance(input, neighbor_docid); if (_graph.still_valid(neighbor_docid, neighbor_ref) && dist < nearest.distance) @@ -289,7 +289,7 @@ HnswIndex::search_layer_helper(const TypedCells& input, uint32_t neighbors_to_fi if (neighbor_docid >= doc_id_limit) { continue; } - auto neighbor_ref = _graph.get_node_ref(neighbor_docid); + auto neighbor_ref = _graph.acquire_node_ref(neighbor_docid); if ((! neighbor_ref.valid()) || ! visited.try_mark(neighbor_docid)) { @@ -433,7 +433,7 @@ HnswIndex::prepare_add_document(uint32_t docid, TypedCells vector, vespalib::GenerationHandler::Guard read_guard) const { - uint32_t max_nodes = _graph.node_refs.size(); + uint32_t max_nodes = _graph.node_refs_size.load(std::memory_order_acquire); if (max_nodes < _cfg.min_size_before_two_phase()) { // the first documents added will do all work in write thread // to ensure they are linked together: @@ -543,7 +543,7 @@ HnswIndex::compact_link_arrays(CompactionSpec compaction_spec, const CompactionS auto context = _graph.links.compactWorst(compaction_spec, compaction_strategy); uint32_t doc_id_limit = _graph.node_refs.size(); for (uint32_t doc_id = 1; doc_id < doc_id_limit; ++doc_id) { - EntryRef level_ref = _graph.node_refs[doc_id].load_relaxed(); + EntryRef level_ref = _graph.get_node_ref(doc_id); if (level_ref.valid()) { vespalib::ArrayRef refs(_graph.nodes.get_writable(level_ref)); context->compact(refs); @@ -756,7 +756,7 @@ HnswIndex::top_k_candidates(const TypedCells &vector, uint32_t k, const BitVecto HnswNode HnswIndex::get_node(uint32_t docid) const { - auto node_ref = _graph.node_refs[docid].load_acquire(); + auto node_ref = _graph.acquire_node_ref(docid); if (!node_ref.valid()) { return HnswNode(); } @@ -790,15 +790,16 @@ bool HnswIndex::check_link_symmetry() const { bool all_sym = true; - for (size_t docid = 0; docid < _graph.node_refs.size(); ++docid) { - auto node_ref = _graph.node_refs[docid].load_acquire(); + size_t doc_id_limit = _graph.size(); + for (size_t docid = 0; docid < doc_id_limit; ++docid) { + auto node_ref = _graph.acquire_node_ref(docid); if (node_ref.valid()) { auto levels = _graph.nodes.get(node_ref); uint32_t level = 0; for (const auto& links_ref : levels) { auto links = _graph.links.get(links_ref.load_acquire()); for (auto neighbor_docid : links) { - auto neighbor_links = _graph.get_link_array(neighbor_docid, level); + auto neighbor_links = _graph.acquire_link_array(neighbor_docid, level); if (! has_link_to(neighbor_links, docid)) { all_sym = false; LOG(warning, "check_link_symmetry: docid %zu links to %u on level %u, but no backlink", @@ -822,8 +823,10 @@ HnswIndex::count_reachable_nodes() const } std::vector visited(_graph.size()); LinkArray found_links; - found_links.push_back(entry.docid); - visited[entry.docid] = true; + if (entry.docid < visited.size()) { + found_links.push_back(entry.docid); + visited[entry.docid] = true; + } vespalib::steady_time doom = vespalib::steady_clock::now() + MAX_COUNT_DURATION; while (search_level >= 0) { for (uint32_t idx = 0; idx < found_links.size(); ++idx) { @@ -831,11 +834,15 @@ HnswIndex::count_reachable_nodes() const return {found_links.size(), false}; } uint32_t docid = found_links[idx]; - auto neighbors = _graph.get_link_array(docid, search_level); - for (uint32_t neighbor : neighbors) { - if (visited[neighbor]) continue; - visited[neighbor] = true; - found_links.push_back(neighbor); + if (docid < visited.size()) { + auto neighbors = _graph.acquire_link_array(docid, search_level); + for (uint32_t neighbor : neighbors) { + if (neighbor >= visited.size() || visited[neighbor]) { + continue; + } + visited[neighbor] = true; + found_links.push_back(neighbor); + } } } --search_level; diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp index 2eb9f7d4acdc..cc2ecd584e1b 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp @@ -14,7 +14,7 @@ count_valid_link_arrays(const HnswGraph & graph) { size_t count(0); size_t num_nodes = graph.node_refs.size(); for (size_t i = 0; i < num_nodes; ++i) { - auto node_ref = graph.node_refs[i].load_acquire(); + auto node_ref = graph.get_node_ref(i); if (node_ref.valid()) { count += graph.nodes.get(node_ref).size(); } @@ -47,11 +47,11 @@ HnswIndexSaver::HnswIndexSaver(const HnswGraph &graph) _meta_data.nodes.reserve(num_nodes+1); for (size_t i = 0; i < num_nodes; ++i) { _meta_data.nodes.push_back(_meta_data.refs.size()); - auto node_ref = graph.node_refs[i].load_acquire(); + auto node_ref = graph.get_node_ref(i); if (node_ref.valid()) { auto levels = graph.nodes.get(node_ref); for (const auto& links_ref : levels) { - _meta_data.refs.push_back(links_ref.load_acquire()); + _meta_data.refs.push_back(links_ref.load_relaxed()); } } } From 2188d3b1b8e15d3625e480fddeb7afd7001e42d4 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Tue, 22 Mar 2022 13:39:49 +0100 Subject: [PATCH 2/2] Restore const qualifier for get_link_array and get_level_array member functions. --- searchlib/src/vespa/searchlib/tensor/hnsw_graph.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h index e601caddb0ab..726abb8141d6 100644 --- a/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h +++ b/searchlib/src/vespa/searchlib/tensor/hnsw_graph.h @@ -74,7 +74,7 @@ struct HnswGraph { return LevelArrayRef(); } - LevelArrayRef get_level_array(uint32_t docid) { + LevelArrayRef get_level_array(uint32_t docid) const { auto node_ref = get_node_ref(docid); return get_level_array(node_ref); } @@ -94,7 +94,7 @@ struct HnswGraph { return LinkArrayRef(); } - LinkArrayRef get_link_array(uint32_t docid, uint32_t level) { + LinkArrayRef get_link_array(uint32_t docid, uint32_t level) const { auto levels = get_level_array(docid); return get_link_array(levels, level); }