Skip to content

Commit

Permalink
Merge pull request #21766 from vespa-engine/toregge/add-acquire-node-…
Browse files Browse the repository at this point in the history
…ref-to-hnsw-graph

Add acquire_node_ref() member function to HnswGraph.
  • Loading branch information
baldersheim authored Mar 23, 2022
2 parents b8f4f35 + 2188d3b commit b07fc57
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 35 deletions.
10 changes: 5 additions & 5 deletions searchlib/src/tests/tensor/hnsw_saver/hnsw_save_load_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,23 +89,23 @@ class CopyGraphTest : public ::testing::Test {
HnswGraph copy;

void expect_empty_d(uint32_t docid) const {
EXPECT_FALSE(copy.node_refs[docid].load_acquire().valid());
EXPECT_FALSE(copy.acquire_node_ref(docid).valid());
}

void expect_level_0(uint32_t docid, const V& exp_links) const {
auto levels = copy.get_level_array(docid);
auto levels = copy.acquire_level_array(docid);
EXPECT_GE(levels.size(), 1);
auto links = copy.get_link_array(docid, 0);
auto links = copy.acquire_link_array(docid, 0);
EXPECT_EQ(exp_links.size(), links.size());
for (size_t i = 0; i < exp_links.size() && i < links.size(); ++i) {
EXPECT_EQ(exp_links[i], links[i]);
}
}

void expect_level_1(uint32_t docid, const V& exp_links) const {
auto levels = copy.get_level_array(docid);
auto levels = copy.acquire_level_array(docid);
EXPECT_EQ(2, levels.size());
auto links = copy.get_link_array(docid, 1);
auto links = copy.acquire_link_array(docid, 1);
EXPECT_EQ(exp_links.size(), links.size());
for (size_t i = 0; i < exp_links.size() && i < links.size(); ++i) {
EXPECT_EQ(exp_links[i], links[i]);
Expand Down
16 changes: 8 additions & 8 deletions searchlib/src/vespa/searchlib/tensor/hnsw_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ HnswGraph::make_node_for_document(uint32_t docid, uint32_t num_levels)
{
node_refs.ensure_size(docid + 1, AtomicEntryRef());
// A document cannot be added twice.
assert(!node_refs[docid].load_acquire().valid());
assert(!get_node_ref(docid).valid());
// Note: The level array instance lives as long as the document is present in the index.
vespalib::Array<AtomicEntryRef> levels(num_levels, AtomicEntryRef());
auto node_ref = nodes.add(levels);
Expand All @@ -40,15 +40,15 @@ HnswGraph::make_node_for_document(uint32_t docid, uint32_t num_levels)
void
HnswGraph::remove_node_for_document(uint32_t docid)
{
auto node_ref = node_refs[docid].load_acquire();
auto node_ref = get_node_ref(docid);
assert(node_ref.valid());
auto levels = nodes.get(node_ref);
vespalib::datastore::EntryRef invalid;
node_refs[docid].store_release(invalid);
// Ensure data referenced through the old ref can be recycled:
nodes.remove(node_ref);
for (size_t i = 0; i < levels.size(); ++i) {
auto old_links_ref = levels[i].load_acquire();
auto old_links_ref = levels[i].load_relaxed();
links.remove(old_links_ref);
}
if (docid + 1 == node_refs_size.load(std::memory_order_relaxed)) {
Expand All @@ -60,7 +60,7 @@ void
HnswGraph::trim_node_refs_size()
{
uint32_t check_doc_id = node_refs_size.load(std::memory_order_relaxed) - 1;
while (check_doc_id > 0u && !node_refs[check_doc_id].load_relaxed().valid()) {
while (check_doc_id > 0u && !get_node_ref(check_doc_id).valid()) {
--check_doc_id;
}
node_refs_size.store(check_doc_id + 1, std::memory_order_release);
Expand All @@ -70,11 +70,11 @@ void
HnswGraph::set_link_array(uint32_t docid, uint32_t level, const LinkArrayRef& new_links)
{
auto new_links_ref = links.add(new_links);
auto node_ref = node_refs[docid].load_acquire();
auto node_ref = get_node_ref(docid);
assert(node_ref.valid());
auto levels = nodes.get_writable(node_ref);
assert(level < levels.size());
auto old_links_ref = levels[level].load_acquire();
auto old_links_ref = levels[level].load_relaxed();
levels[level].store_release(new_links_ref);
links.remove(old_links_ref);
}
Expand All @@ -83,9 +83,9 @@ HnswGraph::Histograms
HnswGraph::histograms() const
{
Histograms result;
size_t num_nodes = node_refs.size();
size_t num_nodes = node_refs_size.load(std::memory_order_acquire);
for (size_t i = 0; i < num_nodes; ++i) {
auto node_ref = node_refs[i].load_acquire();
auto node_ref = acquire_node_ref(i);
if (node_ref.valid()) {
uint32_t levels = 0;
uint32_t l0links = 0;
Expand Down
22 changes: 18 additions & 4 deletions searchlib/src/vespa/searchlib/tensor/hnsw_graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,15 @@ struct HnswGraph {
void trim_node_refs_size();

NodeRef get_node_ref(uint32_t docid) const {
return node_refs[docid].load_acquire();
return node_refs[docid].load_relaxed();
}

NodeRef acquire_node_ref(uint32_t docid) const {
return node_refs.acquire_elem_ref(docid).load_acquire();
}

bool still_valid(uint32_t docid, NodeRef node_ref) const {
return node_ref.valid() && (get_node_ref(docid) == node_ref);
return node_ref.valid() && (acquire_node_ref(docid) == node_ref);
}

LevelArrayRef get_level_array(NodeRef node_ref) const {
Expand All @@ -75,6 +79,11 @@ struct HnswGraph {
return get_level_array(node_ref);
}

LevelArrayRef acquire_level_array(uint32_t docid) const {
auto node_ref = acquire_node_ref(docid);
return get_level_array(node_ref);
}

LinkArrayRef get_link_array(LevelArrayRef levels, uint32_t level) const {
if (level < levels.size()) {
auto links_ref = levels[level].load_acquire();
Expand All @@ -90,6 +99,11 @@ struct HnswGraph {
return get_link_array(levels, level);
}

LinkArrayRef acquire_link_array(uint32_t docid, uint32_t level) const {
auto levels = acquire_level_array(docid);
return get_link_array(levels, level);
}

LinkArrayRef get_link_array(NodeRef node_ref, uint32_t level) const {
auto levels = get_level_array(node_ref);
return get_link_array(levels, level);
Expand Down Expand Up @@ -124,7 +138,7 @@ struct HnswGraph {
while (true) {
uint64_t value = get_entry_atomic();
entry.docid = (uint32_t)value;
entry.node_ref = get_node_ref(entry.docid);
entry.node_ref = acquire_node_ref(entry.docid);
entry.level = (int32_t)(value >> 32);
if ((entry.docid == 0)
&& (entry.level == -1)
Expand All @@ -144,7 +158,7 @@ struct HnswGraph {
}
}

size_t size() const { return node_refs.size(); }
size_t size() const { return node_refs_size.load(std::memory_order_acquire); }

struct Histograms {
std::vector<uint32_t> level_histogram;
Expand Down
37 changes: 22 additions & 15 deletions searchlib/src/vespa/searchlib/tensor/hnsw_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ HnswIndex::find_nearest_in_layer(const TypedCells& input, const HnswCandidate& e
while (keep_searching) {
keep_searching = false;
for (uint32_t neighbor_docid : _graph.get_link_array(nearest.node_ref, level)) {
auto neighbor_ref = _graph.get_node_ref(neighbor_docid);
auto neighbor_ref = _graph.acquire_node_ref(neighbor_docid);
double dist = calc_distance(input, neighbor_docid);
if (_graph.still_valid(neighbor_docid, neighbor_ref)
&& dist < nearest.distance)
Expand Down Expand Up @@ -289,7 +289,7 @@ HnswIndex::search_layer_helper(const TypedCells& input, uint32_t neighbors_to_fi
if (neighbor_docid >= doc_id_limit) {
continue;
}
auto neighbor_ref = _graph.get_node_ref(neighbor_docid);
auto neighbor_ref = _graph.acquire_node_ref(neighbor_docid);
if ((! neighbor_ref.valid())
|| ! visited.try_mark(neighbor_docid))
{
Expand Down Expand Up @@ -433,7 +433,7 @@ HnswIndex::prepare_add_document(uint32_t docid,
TypedCells vector,
vespalib::GenerationHandler::Guard read_guard) const
{
uint32_t max_nodes = _graph.node_refs.size();
uint32_t max_nodes = _graph.node_refs_size.load(std::memory_order_acquire);
if (max_nodes < _cfg.min_size_before_two_phase()) {
// the first documents added will do all work in write thread
// to ensure they are linked together:
Expand Down Expand Up @@ -543,7 +543,7 @@ HnswIndex::compact_link_arrays(CompactionSpec compaction_spec, const CompactionS
auto context = _graph.links.compactWorst(compaction_spec, compaction_strategy);
uint32_t doc_id_limit = _graph.node_refs.size();
for (uint32_t doc_id = 1; doc_id < doc_id_limit; ++doc_id) {
EntryRef level_ref = _graph.node_refs[doc_id].load_relaxed();
EntryRef level_ref = _graph.get_node_ref(doc_id);
if (level_ref.valid()) {
vespalib::ArrayRef<AtomicEntryRef> refs(_graph.nodes.get_writable(level_ref));
context->compact(refs);
Expand Down Expand Up @@ -756,7 +756,7 @@ HnswIndex::top_k_candidates(const TypedCells &vector, uint32_t k, const BitVecto
HnswNode
HnswIndex::get_node(uint32_t docid) const
{
auto node_ref = _graph.node_refs[docid].load_acquire();
auto node_ref = _graph.acquire_node_ref(docid);
if (!node_ref.valid()) {
return HnswNode();
}
Expand Down Expand Up @@ -790,15 +790,16 @@ bool
HnswIndex::check_link_symmetry() const
{
bool all_sym = true;
for (size_t docid = 0; docid < _graph.node_refs.size(); ++docid) {
auto node_ref = _graph.node_refs[docid].load_acquire();
size_t doc_id_limit = _graph.size();
for (size_t docid = 0; docid < doc_id_limit; ++docid) {
auto node_ref = _graph.acquire_node_ref(docid);
if (node_ref.valid()) {
auto levels = _graph.nodes.get(node_ref);
uint32_t level = 0;
for (const auto& links_ref : levels) {
auto links = _graph.links.get(links_ref.load_acquire());
for (auto neighbor_docid : links) {
auto neighbor_links = _graph.get_link_array(neighbor_docid, level);
auto neighbor_links = _graph.acquire_link_array(neighbor_docid, level);
if (! has_link_to(neighbor_links, docid)) {
all_sym = false;
LOG(warning, "check_link_symmetry: docid %zu links to %u on level %u, but no backlink",
Expand All @@ -822,20 +823,26 @@ HnswIndex::count_reachable_nodes() const
}
std::vector<bool> visited(_graph.size());
LinkArray found_links;
found_links.push_back(entry.docid);
visited[entry.docid] = true;
if (entry.docid < visited.size()) {
found_links.push_back(entry.docid);
visited[entry.docid] = true;
}
vespalib::steady_time doom = vespalib::steady_clock::now() + MAX_COUNT_DURATION;
while (search_level >= 0) {
for (uint32_t idx = 0; idx < found_links.size(); ++idx) {
if (vespalib::steady_clock::now() > doom) {
return {found_links.size(), false};
}
uint32_t docid = found_links[idx];
auto neighbors = _graph.get_link_array(docid, search_level);
for (uint32_t neighbor : neighbors) {
if (visited[neighbor]) continue;
visited[neighbor] = true;
found_links.push_back(neighbor);
if (docid < visited.size()) {
auto neighbors = _graph.acquire_link_array(docid, search_level);
for (uint32_t neighbor : neighbors) {
if (neighbor >= visited.size() || visited[neighbor]) {
continue;
}
visited[neighbor] = true;
found_links.push_back(neighbor);
}
}
}
--search_level;
Expand Down
6 changes: 3 additions & 3 deletions searchlib/src/vespa/searchlib/tensor/hnsw_index_saver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ count_valid_link_arrays(const HnswGraph & graph) {
size_t count(0);
size_t num_nodes = graph.node_refs.size();
for (size_t i = 0; i < num_nodes; ++i) {
auto node_ref = graph.node_refs[i].load_acquire();
auto node_ref = graph.get_node_ref(i);
if (node_ref.valid()) {
count += graph.nodes.get(node_ref).size();
}
Expand Down Expand Up @@ -47,11 +47,11 @@ HnswIndexSaver::HnswIndexSaver(const HnswGraph &graph)
_meta_data.nodes.reserve(num_nodes+1);
for (size_t i = 0; i < num_nodes; ++i) {
_meta_data.nodes.push_back(_meta_data.refs.size());
auto node_ref = graph.node_refs[i].load_acquire();
auto node_ref = graph.get_node_ref(i);
if (node_ref.valid()) {
auto levels = graph.nodes.get(node_ref);
for (const auto& links_ref : levels) {
_meta_data.refs.push_back(links_ref.load_acquire());
_meta_data.refs.push_back(links_ref.load_relaxed());
}
}
}
Expand Down

0 comments on commit b07fc57

Please sign in to comment.