From 7c7e1390258fc313d5596a57da32401b1d57cf09 Mon Sep 17 00:00:00 2001 From: Harun Mustafa Date: Tue, 8 Oct 2024 11:41:18 +0200 Subject: [PATCH] test --- metagraph/src/cli/query.cpp | 5 +- .../src/graph/alignment/annotation_buffer.cpp | 1 + metagraph/src/graph/annotated_dbg.cpp | 101 +++++++++++---- metagraph/src/graph/annotated_dbg.hpp | 7 +- .../graph/representation/canonical_dbg.cpp | 16 --- .../graph/representation/hash/dbg_sshash.cpp | 118 +++++++++++++++--- .../graph/representation/hash/dbg_sshash.hpp | 5 +- .../tests/annotation/test_annotated_dbg.cpp | 33 ++--- .../annotation/test_annotated_dbg_helpers.cpp | 2 + .../tests/graph/all/test_dbg_helpers.cpp | 79 +++++++++++- .../tests/graph/all/test_dbg_helpers.hpp | 8 ++ metagraph/tests/graph/test_canonical_dbg.cpp | 3 +- metagraph/tests/graph/test_dbg_canonical.cpp | 3 +- 13 files changed, 303 insertions(+), 78 deletions(-) diff --git a/metagraph/src/cli/query.cpp b/metagraph/src/cli/query.cpp index cb7d2e35c9..fcb41c61b5 100644 --- a/metagraph/src/cli/query.cpp +++ b/metagraph/src/cli/query.cpp @@ -955,8 +955,9 @@ construct_query_graph(const AnnotatedDBG &anno_graph, #pragma omp parallel for num_threads(num_threads) for (size_t i = 0; i < contigs.size(); ++i) { contigs[i].second.reserve(contigs[i].first.length() - graph_init->get_k() + 1); - full_dbg.map_to_nodes(contigs[i].first, - [&](node_index node) { contigs[i].second.push_back(node); }); + call_annotated_nodes_offsets(full_dbg, contigs[i].first, [&](node_index node, int64_t) { + contigs[i].second.push_back(node); + }); } logger->trace("[Query graph construction] Contigs mapped to the full graph in {} sec", timer.elapsed()); diff --git a/metagraph/src/graph/alignment/annotation_buffer.cpp b/metagraph/src/graph/alignment/annotation_buffer.cpp index 4020f312a7..16d501385b 100644 --- a/metagraph/src/graph/alignment/annotation_buffer.cpp +++ b/metagraph/src/graph/alignment/annotation_buffer.cpp @@ -48,6 +48,7 @@ void AnnotationBuffer::fetch_queued_annotations() { for (const auto &path : queued_paths_) { std::vector base_path; + std::vector base_path_offsets; if (base_graph->get_mode() == DeBruijnGraph::CANONICAL) { // TODO: avoid this call of spell_path std::string query = spell_path(graph_, path); diff --git a/metagraph/src/graph/annotated_dbg.cpp b/metagraph/src/graph/annotated_dbg.cpp index 430365365a..6fc6ea257c 100644 --- a/metagraph/src/graph/annotated_dbg.cpp +++ b/metagraph/src/graph/annotated_dbg.cpp @@ -6,6 +6,7 @@ #include "annotation/representation/row_compressed/annotate_row_compressed.hpp" #include "annotation/int_matrix/base/int_matrix.hpp" #include "graph/representation/canonical_dbg.hpp" +#include "graph/representation/hash/dbg_sshash.hpp" #include "common/aligned_vector.hpp" #include "common/vectors/vector_algorithm.hpp" #include "common/vector_map.hpp" @@ -23,6 +24,21 @@ using Column = mtg::annot::matrix::BinaryMatrix::Column; typedef AnnotatedDBG::Label Label; typedef std::pair StringCountPair; +void call_annotated_nodes_offsets(const SequenceGraph &graph, + std::string_view sequence, + const std::function &callback) { + if (const auto *sshash = dynamic_cast(&graph)) { + if (sshash->is_monochromatic()) { + sshash->map_to_contigs_with_rc(sequence, [&](SequenceGraph::node_index i, int64_t offset, bool) { + callback(i, offset); + }); + return; + } + } + + graph.map_to_nodes(sequence, [&](SequenceGraph::node_index i) { callback(i, 0); }); +} + AnnotatedSequenceGraph ::AnnotatedSequenceGraph(std::shared_ptr graph, @@ -46,10 +62,9 @@ ::annotate_sequence(std::string_view sequence, std::vector indices; indices.reserve(sequence.size()); - - graph_->map_to_nodes(sequence, [&](node_index i) { + call_annotated_nodes_offsets(*graph_, sequence, [&](node_index i, int64_t) { if (i > 0) - indices.push_back(graph_to_anno_index(i)); + indices.emplace_back(graph_to_anno_index(i)); }); if (!indices.size()) @@ -80,9 +95,9 @@ ::annotate_sequences(const std::vector if (!indices.capacity()) indices.reserve(data[t].first.size()); - graph_->map_to_nodes(data[t].first, [&](node_index i) { + call_annotated_nodes_offsets(*graph_, data[t].first, [&](node_index i, int64_t) { if (i > 0) - indices.push_back(graph_to_anno_index(i)); + indices.emplace_back(graph_to_anno_index(i)); }); } @@ -117,10 +132,10 @@ void AnnotatedDBG::add_kmer_counts(std::string_view sequence, indices.reserve(sequence.size() - dbg_.get_k() + 1); size_t end = 0; - graph_->map_to_nodes(sequence, [&](node_index i) { + call_annotated_nodes_offsets(dbg_, sequence, [&](node_index i, int64_t) { // only insert indexes for matched k-mers and shift counts accordingly if (i > 0) { - indices.push_back(graph_to_anno_index(i)); + indices.emplace_back(graph_to_anno_index(i)); kmer_counts[indices.size() - 1] = kmer_counts[end++]; } }); @@ -139,15 +154,22 @@ void AnnotatedDBG::add_kmer_coord(std::string_view sequence, if (sequence.size() < dbg_.get_k()) return; - std::vector indices = map_to_nodes(dbg_, sequence); + std::vector indices; + std::vector offsets; + call_annotated_nodes_offsets(dbg_, sequence, [&](node_index i, int64_t offset) { + indices.emplace_back(i); + offsets.emplace_back(offset); + }); std::lock_guard lock(mutex_); + auto it = offsets.begin(); for (node_index i : indices) { // only insert coordinates for matched k-mers and increment the coordinates if (i > 0) - annotator_->add_label_coord(graph_to_anno_index(i), labels, coord); + annotator_->add_label_coord(graph_to_anno_index(i), labels, coord - *it); coord++; + ++it; } } @@ -156,10 +178,17 @@ void AnnotatedDBG::add_kmer_coords( assert(check_compatibility()); std::vector> ids; + std::vector> offsets; ids.reserve(data.size()); for (const auto &[sequence, labels, _] : data) { - if (sequence.size() >= dbg_.get_k()) - ids.push_back(map_to_nodes(dbg_, sequence)); + if (sequence.size() >= dbg_.get_k()) { + auto &id = ids.emplace_back(); + auto &offset = offsets.emplace_back(); + call_annotated_nodes_offsets(dbg_, sequence, [&](node_index i, int64_t o) { + id.emplace_back(i); + offset.emplace_back(o); + }); + } } std::lock_guard lock(mutex_); @@ -168,11 +197,13 @@ void AnnotatedDBG::add_kmer_coords( const auto &labels = std::get<1>(data[t]); uint64_t coord = std::get<2>(data[t]); + auto it = offsets[t].begin(); for (node_index i : ids[t]) { // only insert coordinates for matched k-mers and increment the coordinates if (i > 0) - annotator_->add_label_coord(graph_to_anno_index(i), labels, coord); + annotator_->add_label_coord(graph_to_anno_index(i), labels, coord - *it); coord++; + ++it; } } } @@ -200,10 +231,10 @@ void AnnotatedDBG::annotate_kmer_coords( coords[last].reserve(sequence.size() - dbg_.get_k() + 1); } - graph_->map_to_nodes(sequence, [&](node_index i) { + call_annotated_nodes_offsets(*graph_, sequence, [&](node_index i, int64_t o) { if (i > 0) { ids[last].push_back(graph_to_anno_index(i)); - coords[last].emplace_back(graph_to_anno_index(i), coord); + coords[last].emplace_back(graph_to_anno_index(i), coord - o); } coord++; }); @@ -238,7 +269,7 @@ std::vector