Skip to content

Commit

Permalink
Move get_last, row_diff_traverse, row_diff_successor into row_diff_bu…
Browse files Browse the repository at this point in the history
…ilder
  • Loading branch information
adamant-pwn committed Oct 10, 2024
1 parent 4e971da commit 7737e26
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 113 deletions.
102 changes: 93 additions & 9 deletions metagraph/src/annotation/row_diff_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "common/elias_fano/elias_fano_merger.hpp"
#include "common/utils/file_utils.hpp"
#include "common/vectors/bit_vector_sdsl.hpp"
#include "common/vectors/bit_vector_dyn.hpp"
#include "graph/annotated_dbg.hpp"

const uint64_t BLOCK_SIZE = 1 << 25;
Expand All @@ -26,6 +27,7 @@ namespace annot {
using namespace mtg::annot::matrix;
using mtg::common::logger;
using mtg::graph::boss::BOSS;
using node_index = graph::DeBruijnGraph::node_index;
namespace fs = std::filesystem;

using anchor_bv_type = RowDiff<ColumnMajor>::anchor_bv_type;
Expand Down Expand Up @@ -264,6 +266,23 @@ void sum_and_call_counts(const fs::path &dir,
}
}

/**
 * Return the bit vector used as the default RowDiff successor routing.
 *
 * For a DBGSuccinct graph, this is a non-owning pointer to the `last` vector
 * of the underlying BOSS table, so it must not outlive |graph|.
 * For any other graph, a new bit_vector_dyn of size max_index() + 1 is built
 * and owned by the returned pointer: callers must keep the shared_ptr alive
 * for as long as they use the vector.
 */
std::shared_ptr<const bit_vector> get_last(const graph::DeBruijnGraph &graph) {
    const auto *succinct = dynamic_cast<const graph::DBGSuccinct *>(&graph);
    if (succinct != nullptr) {
        // aliasing constructor with an empty owner: the result does not
        // manage the lifetime of the vector stored in BOSS
        return std::shared_ptr<const bit_vector>(
            std::shared_ptr<const bit_vector>{}, &succinct->get_boss().get_last());
    }

    auto last_bv = std::make_shared<bit_vector_dyn>(graph.max_index() + 1);
    graph.call_nodes([&](node_index node) {
        // value-initialized to (0, 0): a node without outgoing k-mers
        // therefore marks index 0
        std::pair<char, node_index> best;
        graph.call_outgoing_kmers(node, [&](node_index next, char c) {
            const std::pair<char, node_index> candidate(c, next);
            if (best < candidate)
                best = candidate;
        });
        // mark the neighbor reached via the greatest (character, index) pair
        last_bv->set(best.second, true);
    });
    return last_bv;
}

rd_succ_bv_type route_at_forks(const graph::DeBruijnGraph &graph,
const std::string &rd_succ_filename,
const std::string &count_vectors_dir,
Expand All @@ -282,7 +301,7 @@ rd_succ_bv_type route_at_forks(const graph::DeBruijnGraph &graph,
logger->trace("RowDiff successors will be set to the adjacent nodes with"
" the largest number of labels");

const bit_vector &last = *graph.get_last();
const bit_vector &last = *get_last(graph);
graph::DeBruijnGraph::node_index graph_idx = to_node(0);

std::vector<uint32_t> outgoing_counts;
Expand Down Expand Up @@ -326,6 +345,71 @@ rd_succ_bv_type route_at_forks(const graph::DeBruijnGraph &graph,
return rd_succ;
}

/**
 * Return the RowDiff successor of |node|.
 *
 * DBGSuccinct graphs delegate to BOSS. For other graphs, the successor is the
 * adjacent outgoing node whose bit is set in |rd_succ|; if several are set,
 * the one reported last by adjacent_outgoing_nodes() is returned.
 * A successor is asserted to exist.
 */
node_index row_diff_successor(const graph::DeBruijnGraph &graph,
                              node_index node,
                              const bit_vector &rd_succ) {
    const auto *succinct = dynamic_cast<const graph::DBGSuccinct *>(&graph);
    if (succinct != nullptr)
        return succinct->get_boss().row_diff_successor(node, rd_succ);

    node_index succ = graph::DeBruijnGraph::npos;
    graph.adjacent_outgoing_nodes(node, [&succ, &rd_succ](node_index next) {
        if (!rd_succ[next])
            return;

        succ = next;
    });
    assert(succ != graph::DeBruijnGraph::npos && "a row diff successor must exist");
    return succ;
}

/**
 * Mark RowDiff terminal (anchor) nodes by following the successors chosen in
 * |rd_succ|.
 *
 * DBGSuccinct graphs delegate to BOSS. For other graphs, a sequential walk is
 * performed (|num_threads| is not used in that case). A bit in |terminal| is set for:
 *   - nodes without outgoing edges,
 *   - the node at which a walk runs into a node of its own unfinished path
 *     (a cyclic dependency),
 *   - nodes whose distance to the next terminal node reaches |max_length|.
 *
 * @param graph       graph to traverse
 * @param num_threads forwarded to BOSS for DBGSuccinct graphs
 * @param max_length  path length at which a new terminal node is placed
 * @param rd_succ     marks the RowDiff successor among outgoing neighbors;
 *                    must have size max_index() + 1
 * @param terminal    output vector of size max_index() + 1; bits are only set,
 *                    never cleared
 */
void row_diff_traverse(const graph::DeBruijnGraph &graph,
                       size_t num_threads,
                       size_t max_length,
                       const bit_vector &rd_succ,
                       sdsl::bit_vector *terminal) {
    if (auto* dbg_succ = dynamic_cast<graph::DBGSuccinct const*>(&graph)) {
        dbg_succ->get_boss().row_diff_traverse(
                num_threads, max_length, rd_succ, terminal);
        return;
    }

    assert(terminal != nullptr);
    sdsl::bit_vector visited(graph.max_index() + 1);
    auto finalised = visited;
    // distance[v]: number of successor steps from v to the next terminal node
    std::vector<size_t> distance(graph.max_index() + 1);
    assert(terminal->size() == visited.size());
    assert(rd_succ.size() == visited.size());

    // takes node_index (not int) so that large indexes are not narrowed
    auto set_terminal = [&](node_index v) {
        distance[v] = 0;
        (*terminal)[v] = true;
    };

    // scratch stack shared by all callback invocations; it is always empty
    // when an invocation returns, so no function-local static is needed
    std::stack<node_index> path;

    graph.call_nodes([&](node_index v) {
        // walk along the successors until an already visited node is reached
        while (!visited[v]) {
            path.push(v);
            visited[v] = true;
            if (!graph.has_no_outgoing(v)) {
                v = row_diff_successor(graph, v, rd_succ);
            }
        }
        // Either a sink, or a cyclic dependency
        if (!finalised[v]) {
            set_terminal(v);
            finalised[v] = true;
        }
        // unwind the path, assigning distances from its end backwards
        while (!path.empty()) {
            const node_index succ = std::exchange(v, path.top());
            path.pop();
            if (!finalised[v]) {
                distance[v] = distance[succ] + 1;
                if (distance[v] == max_length) {
                    set_terminal(v);
                }
                finalised[v] = true;
            }
        }
    });
}

void build_pred_succ(const graph::DeBruijnGraph &graph,
const std::string &outfbase,
const std::string &count_vectors_dir,
Expand Down Expand Up @@ -366,13 +450,13 @@ void build_pred_succ(const graph::DeBruijnGraph &graph,
// traverse graph in parallel processing blocks of size |BS|
// note: the pragma below requests dynamic (not static) scheduling; its `ordered` clause permits ordered regions that run in block order
#pragma omp parallel for ordered num_threads(num_threads) schedule(dynamic)
for (uint64_t start = 1; start <= graph.max_index(); start += BS) {
std::vector<uint64_t> succ_buf;
for (node_index start = 1; start <= graph.max_index(); start += BS) {
std::vector<node_index> succ_buf;
std::vector<bool> succ_boundary_buf;
std::vector<uint64_t> pred_buf;
std::vector<node_index> pred_buf;
std::vector<bool> pred_boundary_buf;

for (uint64_t i = start; i < std::min(start + BS, graph.max_index() + 1); ++i) {
for (node_index i = start; i < std::min(start + BS, graph.max_index() + 1); ++i) {
bool skip_succ = false, skip_all = false;
if (succinct) { // Legacy code for DBGSuccinct
BOSS::edge_index boss_idx = i;
Expand All @@ -384,7 +468,7 @@ void build_pred_succ(const graph::DeBruijnGraph &graph,
}
auto with_rd_succ = [&](bit_vector const& rd_succ) {
if(!skip_succ) {
auto j = graph.row_diff_successor(i, rd_succ);
auto j = row_diff_successor(graph, i, rd_succ);
succ_buf.push_back(to_row(j));
succ_boundary_buf.push_back(0);
}
Expand All @@ -402,7 +486,7 @@ void build_pred_succ(const graph::DeBruijnGraph &graph,
if (rd_succ.size()) {
with_rd_succ(rd_succ);
} else {
with_rd_succ(*graph.get_last());
with_rd_succ(*get_last(graph));
}
}
succ_boundary_buf.push_back(1);
Expand Down Expand Up @@ -484,11 +568,11 @@ void assign_anchors(const graph::DeBruijnGraph &graph,

if (rd_succ.size()) {
logger->trace("Assigning anchors for RowDiff successors {}...", rd_succ_fname);
graph.row_diff_traverse(num_threads, max_length, rd_succ, &anchors_bv);
row_diff_traverse(graph, num_threads, max_length, rd_succ, &anchors_bv);
} else {
logger->warn("Assigning anchors without chosen RowDiff successors."
" The last outgoing edges will be used for routing.");
graph.row_diff_traverse(num_threads, max_length, *graph.get_last(), &anchors_bv);
row_diff_traverse(graph, num_threads, max_length, *get_last(graph), &anchors_bv);
}
}

Expand Down
66 changes: 0 additions & 66 deletions metagraph/src/graph/representation/base/sequence_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#include <sdsl/int_vector.hpp>

#include "common/logger.hpp"
#include "common/vectors/bit_vector_dyn.hpp"
#include "common/seq_tools/reverse_complement.hpp"
#include "common/threads/threading.hpp"
#include "common/vectors/vector_algorithm.hpp"
Expand Down Expand Up @@ -421,71 +420,6 @@ void DeBruijnGraph::call_unitigs(const CallPath &callback,
::mtg::graph::call_sequences(*this, callback, num_threads, true, min_tip_size, kmers_in_single_form);
}

std::shared_ptr<const bit_vector> DeBruijnGraph::get_last() const {
bit_vector_dyn last_bv(max_index() + 1);
call_nodes([&](node_index v) {
std::pair<char, node_index> last;
call_outgoing_kmers(v, [&](node_index u, char c) {
last = std::max(last, std::pair{c, u});
});
last_bv.set(last.second, true);
});
return std::make_shared<bit_vector_dyn>(std::move(last_bv));
}

void DeBruijnGraph::row_diff_traverse(size_t num_threads,
size_t max_length,
const bit_vector &rd_succ,
sdsl::bit_vector *terminal) const {
sdsl::bit_vector visited(max_index() + 1);
auto finalised = visited;
std::vector<size_t> distance(max_index() + 1);
assert(terminal->size() == visited.size());
assert(rd_succ.size() == visited.size());
auto set_terminal = [&](int v) {
distance[v] = 0;
(*terminal)[v] = true;
};
call_nodes([&](node_index v) {
static std::stack<node_index> path;
while (!visited[v]) {
path.push(v);
visited[v] = true;
if (!has_no_outgoing(v)) {
v = row_diff_successor(v, rd_succ);
}
}
// Either a sink, or a cyclic dependency
if (!finalised[v]) {
set_terminal(v);
finalised[v] = true;
}
node_index succ;
while (!empty(path)) {
succ = std::exchange(v, path.top());
if (!finalised[v]) {
distance[v] = distance[succ] + 1;
if (distance[v] == max_length) {
set_terminal(v);
}
finalised[v] = true;
}
path.pop();
}
});
}

/**
 * Return the adjacent outgoing node of |node| whose bit is set in |rd_succ|.
 * If several are set, the one reported last by adjacent_outgoing_nodes() wins.
 * A successor is asserted to exist.
 */
node_index DeBruijnGraph::row_diff_successor(node_index node, const bit_vector &rd_succ) const {
    node_index succ = npos;
    adjacent_outgoing_nodes(node, [&succ, &rd_succ](node_index next) {
        if (!rd_succ[next])
            return;

        succ = next;
    });
    assert(succ != npos && "a row diff successor must exist");
    return succ;
}

/**
* Traverse graph and iterate over all nodes
*/
Expand Down
9 changes: 0 additions & 9 deletions metagraph/src/graph/representation/base/sequence_graph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,15 +246,6 @@ class DeBruijnGraph : public SequenceGraph {

// Call all nodes that have no incoming edges
virtual void call_source_nodes(const std::function<void(node_index)> &callback) const;

virtual std::shared_ptr<const bit_vector> get_last() const;

virtual void row_diff_traverse(size_t num_threads,
size_t max_length,
const bit_vector &rd_succ,
sdsl::bit_vector *terminal) const;

virtual node_index row_diff_successor(node_index node, const bit_vector &rd_succ) const;
};


Expand Down
19 changes: 0 additions & 19 deletions metagraph/src/graph/representation/succinct/dbg_succinct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -525,25 +525,6 @@ ::call_source_nodes(const std::function<void(node_index)> &callback) const {
});
}

std::shared_ptr<const bit_vector> DBGSuccinct
::get_last() const {
return std::shared_ptr<const bit_vector>(std::shared_ptr<const bit_vector>{}, &get_boss().get_last());
}

// Forward the traversal to the BOSS table.
void DBGSuccinct
::row_diff_traverse(size_t num_threads,
                    size_t max_length,
                    const bit_vector &rd_succ,
                    sdsl::bit_vector *terminal) const {
    const auto &boss = get_boss();
    boss.row_diff_traverse(num_threads, max_length, rd_succ, terminal);
}

// Forward the successor query to the BOSS table.
node_index DBGSuccinct
::row_diff_successor(node_index node, const bit_vector &rd_succ) const {
    const auto &boss = get_boss();
    return boss.row_diff_successor(node, rd_succ);
}


size_t DBGSuccinct::outdegree(node_index node) const {
assert(is_valid(node));

Expand Down
10 changes: 0 additions & 10 deletions metagraph/src/graph/representation/succinct/dbg_succinct.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,16 +183,6 @@ class DBGSuccinct : public DeBruijnGraph {
node_index select_node(uint64_t boss_index) const;
uint64_t rank_node(node_index kmer_index) const;

virtual std::shared_ptr<const bit_vector> get_last() const override final;

virtual void row_diff_traverse(size_t num_threads,
size_t max_length,
const bit_vector &rd_succ,
sdsl::bit_vector *terminal) const override final;

virtual node_index row_diff_successor(node_index node,
const bit_vector &rd_succ) const override final;

void initialize_bloom_filter_from_fpr(double false_positive_rate,
uint32_t max_num_hash_functions = -1);

Expand Down

0 comments on commit 7737e26

Please sign in to comment.