Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mark row-diff successors only at forks #308

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions metagraph/src/annotation/binary_matrix/row_diff/row_diff.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,9 @@ BinaryMatrix::SetBitPositions RowDiff<BaseMatrix>::get_row(Row row) const {
uint64_t boss_edge = graph_->kmer_to_boss_index(
graph::AnnotatedSequenceGraph::anno_to_graph_index(row));
const graph::boss::BOSS &boss = graph_->get_boss();
const bit_vector &rd_succ = fork_succ_.size() ? fork_succ_ : boss.get_last();

while (!anchor_[row]) {
boss_edge = boss.row_diff_successor(boss_edge, rd_succ);
boss_edge = boss.row_diff_successor(boss_edge, fork_succ_);

row = graph::AnnotatedSequenceGraph::graph_to_anno_index(
graph_->boss_to_kmer_index(boss_edge));
Expand Down Expand Up @@ -186,7 +185,6 @@ RowDiff<BaseMatrix>::get_rows(const std::vector<Row> &row_ids) const {
std::vector<std::vector<size_t>> rd_paths_trunc(row_ids.size());

const graph::boss::BOSS &boss = graph_->get_boss();
const bit_vector &rd_succ = fork_succ_.size() ? fork_succ_ : boss.get_last();

for (size_t i = 0; i < row_ids.size(); ++i) {
Row row = row_ids[i];
Expand All @@ -213,7 +211,7 @@ RowDiff<BaseMatrix>::get_rows(const std::vector<Row> &row_ids) const {
if (anchor_[row])
break;

boss_edge = boss.row_diff_successor(boss_edge, rd_succ);
boss_edge = boss.row_diff_successor(boss_edge, fork_succ_);
}
}

Expand Down
47 changes: 28 additions & 19 deletions metagraph/src/annotation/row_diff_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ void sum_and_call_counts(const fs::path &dir,
}

rd_succ_bv_type route_at_forks(const graph::DBGSuccinct &graph,
const sdsl::bit_vector &dummy,
const std::string &rd_succ_filename,
const std::string &count_vectors_dir,
const std::string &row_count_extension) {
Expand All @@ -226,16 +227,27 @@ rd_succ_bv_type route_at_forks(const graph::DBGSuccinct &graph,

sdsl::bit_vector rd_succ_bv(last.size(), false);

uint64_t num_forks = 0;
uint64_t num_redirected = 0;
sum_and_call_counts(count_vectors_dir, row_count_extension, "row counts",
[&](int32_t count) {
// TODO: skip single outgoing
if (dummy[graph.kmer_to_boss_index(graph_idx)]) {
assert(!outgoing_counts.size());
graph_idx++;
return;
}
outgoing_counts.push_back(count);
if (last[graph.kmer_to_boss_index(graph_idx)]) {
// pick the node with the largest count
size_t max_pos = std::max_element(outgoing_counts.rbegin(),
outgoing_counts.rend())
- outgoing_counts.rbegin();
rd_succ_bv[graph.kmer_to_boss_index(graph_idx - max_pos)] = true;
if (outgoing_counts.size() > 1) {
num_forks++;
// pick the node with the largest count
size_t max_pos = std::max_element(outgoing_counts.rbegin(),
outgoing_counts.rend())
- outgoing_counts.rbegin();
if (max_pos)
num_redirected++;
rd_succ_bv[graph.kmer_to_boss_index(graph_idx - max_pos)] = true;
}
outgoing_counts.resize(0);
}
graph_idx++;
Expand All @@ -248,6 +260,9 @@ rd_succ_bv_type route_at_forks(const graph::DBGSuccinct &graph,
exit(1);
}

logger->info("Number of row-diff successors redirected at forks: {} / {}",
num_redirected, num_forks);

rd_succ = rd_succ_bv_type(std::move(rd_succ_bv));

} else {
Expand Down Expand Up @@ -286,14 +301,14 @@ void build_pred_succ(const std::string &graph_fname,
std::exit(1);
}

// assign row-diff successors at forks
rd_succ_bv_type rd_succ = route_at_forks(graph, outfbase + kRowDiffForkSuccExt,
count_vectors_dir, row_count_extension);

const BOSS &boss = graph.get_boss();

sdsl::bit_vector dummy = boss.mark_all_dummy_edges(num_threads);

// assign row-diff successors at forks
rd_succ_bv_type rd_succ = route_at_forks(graph, dummy, outfbase + kRowDiffForkSuccExt,
count_vectors_dir, row_count_extension);

// create the succ/pred files, indexed using annotation indices
uint32_t width = sdsl::bits::hi(graph.num_nodes()) + 1;
sdsl::int_vector_buffer<> succ(outfbase + ".succ", std::ios::out, BUFFER_SIZE, width);
Expand All @@ -317,20 +332,14 @@ void build_pred_succ(const std::string &graph_fname,
for (uint64_t i = start; i < std::min(start + BS, graph.num_nodes() + 1); ++i) {
BOSS::edge_index boss_idx = graph.kmer_to_boss_index(i);
if (!dummy[boss_idx]) {
const BOSS::TAlphabet d = boss.get_W(boss_idx) % boss.alph_size;
assert(d && "must not be dummy");
BOSS::edge_index next = boss.fwd(boss_idx, d);
assert(next);
BOSS::edge_index next = boss.row_diff_successor(boss_idx, rd_succ);
if (!dummy[next]) {
while (rd_succ.size() && !rd_succ[next]) {
next--;
assert(!boss.get_last(next));
}
succ_buf.push_back(to_row(graph.boss_to_kmer_index(next)));
succ_boundary_buf.push_back(0);
}
// compute predecessors only for row-diff successors
if (rd_succ.size() ? rd_succ[boss_idx] : boss.get_last(boss_idx)) {
if (rd_succ.size() ? (boss.is_single_outgoing(boss_idx) || rd_succ[boss_idx])
: boss.get_last(boss_idx)) {
BOSS::TAlphabet d = boss.get_node_last_value(boss_idx);
BOSS::edge_index back_idx = boss.bwd(boss_idx);
boss.call_incoming_to_target(back_idx, d,
Expand Down
2 changes: 1 addition & 1 deletion metagraph/src/graph/representation/succinct/boss.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2718,7 +2718,7 @@ void traverse_rd_path_backward(const BOSS &boss,
// AAAX - AAX$
// ^^^^
// AAAY - ****
if (!rd_succ[edge])
if (!boss.is_single_outgoing(edge) && !rd_succ[edge])
continue;

// |edge| is the row-diff successor. Thus, it is part of a diff
Expand Down
2 changes: 1 addition & 1 deletion metagraph/src/graph/representation/succinct/boss.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ class BOSS {
// make one traversal step
edge = fwd(edge, d);
// pick the row-diff successor
if (!get_last(edge - 1)) {
if (rd_succ.size() && !get_last(edge - 1)) {
while (!rd_succ[edge]) {
edge--;
assert(!get_last(edge) && "a row-diff successor must exist");
Expand Down