From 39f671364b9095c24a4f81a4148aebba3b076107 Mon Sep 17 00:00:00 2001 From: Harun Mustafa Date: Thu, 21 Mar 2024 13:30:38 +0100 Subject: [PATCH] fixes --- metagraph/CMakeLists.txt | 3 +- metagraph/src/cli/build.cpp | 4 +- .../graph/representation/canonical_dbg.cpp | 22 +++ .../graph/representation/hash/dbg_sshash.cpp | 142 +++++++++++++----- .../graph/representation/hash/dbg_sshash.hpp | 19 ++- metagraph/tests/graph/all/test_dbg_basic.cpp | 74 ++++----- .../tests/graph/all/test_dbg_helpers.cpp | 18 ++- 7 files changed, 190 insertions(+), 92 deletions(-) diff --git a/metagraph/CMakeLists.txt b/metagraph/CMakeLists.txt index 6d54d8e443..6e80582997 100644 --- a/metagraph/CMakeLists.txt +++ b/metagraph/CMakeLists.txt @@ -270,6 +270,8 @@ include_directories( external-libraries/zlib external-libraries/sdust external-libraries/simde-no-tests + external-libraries/sshash/include + external-libraries/sshash/external/pthash/external/essentials/include ${PROJECT_SOURCE_DIR}/src ) @@ -320,7 +322,6 @@ IF(APPLE) ENDIF() add_subdirectory(external-libraries/spdlog) add_subdirectory(external-libraries/sshash SYSTEM) -target_include_directories(sshash_static PUBLIC SYSTEM external-libraries/sshash/include) target_compile_options(test_alphabet PRIVATE -Wno-strict-aliasing) add_subdirectory(external-libraries/DYNAMIC) add_subdirectory(external-libraries/zlib) diff --git a/metagraph/src/cli/build.cpp b/metagraph/src/cli/build.cpp index 40e1cd74e2..d8851855cb 100644 --- a/metagraph/src/cli/build.cpp +++ b/metagraph/src/cli/build.cpp @@ -252,12 +252,12 @@ int build_graph(Config *config) { } else if (config->graph_type == Config::GraphType::SSHASH){ - graph.reset(new DBGSSHash(files.at(0), config->k)); + graph.reset(new DBGSSHash(files.at(0), config->k, config->graph_mode)); if(files.size() > 1){ logger->error("Only one file for SSHash"); exit(1); } - + }else { //slower method switch (config->graph_type) { diff --git a/metagraph/src/graph/representation/canonical_dbg.cpp b/metagraph/src/graph/representation/canonical_dbg.cpp index 22104c86e7..f04a84f7df 100644 --- a/metagraph/src/graph/representation/canonical_dbg.cpp +++ b/metagraph/src/graph/representation/canonical_dbg.cpp @@ -3,6 +3,7 @@ #include "common/seq_tools/reverse_complement.hpp" #include "common/logger.hpp" #include "graph/representation/succinct/dbg_succinct.hpp" +#include "graph/representation/hash/dbg_sshash.hpp" namespace mtg { @@ -62,6 +63,13 @@ ::map_to_nodes_sequentially(std::string_view sequence, std::vector path; path.reserve(sequence.size() - get_k() + 1); + if (const auto sshash = std::dynamic_pointer_cast(graph_)) { + sshash->map_to_nodes_with_rc(sequence, [&](node_index node, bool orientation) { + callback(node && orientation ? reverse_complement(node) : node); + }, terminate); + return; + } + // map until the first mismatch bool stop = false; graph_->map_to_nodes_sequentially(sequence, @@ -171,6 +179,13 @@ void CanonicalDBG::call_outgoing_kmers(node_index node, return; } + if (const auto sshash = std::dynamic_pointer_cast(graph_)) { + sshash->call_outgoing_kmers_with_rc(node, [&](node_index next, char c, bool orientation) { + callback(orientation ? reverse_complement(next) : next, c); + }); + return; + } + // includes `$` for DBGSuccinct const auto &alphabet = graph_->alphabet(); @@ -257,6 +272,13 @@ void CanonicalDBG::call_incoming_kmers(node_index node, return; } + if (const auto sshash = std::dynamic_pointer_cast(graph_)) { + sshash->call_incoming_kmers_with_rc(node, [&](node_index prev, char c, bool orientation) { + callback(orientation ? reverse_complement(prev) : prev, c); + }); + return; + } + // includes `$` for DBGSuccinct const auto &alphabet = graph_->alphabet(); diff --git a/metagraph/src/graph/representation/hash/dbg_sshash.cpp b/metagraph/src/graph/representation/hash/dbg_sshash.cpp index 8c52d641fb..9a04f7555e 100644 --- a/metagraph/src/graph/representation/hash/dbg_sshash.cpp +++ b/metagraph/src/graph/representation/hash/dbg_sshash.cpp @@ -10,18 +10,19 @@ DBGSSHash::DBGSSHash(size_t k):k_(k) { dict_ = std::make_unique(); } -DBGSSHash::DBGSSHash(std::string const& input_filename, size_t k):k_(k){ +DBGSSHash::DBGSSHash(std::string const& input_filename, size_t k, Mode mode):k_(k), mode_(mode) { sshash::build_configuration build_config; build_config.k = k;// // quick fix for value of m... k/2 but odd build_config.m = (k_+1)/2; + build_config.num_threads = get_num_threads(); if(build_config.m % 2 == 0) build_config.m++; dict_ = std::make_unique(); dict_->build(input_filename, build_config); } std::string DBGSSHash::file_extension() const { return kExtension; } size_t DBGSSHash::get_k() const { return k_; } - DeBruijnGraph::Mode DBGSSHash::get_mode() const { return BASIC; } + DeBruijnGraph::Mode DBGSSHash::get_mode() const { return mode_; } void DBGSSHash::add_sequence(std::string_view sequence, const std::function &on_insertion) { @@ -39,15 +40,32 @@ void DBGSSHash::map_to_nodes(std::string_view sequence, void DBGSSHash ::map_to_nodes_sequentially(std::string_view sequence, const std::function &callback, const std::function &terminate) const { + if (terminate() || sequence.size() < k_) + return; + + auto uint_kmer = sshash::util::string_to_uint_kmer(sequence.data(), k_ - 1) << 2; + for (size_t i = k_ - 1; i < sequence.size() && !terminate(); ++i) { + uint_kmer = (uint_kmer >> 2) + (sshash::util::char_to_uint(sequence[i]) << (2 * (k_ - 1))); + callback(dict_->lookup_uint(uint_kmer, false) + 1); + } +} + +void DBGSSHash ::map_to_nodes_with_rc(std::string_view sequence, + const std::function &callback, + const std::function &terminate) const { + sshash::streaming_query_regular_parsing streamer(dict_.get()); + streamer.start(); for (size_t i = 0; i + k_ <= sequence.size() && !terminate(); ++i) { - callback(kmer_to_node(sequence.substr(i, k_))); + const char *kmer = sequence.data() + i; + auto res = streamer.lookup_advanced(kmer); + callback(res.kmer_id + 1, res.kmer_orientation); } } DBGSSHash::node_index DBGSSHash::traverse(node_index node, char next_char) const { - std::string kmer = DBGSSHash::get_node_sequence(node); - sshash::neighbourhood nb = dict_->kmer_forward_neighbours(&kmer[0]); - uint64_t ssh_idx = -1; + std::string kmer = DBGSSHash::get_node_sequence(node); + sshash::neighbourhood nb = dict_->kmer_forward_neighbours(&kmer[0], false); + uint64_t ssh_idx = -1; switch (next_char) { case 'A': ssh_idx = nb.forward_A.kmer_id; @@ -69,8 +87,8 @@ DBGSSHash::node_index DBGSSHash::traverse(node_index node, char next_char) const DBGSSHash::node_index DBGSSHash::traverse_back(node_index node, char prev_char) const { std::string kmer = DBGSSHash::get_node_sequence(node); - sshash::neighbourhood nb = dict_->kmer_backward_neighbours(&kmer[0]); - uint64_t ssh_idx = -1; + sshash::neighbourhood nb = dict_->kmer_backward_neighbours(&kmer[0], false); + uint64_t ssh_idx = -1; switch (prev_char) { case 'A': ssh_idx = nb.backward_A.kmer_id; @@ -94,7 +112,6 @@ void DBGSSHash ::adjacent_outgoing_nodes(node_index node, const std::function &callback) const { assert(node > 0 && node <= num_nodes()); call_outgoing_kmers(node, [&](auto child, char) { callback(child); }); - } void DBGSSHash ::adjacent_incoming_nodes(node_index node, @@ -107,13 +124,19 @@ void DBGSSHash ::call_outgoing_kmers(node_index node, const OutgoingEdgeCallback &callback) const { assert(node > 0 && node <= num_nodes()); - auto prefix = get_node_sequence(node).substr(1); + std::string kmer = DBGSSHash::get_node_sequence(node); + sshash::neighbourhood nb = dict_->kmer_forward_neighbours(kmer.c_str(), false); + if (nb.forward_A.kmer_id != sshash::constants::invalid_uint64) + callback(nb.forward_A.kmer_id + 1, 'A'); + + if (nb.forward_C.kmer_id != sshash::constants::invalid_uint64) + callback(nb.forward_C.kmer_id + 1, 'C'); - for (char c : alphabet_) { - auto next = kmer_to_node(prefix + c); - if (next != npos) - callback(next, c); - } + if (nb.forward_G.kmer_id != sshash::constants::invalid_uint64) + callback(nb.forward_G.kmer_id + 1, 'G'); + + if (nb.forward_T.kmer_id != sshash::constants::invalid_uint64) + callback(nb.forward_T.kmer_id + 1, 'T'); } @@ -121,23 +144,67 @@ void DBGSSHash ::call_incoming_kmers(node_index node, const IncomingEdgeCallback &callback) const { assert(node > 0 && node <= num_nodes()); - std::string suffix = get_node_sequence(node); - suffix.pop_back(); + std::string kmer = DBGSSHash::get_node_sequence(node); + sshash::neighbourhood nb = dict_->kmer_backward_neighbours(kmer.c_str(), false); + if (nb.backward_A.kmer_id != sshash::constants::invalid_uint64) + callback(nb.backward_A.kmer_id + 1, 'A'); - for (char c : alphabet_) { - auto prev = kmer_to_node(c + suffix); - if (prev != npos) - callback(prev, c); - } + if (nb.backward_C.kmer_id != sshash::constants::invalid_uint64) + callback(nb.backward_C.kmer_id + 1, 'C'); + + if (nb.backward_G.kmer_id != sshash::constants::invalid_uint64) + callback(nb.backward_G.kmer_id + 1, 'G'); + + if (nb.backward_T.kmer_id != sshash::constants::invalid_uint64) + callback(nb.backward_T.kmer_id + 1, 'T'); +} + +void DBGSSHash ::call_outgoing_kmers_with_rc(node_index node, + const std::function &callback) const { + assert(node > 0 && node <= num_nodes()); + + std::string kmer = DBGSSHash::get_node_sequence(node); + sshash::neighbourhood nb = dict_->kmer_forward_neighbours(kmer.c_str(), true); + if (nb.forward_A.kmer_id != sshash::constants::invalid_uint64) + callback(nb.forward_A.kmer_id + 1, 'A', nb.forward_A.kmer_orientation); + + if (nb.forward_C.kmer_id != sshash::constants::invalid_uint64) + callback(nb.forward_C.kmer_id + 1, 'C', nb.forward_C.kmer_orientation); + + if (nb.forward_G.kmer_id != sshash::constants::invalid_uint64) + callback(nb.forward_G.kmer_id + 1, 'G', nb.forward_G.kmer_orientation); + + if (nb.forward_T.kmer_id != sshash::constants::invalid_uint64) + callback(nb.forward_T.kmer_id + 1, 'T', nb.forward_T.kmer_orientation); +} + + +void DBGSSHash ::call_incoming_kmers_with_rc(node_index node, + const std::function &callback) const { + assert(node > 0 && node <= num_nodes()); + + std::string kmer = DBGSSHash::get_node_sequence(node); + sshash::neighbourhood nb = dict_->kmer_backward_neighbours(kmer.c_str(), true); + if (nb.backward_A.kmer_id != sshash::constants::invalid_uint64) + callback(nb.backward_A.kmer_id + 1, 'A', nb.backward_A.kmer_orientation); + + if (nb.backward_C.kmer_id != sshash::constants::invalid_uint64) + callback(nb.backward_C.kmer_id + 1, 'C', nb.backward_C.kmer_orientation); + + if (nb.backward_G.kmer_id != sshash::constants::invalid_uint64) + callback(nb.backward_G.kmer_id + 1, 'G', nb.backward_G.kmer_orientation); + + if (nb.backward_T.kmer_id != sshash::constants::invalid_uint64) + callback(nb.backward_T.kmer_id + 1, 'T', nb.backward_T.kmer_orientation); } size_t DBGSSHash::outdegree(node_index node) const { std::string kmer = DBGSSHash::get_node_sequence(node); - sshash::neighbourhood nb = dict_->kmer_forward_neighbours(&kmer[0]); - size_t out_deg = bool(nb.forward_A.kmer_id + 1) // change to loop? - + bool(nb.forward_C.kmer_id + 1) - + bool(nb.forward_G.kmer_id + 1) - + bool(nb.forward_T.kmer_id + 1); + sshash::neighbourhood nb = dict_->kmer_forward_neighbours(&kmer[0], false); + size_t out_deg = (nb.forward_A.kmer_id != sshash::constants::invalid_uint64) // change to loop? + + (nb.forward_C.kmer_id != sshash::constants::invalid_uint64) + + (nb.forward_G.kmer_id != sshash::constants::invalid_uint64) + + (nb.forward_T.kmer_id != sshash::constants::invalid_uint64); return out_deg; } @@ -151,11 +218,11 @@ bool DBGSSHash::has_multiple_outgoing(node_index node) const { size_t DBGSSHash::indegree(node_index node) const { std::string kmer = DBGSSHash::get_node_sequence(node); - sshash::neighbourhood nb = dict_->kmer_backward_neighbours(&kmer[0]); - size_t in_deg = bool(nb.backward_A.kmer_id + 1) // change to loop? - + bool(nb.backward_C.kmer_id + 1) - + bool(nb.backward_G.kmer_id + 1) - + bool(nb.backward_T.kmer_id + 1); + sshash::neighbourhood nb = dict_->kmer_backward_neighbours(kmer.c_str(), false); + size_t in_deg = (nb.backward_A.kmer_id != sshash::constants::invalid_uint64) // change to loop? + + (nb.backward_C.kmer_id != sshash::constants::invalid_uint64) + + (nb.backward_G.kmer_id != sshash::constants::invalid_uint64) + + (nb.backward_T.kmer_id != sshash::constants::invalid_uint64); return in_deg; } @@ -175,8 +242,15 @@ void DBGSSHash::call_kmers( } DBGSSHash::node_index DBGSSHash::kmer_to_node(std::string_view kmer) const { - uint64_t ssh_idx = dict_->lookup(kmer.begin(), false); - return ssh_idx + 1; + return num_nodes() ? dict_->lookup(kmer.begin(), false) + 1 : npos; +} + +std::pair DBGSSHash::kmer_to_node_with_rc(std::string_view kmer) const { + if (!num_nodes()) + return std::make_pair(npos, false); + + auto res = dict_->lookup_advanced(kmer.begin(), true); + return std::make_pair(res.kmer_id + 1, res.kmer_orientation); } std::string DBGSSHash::get_node_sequence(node_index node) const { diff --git a/metagraph/src/graph/representation/hash/dbg_sshash.hpp b/metagraph/src/graph/representation/hash/dbg_sshash.hpp index 910ed57d5d..70e0d4b319 100644 --- a/metagraph/src/graph/representation/hash/dbg_sshash.hpp +++ b/metagraph/src/graph/representation/hash/dbg_sshash.hpp @@ -16,7 +16,7 @@ namespace graph { class DBGSSHash : public DeBruijnGraph { public: explicit DBGSSHash(size_t k); - DBGSSHash(std::string const& input_filename, size_t k); + DBGSSHash(std::string const& input_filename, size_t k, Mode mode = BASIC); ~DBGSSHash(); @@ -35,6 +35,11 @@ class DBGSSHash : public DeBruijnGraph { const std::function &callback, const std::function &terminate = []() { return false; }) const override; + void map_to_nodes_with_rc( + std::string_view sequence, + const std::function &callback, + const std::function &terminate = []() { return false; }) const; + void adjacent_outgoing_nodes(node_index node, const std::function &callback) const override; @@ -74,22 +79,32 @@ class DBGSSHash : public DeBruijnGraph { bool has_single_incoming(node_index) const override; node_index kmer_to_node(std::string_view kmer) const override; - + std::pair kmer_to_node_with_rc(std::string_view kmer) const; + void call_outgoing_kmers(node_index node, const OutgoingEdgeCallback &callback) const override; + void call_outgoing_kmers_with_rc(node_index node, + const std::function &callback) const; + void call_incoming_kmers(node_index node, const IncomingEdgeCallback &callback) const override; + void call_incoming_kmers_with_rc(node_index node, + const std::function &callback) const; + bool operator==(const DeBruijnGraph &other) const override; const std::string &alphabet() const override; + const sshash::dictionary& data() const { return *dict_; } + private: static const std::string alphabet_; std::unique_ptr dict_; size_t k_; + Mode mode_; }; } // namespace graph diff --git a/metagraph/tests/graph/all/test_dbg_basic.cpp b/metagraph/tests/graph/all/test_dbg_basic.cpp index c2d39ad78c..cba2d4bd36 100644 --- a/metagraph/tests/graph/all/test_dbg_basic.cpp +++ b/metagraph/tests/graph/all/test_dbg_basic.cpp @@ -33,10 +33,6 @@ TYPED_TEST(DeBruijnGraphTest, GraphDefaultConstructor) { } TYPED_TEST(DeBruijnGraphTest, InitializeEmpty) { - if constexpr(std::is_same_v) { - common::logger->warn("Test disabled for DBGSSHash"); - return; - } auto graph = build_graph(2); EXPECT_EQ(0u, graph->num_nodes()); @@ -48,10 +44,6 @@ TYPED_TEST(DeBruijnGraphTest, InitializeEmpty) { } TYPED_TEST(DeBruijnGraphTest, SerializeEmpty) { - if constexpr(std::is_same_v) { - common::logger->warn("Test disabled for DBGSSHash"); - return; - } { auto graph = build_graph(12); ASSERT_EQ(0u, graph->num_nodes()); @@ -187,16 +179,6 @@ TYPED_TEST(DeBruijnGraphTest, Weighted) { } TYPED_TEST(DeBruijnGraphTest, ReverseComplement) { - if constexpr(std::is_same_v) { - common::logger->warn("Test disabled for DBGSSHash"); - return; - } - auto graph1 = build_graph(12, { "AAAAAAAAAAAAAAAAAAAAAAAAAAAAA" }); - auto graph2 = build_graph(12, { "AAAAAAAAAAAAAAAAAAAAAAAAAAAAA", - "TTTTTTTTTTTTTTTTTTTTTTTTTTTTT" }); - - EXPECT_EQ(graph1->num_nodes() * 2, graph2->num_nodes()); - auto graph = build_graph(12, { "AAAAAAAAAAAAAAAAAAAAAAAAAAAAA", "TTTTTTTTTTTTTTTTTTTTTTTTTTTTT", "CATGTACTAGCTGATCGTAGCTAGCTAGC" }); @@ -207,6 +189,16 @@ TYPED_TEST(DeBruijnGraphTest, ReverseComplement) { EXPECT_FALSE(graph->find("GCTAGCTAGCTACGATCAGCTAGTACATG")); EXPECT_FALSE(graph->find("CATGTTTTTTTAATATATATATTTTTAGC")); EXPECT_FALSE(graph->find("GCTAAAAATATATATATTAAAAAAACATG")); + + if constexpr(std::is_same_v) { + common::logger->warn("Test disabled for DBGSSHash"); + return; + } + auto graph1 = build_graph(12, { "AAAAAAAAAAAAAAAAAAAAAAAAAAAAA" }); + auto graph2 = build_graph(12, { "AAAAAAAAAAAAAAAAAAAAAAAAAAAAA", + "TTTTTTTTTTTTTTTTTTTTTTTTTTTTT" }); + + EXPECT_EQ(graph1->num_nodes() * 2, graph2->num_nodes()); } TYPED_TEST(DeBruijnGraphTest, CheckGraph) { @@ -224,10 +216,6 @@ TYPED_TEST(DeBruijnGraphTest, CheckGraphInputWithN) { } TYPED_TEST(DeBruijnGraphTest, Alphabet) { - if constexpr(std::is_same_v) { - common::logger->warn("Test disabled for DBGSSHash"); - return; - } for (size_t k = 2; k <= 10; ++k) { auto graph = build_graph(k, {}); std::set alphabet(graph->alphabet().begin(), graph->alphabet().end()); @@ -260,10 +248,6 @@ TYPED_TEST(DeBruijnGraphTest, AddSequenceSimplePaths) { } TYPED_TEST(DeBruijnGraphTest, TestNonASCIIStrings) { - if constexpr(std::is_same_v) { - common::logger->warn("Test disabled for DBGSSHash"); - return; - } std::vector sequences { // cyrillic A and C "АСАСАСАСАСАСА", "плохая строка", @@ -272,49 +256,49 @@ TYPED_TEST(DeBruijnGraphTest, TestNonASCIIStrings) { EXPECT_EQ(0u, build_graph(6, sequences)->num_nodes()); EXPECT_EQ(0u, build_graph_batch(6, sequences)->num_nodes()); } else { + if constexpr(std::is_same_v) { + common::logger->warn("Test disabled for DBGSSHash"); + return; + } EXPECT_EQ(1u, build_graph(6, sequences)->num_nodes()); EXPECT_EQ(1u, build_graph_batch(6, sequences)->num_nodes()); } } TYPED_TEST(DeBruijnGraphTest, AddSequences) { + { + // TODO: add version with N at the ends of these + std::vector sequences { "AGACT", "GACTT", "ACTAT" }; + EXPECT_EQ(3u, build_graph(5, sequences)->num_nodes()); + EXPECT_EQ(3u, build_graph_batch(5, sequences)->num_nodes()); + } + { + std::vector sequences { "AGAC", "GACT", "ACTA" }; + EXPECT_EQ(3u, build_graph(4, sequences)->num_nodes()); + EXPECT_EQ(3u, build_graph_batch(4, sequences)->num_nodes()); + } if constexpr(std::is_same_v) { common::logger->warn("Test case disabled for DBGSSHash"); return; } - { - std::vector sequences { "AAAC", "CAAC" }; - EXPECT_EQ(2u, build_graph(4, sequences)->num_nodes()); - EXPECT_EQ(2u, build_graph_batch(4, sequences)->num_nodes()); - } { std::vector sequences { "AAAC", "CAAC", "GAAC" }; EXPECT_EQ(3u, build_graph(4, sequences)->num_nodes()); EXPECT_EQ(3u, build_graph_batch(4, sequences)->num_nodes()); } { - std::vector sequences { "AAAC", "AACG" }; + std::vector sequences { "AAAC", "CAAC" }; EXPECT_EQ(2u, build_graph(4, sequences)->num_nodes()); EXPECT_EQ(2u, build_graph_batch(4, sequences)->num_nodes()); } { - // TODO: add version with N at the ends of these - std::vector sequences { "AGACT", "GACTT", "ACTAT" }; - EXPECT_EQ(3u, build_graph(5, sequences)->num_nodes()); - EXPECT_EQ(3u, build_graph_batch(5, sequences)->num_nodes()); - } - { - std::vector sequences { "AGAC", "GACT", "ACTA" }; - EXPECT_EQ(3u, build_graph(4, sequences)->num_nodes()); - EXPECT_EQ(3u, build_graph_batch(4, sequences)->num_nodes()); + std::vector sequences { "AAAC", "AACG" }; + EXPECT_EQ(2u, build_graph(4, sequences)->num_nodes()); + EXPECT_EQ(2u, build_graph_batch(4, sequences)->num_nodes()); } } TYPED_TEST(DeBruijnGraphTest, CallKmersEmptyGraph) { - if constexpr(std::is_same_v) { - common::logger->warn("Test disabled for DBGSSHash"); - return; - } for (size_t k = 2; k <= max_test_k(); ++k) { auto empty = build_graph(k); diff --git a/metagraph/tests/graph/all/test_dbg_helpers.cpp b/metagraph/tests/graph/all/test_dbg_helpers.cpp index e9ad1229cb..8c9fd5be82 100644 --- a/metagraph/tests/graph/all/test_dbg_helpers.cpp +++ b/metagraph/tests/graph/all/test_dbg_helpers.cpp @@ -133,7 +133,7 @@ build_graph(uint64_t k, void writeFastaFile(const std::vector& sequences, const std::string& outputFilename) { std::ofstream fastaFile(outputFilename); - + if (!fastaFile.is_open()) { std::cerr << "Error: Unable to open the output file." << std::endl; return; @@ -151,13 +151,11 @@ build_graph(uint64_t k, std::vector sequences, DeBruijnGraph::Mode mode) { - - - if(sequences.size() == 0){ - throw std::invalid_argument( "empty graph" ); - } - - // use DBGHashString to get contigs for SSHash + if (sequences.empty()) + return std::make_shared(k); + + + // use DBGHashString to get contigs for SSHash auto string_graph = build_graph(k, sequences, mode); @@ -165,6 +163,10 @@ build_graph(uint64_t k, string_graph->call_sequences([&](const std::string &contig, const auto &) { contigs.push_back(contig); }, 1, false); + + if (contigs.empty()) + return std::make_shared(k); + std::string dump_path = "../tests/data/sshash_sequences/contigs.fa"; writeFastaFile(contigs, dump_path); auto graph = std::make_shared(dump_path, k);