diff --git a/ext/src/ncbi/CMakeLists.txt b/ext/src/ncbi/CMakeLists.txt index 81b603dc13..8354fffa26 100644 --- a/ext/src/ncbi/CMakeLists.txt +++ b/ext/src/ncbi/CMakeLists.txt @@ -20,7 +20,7 @@ include(ExternalProject) ExternalProject_Add(ncbi_vdb_ext GIT_REPOSITORY "https://github.com/ncbi/ncbi-vdb.git" - GIT_TAG "3.0.8" + GIT_TAG "3.1.0" GIT_SHALLOW 1 PATCH_COMMAND patch -p1 < ${CMAKE_CURRENT_LIST_DIR}/vdb.patch BUILD_ALWAYS OFF @@ -29,12 +29,12 @@ ExternalProject_Add(ncbi_vdb_ext CMAKE_CACHE_ARGS ${CL_ARGS}) ExternalProject_Get_property(ncbi_vdb_ext INSTALL_DIR) set(NCBI_VDB_PREFIX ${INSTALL_DIR}) -set(NCBI_VDB_INCLUDE ${NCBI_VDB_PREFIX}/include) -set(NCBI_VDB_LIB ${NCBI_VDB_PREFIX}/lib) +set(NCBI_VDB_INCLUDE ${NCBI_VDB_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}) +set(NCBI_VDB_LIB ${NCBI_VDB_PREFIX}/${CMAKE_INSTALL_LIBDIR}) ExternalProject_Add(ncbi_sratools_ext GIT_REPOSITORY "https://github.com/ncbi/sra-tools.git" - GIT_TAG "3.0.8" + GIT_TAG "3.1.0" GIT_SHALLOW 1 PATCH_COMMAND patch -p1 < ${CMAKE_CURRENT_LIST_DIR}/sratools.patch BUILD_ALWAYS OFF @@ -43,12 +43,12 @@ ExternalProject_Add(ncbi_sratools_ext -DVDB_INCDIR=${NCBI_VDB_INCLUDE} -DVDB_LIBDIR=${NCBI_VDB_LIB} -DNO_JAVA=ON - CMAKE_CACHE_ARGS ${CL_ARGS}) - add_dependencies(ncbi_sratools_ext ncbi_vdb_ext) + CMAKE_CACHE_ARGS ${CL_ARGS}) +add_dependencies(ncbi_sratools_ext ncbi_vdb_ext) ExternalProject_Get_property(ncbi_sratools_ext INSTALL_DIR) set(NCBI_SRATOOLS_PREFIX ${INSTALL_DIR}) -set(NCBI_SRATOOLS_INCLUDE ${NCBI_SRATOOLS_PREFIX}/include) -set(NCBI_SRATOOLS_LIB ${NCBI_SRATOOLS_PREFIX}/lib) +set(NCBI_SRATOOLS_INCLUDE ${NCBI_SRATOOLS_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}) +set(NCBI_SRATOOLS_LIB ${NCBI_SRATOOLS_PREFIX}/${CMAKE_INSTALL_LIBDIR}) add_library(ncbi-sdk INTERFACE) diff --git a/src/cmake/proj.cmake b/src/cmake/proj.cmake index 55fd7f9b1b..f31e50cfac 100644 --- a/src/cmake/proj.cmake +++ b/src/cmake/proj.cmake @@ -8,7 +8,7 @@ # Side-by-side subprojects layout: automatically set the # 
SPADES_EXTERNAL_${project}_SOURCE_DIR using SPADES_ALL_PROJECTS -set(SPADES_ALL_PROJECTS "spades;hammer;ionhammer;corrector;spaligner;spades_tools;binspreader;pathracer") +set(SPADES_ALL_PROJECTS "spades;hammer;ionhammer;corrector;spaligner;spades_tools;binspreader;pathracer;splitter") set(SPADES_EXTRA_PROJECTS "mts;online_vis;cds_subgraphs") set(SPADES_KNOWN_PROJECTS "${SPADES_ALL_PROJECTS};${SPADES_EXTRA_PROJECTS}") set(SPADES_ENABLE_PROJECTS "" CACHE STRING diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 47c4cd5211..2f37f76d23 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -14,6 +14,7 @@ add_subdirectory(pipeline) add_subdirectory(sequence) add_subdirectory(assembly_graph) add_subdirectory(alignment) +add_subdirectory(auxiliary_graphs) add_subdirectory(modules/path_extend) add_subdirectory(modules) add_subdirectory(paired_info) @@ -29,7 +30,7 @@ add_library(common_modules adt/concurrent_dsu.cpp) target_link_libraries(common_modules library - assembly_graph input pipeline sequence - coverage_model paired_info path_extend - stages mph_index modules + assembly_graph auxiliary_graphs input + pipeline sequence coverage_model + paired_info path_extend stages mph_index modules utils configs alignment) diff --git a/src/common/assembly_graph/core/graph.hpp b/src/common/assembly_graph/core/graph.hpp index 5c56a0ea6e..0b246f26eb 100644 --- a/src/common/assembly_graph/core/graph.hpp +++ b/src/common/assembly_graph/core/graph.hpp @@ -90,7 +90,7 @@ class DeBruijnGraph: public omnigraph::ObservableGraph { } auto move_links(VertexId v) { - data(v).move_links(); + return data(v).move_links(); } auto clear_links(VertexId v) { diff --git a/src/common/auxiliary_graphs/CMakeLists.txt b/src/common/auxiliary_graphs/CMakeLists.txt new file mode 100644 index 0000000000..a7dedd465e --- /dev/null +++ b/src/common/auxiliary_graphs/CMakeLists.txt @@ -0,0 +1,18 @@ +############################################################################ 
+# Copyright (c) 2019 Saint Petersburg State University
+# All Rights Reserved
+# See file LICENSE for details.
+############################################################################
+
+project(auxiliary_graphs CXX)
+# Static library built from the contracted-graph and scaffold-graph sources listed below.
+add_library(auxiliary_graphs STATIC
+            contracted_graph/contracted_graph.cpp
+            contracted_graph/contracted_graph_builder.cpp
+            contracted_graph/contracted_graph_helper.cpp
+            contracted_graph/graph_condensation.cpp
+            contracted_graph/contracted_statistics.cpp
+            scaffold_graph/scaffold_vertex.cpp
+            scaffold_graph/scaffold_graph.cpp)
+# assembly_graph is the only library linked to this target here.
+target_link_libraries(auxiliary_graphs assembly_graph)
diff --git a/src/common/auxiliary_graphs/contracted_graph/contracted_graph.cpp b/src/common/auxiliary_graphs/contracted_graph/contracted_graph.cpp
new file mode 100644
index 0000000000..d7daff20ad
--- /dev/null
+++ b/src/common/auxiliary_graphs/contracted_graph/contracted_graph.cpp
@@ -0,0 +1,203 @@
+//***************************************************************************
+//* Copyright (c) 2019 Saint Petersburg State University
+//* All Rights Reserved
+//* See file LICENSE for details.
+//*************************************************************************** + +#include "contracted_graph.hpp" + +namespace contracted_graph { + +void AdjacencyMap::InsertPair(const AdjacencyMap::VertexId &vertex, const AdjacencyMap::ScaffoldVertex &edge) { + data_[vertex].insert(edge); +} +AdjacencyMap::const_iterator AdjacencyMap::begin() const { + return data_.begin(); +} +AdjacencyMap::const_iterator AdjacencyMap::end() const { + return data_.end(); +} +void AdjacencyMap::RemovePair(const VertexId &vertex, const AdjacencyMap::ScaffoldVertex &edge) { + data_.at(vertex).erase(edge); + if (data_.at(vertex).empty()) { + data_.erase(vertex); + } +} +bool AdjacencyMap::Contains(const VertexId &vertex, const AdjacencyMap::ScaffoldVertex &edge) { + auto vertex_entry = data_.find(vertex); + if (vertex_entry == data_.end()) { + return false; + } + return vertex_entry->second.find(edge) != vertex_entry->second.end(); +} +bool AdjacencyMap::empty() const { + return data_.empty(); +} +size_t AdjacencyMap::size() const { + return data_.size(); +} + +void ContractedGraph::InsertVertex(const ContractedGraph::VertexId &vertex) { + if (vertices_.insert(vertex).second) { + AdjacencyMap empty; + incoming_[vertex] = empty; + outcoming_[vertex] = empty; + } +} +void ContractedGraph::InsertEdge(const ContractedGraph::VertexId &head, const ContractedGraph::VertexId &tail, + const ContractedGraph::ScaffoldVertex &edge) { + VERIFY_DEV(vertices_.find(head) != vertices_.end()); + VERIFY_DEV(vertices_.find(tail) != vertices_.end()); + outcoming_[head].InsertPair(tail, edge); + incoming_[tail].InsertPair(head, edge); +} +ContractedGraph::const_entry_iterator ContractedGraph::in_entry_begin(const ContractedGraph::VertexId &vertex) const { + return incoming_.at(vertex).begin(); +} +ContractedGraph::const_entry_iterator ContractedGraph::in_entry_end(const ContractedGraph::VertexId &vertex) const { + return incoming_.at(vertex).end(); +} +adt::iterator_range ContractedGraph::IncomingEntries( 
+ const ContractedGraph::VertexId &vertex) const { + return adt::make_range(in_entry_begin(vertex), in_entry_end(vertex)); +} +ContractedGraph::const_entry_iterator ContractedGraph::out_entry_begin(const ContractedGraph::VertexId &vertex) const { + return outcoming_.at(vertex).begin(); +} +ContractedGraph::const_entry_iterator ContractedGraph::out_entry_end(const ContractedGraph::VertexId &vertex) const { + return outcoming_.at(vertex).end(); +} +adt::iterator_range ContractedGraph::OutcomingEntries( + const ContractedGraph::VertexId &vertex) const { + return adt::make_range(out_entry_begin(vertex), out_entry_end(vertex)); +} +size_t ContractedGraph::GetOutDegree(const ContractedGraph::VertexId &vertex) const { + size_t result = 0; + for (const auto &entry: outcoming_.at(vertex)) { + result += entry.second.size(); + } + return result; +} +size_t ContractedGraph::GetInDegree(const ContractedGraph::VertexId &vertex) const { + size_t result = 0; + for (const auto &entry: incoming_.at(vertex)) { + result += entry.second.size(); + } + return result; +} +size_t ContractedGraph::GetCapacity(const ContractedGraph::VertexId &vertex) const { + return capacity_.at(vertex); +} +void ContractedGraph::InsertCapacity(const ContractedGraph::VertexId &vertex, size_t capacity) { + capacity_[vertex] = capacity; +} +bool ContractedGraph::ContainsVertex(const ContractedGraph::VertexId &vertex) const { + return vertices_.find(vertex) != vertices_.end(); +} +ContractedGraph::const_vertex_iterator ContractedGraph::begin() const { + return vertices_.begin(); +} +ContractedGraph::const_vertex_iterator ContractedGraph::end() const { + return vertices_.end(); +} +size_t ContractedGraph::size() const { + return vertices_.size(); +} +size_t ContractedGraph::CountEdges() const { + size_t result = 0; + for (const auto &vertex: vertices()) { + result += GetOutDegree(vertex); + } + return result; +} +void ContractedGraph::RemoveEdge(const VertexId &head, const VertexId &tail, const 
ContractedGraph::ScaffoldVertex &edge) { + VERIFY_DEV(ContainsVertex(head)); + VERIFY_DEV(ContainsVertex(tail)); + auto &head_outcoming = outcoming_.at(head); + auto &tail_incoming = incoming_.at(tail); + if (not head_outcoming.Contains(tail, edge)) { + return; + } + VERIFY_DEV(tail_incoming.Contains(head, edge)); + head_outcoming.RemovePair(tail, edge); + tail_incoming.RemovePair(head, edge); +} +ContractedGraph::ContractedGraph(const Graph &assembly_graph) : assembly_graph_(assembly_graph) {} + +const debruijn_graph::Graph &ContractedGraph::GetAssemblyGraph() const { + return assembly_graph_; +} +ContractedGraph::ScaffoldVertex ContractedGraph::conjugate(ContractedGraph::ScaffoldVertex edge) const { + return edge.GetConjugateFromGraph(assembly_graph_); +} +//std::string ContractedGraph::EdgeNucls(ContractedGraph::EdgeId edge) const { +// return edge.GetSequence(assembly_graph_); +//} + +double ContractedGraph::coverage(ContractedGraph::EdgeId edge) const { + return edge.GetCoverageFromGraph(assembly_graph_); +} +size_t ContractedGraph::length(ContractedGraph::EdgeId edge) const { + return edge.GetLengthFromGraph(assembly_graph_); +} +size_t ContractedGraph::int_id(ContractedGraph::EdgeId edge) const { + return edge.int_id(); +} +adt::iterator_range ContractedGraph::vertices() const { + return adt::make_range(begin(), end()); +} +ContractedGraph::const_edge_iterator ContractedGraph::in_edge_begin(const VertexId &vertex) const { + auto entry_begin = in_entry_begin(vertex); + if (not incoming_.at(vertex).empty()) { + return ContractedGraph::const_edge_iterator(entry_begin, entry_begin->second.begin(), in_entry_end(vertex)); + } + return const_edge_iterator(entry_begin, empty_.end(), in_entry_end(vertex)); +} +ContractedGraph::const_edge_iterator ContractedGraph::in_edge_end(const VertexId &vertex) const { + auto entry_end = in_entry_end(vertex); + auto entry_last = std::prev(entry_end); + if (not incoming_.at(vertex).empty()) { + return 
const_edge_iterator(entry_end, entry_last->second.end(), entry_end); + } + return const_edge_iterator(entry_end, empty_.end(), entry_end); +} +adt::iterator_range ContractedGraph::IncomingEdges(const VertexId &vertex) const { + return adt::make_range(in_edge_begin(vertex), in_edge_end(vertex)); +} + +ContractedGraph::const_edge_iterator ContractedGraph::out_edge_begin(const VertexId &vertex) const { + auto entry_begin = out_entry_begin(vertex); + if (not outcoming_.at(vertex).empty()) { + return ContractedGraph::const_edge_iterator(entry_begin, entry_begin->second.begin(), out_entry_end(vertex)); + } + return const_edge_iterator(entry_begin, empty_.end(), out_entry_end(vertex)); +} +ContractedGraph::const_edge_iterator ContractedGraph::out_edge_end(const VertexId &vertex) const { + auto entry_end = out_entry_end(vertex); + auto entry_last = std::prev(entry_end); + if (not outcoming_.at(vertex).empty()) { + return const_edge_iterator(entry_end, entry_last->second.end(), entry_end); + } + return const_edge_iterator(entry_end, empty_.end(), entry_end); +} +adt::iterator_range ContractedGraph::OutgoingEdges(const VertexId &vertex) const { + return adt::make_range(out_edge_begin(vertex), out_edge_end(vertex)); +} +auto ContractedGraph::canonical_edges() const { + return assembly_graph_.canonical_edges(); +} +ContractedGraph::VertexId ContractedGraph::conjugate(const ContractedGraph::VertexId &vertex) const { + return assembly_graph_.conjugate(vertex); +} +Sequence ContractedGraph::EdgeNucls(ContractedGraph::EdgeId edge) const { + VERIFY(edge.GetType() == scaffold_graph::ScaffoldVertexT::Edge); + assembly_graph_.EdgeNucls(edge.GetFirstEdge()); +} +size_t ContractedGraph::IncomingEdgeCount(const ContractedGraph::VertexId &vertex) const { + return incoming_.at(vertex).size(); +} +size_t ContractedGraph::OutgoingEdgeCount(const contracted_graph::ContractedGraph::VertexId &vertex) const { + return outcoming_.at(vertex).size(); +} + +} \ No newline at end of file diff --git 
a/src/common/auxiliary_graphs/contracted_graph/contracted_graph.hpp b/src/common/auxiliary_graphs/contracted_graph/contracted_graph.hpp new file mode 100644 index 0000000000..d35d0a89ef --- /dev/null +++ b/src/common/auxiliary_graphs/contracted_graph/contracted_graph.hpp @@ -0,0 +1,147 @@ +//*************************************************************************** +//* Copyright (c) 2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. +//*************************************************************************** + +#pragma once + +#include "auxiliary_graphs/scaffold_graph/scaffold_vertex.hpp" +#include "adt/iterator_range.hpp" +#include "assembly_graph/core/graph.hpp" + +namespace contracted_graph { +class AdjacencyMap { + public: + typedef debruijn_graph::VertexId VertexId; + typedef debruijn_graph::EdgeId EdgeId; + typedef scaffold_graph::ScaffoldVertex ScaffoldVertex; + typedef std::map>::const_iterator const_iterator; + typedef std::map>::value_type value_type; + + AdjacencyMap() = default; + AdjacencyMap(const VertexId &vertex, const ScaffoldVertex &edge) : data_({{vertex, {edge}}}) {} + void InsertPair(const VertexId &vertex, const ScaffoldVertex &edge); + void RemovePair(const VertexId &vertex, const ScaffoldVertex &edge); + bool Contains(const VertexId &vertex, const ScaffoldVertex &edge); + bool empty() const; + size_t size() const; + + const_iterator begin() const; + const_iterator end() const; + + private: + std::map> data_; +}; + +class ContractedGraph { + public: + typedef debruijn_graph::VertexId VertexId; + typedef debruijn_graph::Graph Graph; + typedef std::set VertexContainer; + typedef std::map EdgeContainer; + typedef scaffold_graph::ScaffoldVertex ScaffoldVertex; + typedef AdjacencyMap::const_iterator const_entry_iterator; + typedef VertexContainer::const_iterator const_vertex_iterator; + typedef std::unordered_set::const_iterator internal_edge_iterator; + typedef ScaffoldVertex EdgeId; + + class 
const_edge_iterator : public boost::iterator_facade { + public: + explicit const_edge_iterator(const_entry_iterator entry_it, + internal_edge_iterator edge_it, + const_entry_iterator entry_end): + entry_it_(entry_it), + edge_it_(edge_it), + entry_end_(entry_end) {} + + private: + friend class boost::iterator_core_access; + + bool equal(const const_edge_iterator &other) const { + return entry_it_ == other.entry_it_ and edge_it_ == other.edge_it_ and entry_end_ == other.entry_end_; + } + + const ScaffoldVertex &dereference() const { + return *edge_it_; + } + + void increment() { + ++edge_it_; + if (edge_it_ == entry_it_->second.end()) { + ++entry_it_; + if (entry_it_ != entry_end_) { + edge_it_ = entry_it_->second.begin(); + } + } + } + + const_entry_iterator entry_it_; + internal_edge_iterator edge_it_; + const_entry_iterator entry_end_; + }; + + explicit ContractedGraph(const Graph &assembly_graph); + virtual ~ContractedGraph() = default; + ContractedGraph(ContractedGraph &&other) = default; + + void InsertVertex(const VertexId &vertex); + void InsertEdge(const VertexId &head, const VertexId &tail, const ScaffoldVertex &edge); + void RemoveEdge(const VertexId &head, const VertexId &tail, const ScaffoldVertex &edge); + size_t GetOutDegree(const VertexId &vertex) const; + size_t GetInDegree(const VertexId &vertex) const; + size_t GetCapacity(const VertexId &vertex) const; + void InsertCapacity(const VertexId &vertex, size_t capacity); + bool ContainsVertex(const VertexId &vertex) const; + + const_entry_iterator in_entry_begin(const VertexId &vertex) const; + const_entry_iterator in_entry_end(const VertexId &vertex) const; + adt::iterator_range IncomingEntries(const VertexId &vertex) const; + const_entry_iterator out_entry_begin(const VertexId &vertex) const; + const_entry_iterator out_entry_end(const VertexId &vertex) const; + adt::iterator_range OutcomingEntries(const VertexId &vertex) const; + + const_edge_iterator in_edge_begin(const VertexId &vertex) const; + 
const_edge_iterator in_edge_end(const VertexId &vertex) const; + adt::iterator_range IncomingEdges(const VertexId &vertex) const; + size_t IncomingEdgeCount(const VertexId &vertex) const; + const_edge_iterator out_edge_begin(const VertexId &vertex) const; + const_edge_iterator out_edge_end(const VertexId &vertex) const; + adt::iterator_range OutgoingEdges(const VertexId &vertex) const; + size_t OutgoingEdgeCount(const VertexId &vertex) const; + + const_vertex_iterator begin() const; + const_vertex_iterator end() const; + adt::iterator_range vertices() const; + size_t size() const; + size_t CountEdges() const; + + //fixme also iterates over short edges + auto canonical_edges () const; + + const Graph &GetAssemblyGraph() const; +// std::string EdgeNucls(EdgeId edge) const; +// std::string VertexNucls(VertexId vertex) const; + Sequence EdgeNucls(EdgeId edge) const; + double coverage(EdgeId edge) const; + size_t length(EdgeId edge) const; + size_t int_id(EdgeId edge) const; + + ScaffoldVertex conjugate(ScaffoldVertex edge) const; + VertexId conjugate(const VertexId &vertex) const; + + protected: + EdgeContainer outcoming_; + EdgeContainer incoming_; + VertexContainer vertices_; + std::map capacity_; + + //for edge iterator + std::unordered_set empty_; + + //for compatibility with visualizers and stuff + const Graph &assembly_graph_; + }; +} \ No newline at end of file diff --git a/src/common/auxiliary_graphs/contracted_graph/contracted_graph_builder.cpp b/src/common/auxiliary_graphs/contracted_graph/contracted_graph_builder.cpp new file mode 100644 index 0000000000..229e80a035 --- /dev/null +++ b/src/common/auxiliary_graphs/contracted_graph/contracted_graph_builder.cpp @@ -0,0 +1,118 @@ +//*************************************************************************** +//* Copyright (c) 2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. 
+//*************************************************************************** + +#include "contracted_graph_builder.hpp" + +namespace contracted_graph { + +void PartsBasedContractedFactory::ConstructFromParts(PartsBasedContractedFactory::ContractedGraphParts &&parts) { + DEBUG("Constructing from parts"); + const auto &vertex_to_root = parts.vertex_to_root_; + const auto &long_edges = parts.long_edges_; + const auto &vertex_to_capacity = parts.vertex_to_capacity_; + DEBUG("Vertex to root size: " << vertex_to_root.size()); + DEBUG("Vertex to capacity: " << vertex_to_capacity.size()); + for (const auto &entry: vertex_to_root) { + this->graph_ptr_->InsertVertex(entry.second); + } + for (const auto &edge: long_edges) { + DEBUG("Processing edge " << edge.int_id()); + DEBUG(edge.GetStartGraphVertex(g_) << " -> " << edge.GetEndGraphVertex(g_)); + VertexId start_root = vertex_to_root.at(edge.GetStartGraphVertex(g_)); + VertexId end_root = vertex_to_root.at(edge.GetEndGraphVertex(g_)); + DEBUG("Inserting vertices and edges"); + this->graph_ptr_->InsertEdge(start_root, end_root, edge); + + } + for (const auto &entry: parts.vertex_to_capacity_) { + this->graph_ptr_->InsertCapacity(entry.first, entry.second); + } +} +void PartsBasedContractedFactory::Construct() { + ConstructFromParts(ConstructParts()); +} + +PartsBasedContractedFactory::ContractedGraphParts DBGContractedGraphFactory::ConstructParts() const { + omnigraph::IterationHelper edge_iteration_helper(g_); + omnigraph::IterationHelper vertex_iteration_helper(g_); + DEBUG("Preparing parts"); + ContractedGraphParts graph_parts; + + contracted_dsu_t graph_dsu(g_.size()); + std::unordered_map vertex_to_id; + std::unordered_map id_to_vertex; + + size_t counter = 0; + for (auto vertex : vertex_iteration_helper) { + vertex_to_id.insert({vertex, counter}); + id_to_vertex.insert({counter, vertex}); + graph_parts.vertex_to_capacity_.insert({vertex, 0}); + ++counter; + } + + DEBUG("Filling parts"); + for (const auto &edge: 
edge_iteration_helper) { + ProcessEdge(graph_dsu, graph_parts, vertex_to_id, id_to_vertex, edge); + } + DEBUG(graph_dsu.num_sets() << " sets in dsu"); + for (const auto &vertex: vertex_iteration_helper) { + VertexId root_vertex = id_to_vertex.at(graph_dsu.find_set(vertex_to_id.at(vertex))); + DEBUG("Inserting vertex and root: " << vertex.int_id() << ", " << root_vertex.int_id()); + graph_parts.vertex_to_root_.insert({vertex, root_vertex}); + } + return graph_parts; +} +void DBGContractedGraphFactory::ProcessEdge(DBGContractedGraphFactory::contracted_dsu_t &graph_dsu, + PartsBasedContractedFactory::ContractedGraphParts &parts, + const std::unordered_map &vertex_to_id, + const std::unordered_map &id_to_vertex, + const EdgeId &edge) const { + VertexId start = g_.EdgeStart(edge); + VertexId end = g_.EdgeEnd(edge); + size_t start_id = vertex_to_id.at(start); + size_t end_id = vertex_to_id.at(end); + size_t start_root = graph_dsu.find_set(start_id); + size_t end_root = graph_dsu.find_set(end_id); + VertexId start_root_vertex = id_to_vertex.at(start_root); + VertexId end_root_vertex = id_to_vertex.at(start_root); + if (not edge_predicate_(edge)) { + if (start_root == end_root) { + parts.vertex_to_capacity_.at(start_root_vertex) += g_.length(edge); + } else { + size_t start_capacity = parts.vertex_to_capacity_[start_root_vertex]; + size_t end_capacity = parts.vertex_to_capacity_[end_root_vertex]; + graph_dsu.unite(start_root, end_root); + VertexId new_vertex = id_to_vertex.at(graph_dsu.find_set(start_root)); + parts.vertex_to_capacity_.at(new_vertex) = start_capacity + end_capacity + g_.length(edge); + } + } else { + parts.long_edge_ends_.insert(start_root_vertex); + parts.long_edge_ends_.insert(end_root_vertex); + parts.long_edges_.emplace_back(edge); + } +} +void SubgraphContractedGraphFactory::Construct() { + ExtractSubgraphFromContractedGraph(other_, vertices_); +} +void SubgraphContractedGraphFactory::ExtractSubgraphFromContractedGraph(const ContractedGraph &other, + 
const std::unordered_set &vertices) { + for (const auto &vertex: vertices) { + VERIFY(other.ContainsVertex(vertex)); + graph_ptr_->InsertVertex(vertex); + } + + for (const auto &vertex: vertices) { + for (const auto &adj_list: other.OutcomingEntries(vertex)) { + VertexId next = adj_list.first; + if (vertices.find(next) != vertices.end()) { + for (const auto &edge: adj_list.second) { + graph_ptr_->InsertEdge(vertex, next, edge); + } + } + } + } +} +} \ No newline at end of file diff --git a/src/common/auxiliary_graphs/contracted_graph/contracted_graph_builder.hpp b/src/common/auxiliary_graphs/contracted_graph/contracted_graph_builder.hpp new file mode 100644 index 0000000000..0135f0fabb --- /dev/null +++ b/src/common/auxiliary_graphs/contracted_graph/contracted_graph_builder.hpp @@ -0,0 +1,100 @@ +//*************************************************************************** +//* Copyright (c) 2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. 
+//*************************************************************************** + +#pragma once + +#include "contracted_graph.hpp" + +#include "adt/concurrent_dsu.hpp" + +#include + +namespace contracted_graph { + +class ContractedGraphFactory { + public: + using Graph = debruijn_graph::Graph; + + ContractedGraphFactory(const Graph &g) : + g_(g), graph_ptr_(std::make_shared(g)) {} + virtual ~ContractedGraphFactory() = default; + virtual void Construct() = 0; + std::shared_ptr GetGraph() { + return graph_ptr_; + } + protected: + const Graph &g_; + std::shared_ptr graph_ptr_; +}; + +class PartsBasedContractedFactory : public ContractedGraphFactory { + public: + using VertexId = debruijn_graph::VertexId; + using ContractedGraphFactory::Graph; + + PartsBasedContractedFactory(const Graph &g): ContractedGraphFactory(g) {} + virtual ~PartsBasedContractedFactory() {} + + void Construct() override; + protected: + typedef scaffold_graph::ScaffoldVertex ScaffoldVertex; + + struct ContractedGraphParts { + std::vector long_edges_; + std::unordered_set long_edge_ends_; + std::unordered_map vertex_to_capacity_; + std::unordered_map vertex_to_root_; + }; + + virtual ContractedGraphParts ConstructParts() const = 0; + void ConstructFromParts(ContractedGraphParts &&parts); + + using ContractedGraphFactory::graph_ptr_; + using ContractedGraphFactory::g_; + DECL_LOGGER("DSUBasedContractedGraphFactory"); +}; + +class DBGContractedGraphFactory : public PartsBasedContractedFactory { + public: + using contracted_dsu_t = dsu::ConcurrentDSU; + using EdgeId = debruijn_graph::EdgeId; + using PartsBasedContractedFactory::Graph; + + DBGContractedGraphFactory(const Graph &g, const std::function &edge_predicate) : + PartsBasedContractedFactory(g), edge_predicate_(edge_predicate) {} + + private: + ContractedGraphParts ConstructParts() const override; + + void ProcessEdge(contracted_dsu_t &graph_dsu, ContractedGraphParts &parts, + const std::unordered_map &vertex_to_id, + const std::unordered_map 
&id_to_vertex, const EdgeId &edge) const; + + using PartsBasedContractedFactory::g_; + using PartsBasedContractedFactory::graph_ptr_; + using PartsBasedContractedFactory::ContractedGraphParts; + const std::function edge_predicate_; + + DECL_LOGGER("DBGContractedGraphFactory"); +}; + +class SubgraphContractedGraphFactory: public ContractedGraphFactory { + public: + using VertexId = debruijn_graph::VertexId; + + SubgraphContractedGraphFactory(const ContractedGraph &other, const std::unordered_set &vertices) : + ContractedGraphFactory(other.GetAssemblyGraph()), other_(other), vertices_(vertices) {} + + void Construct() override; + + private: + void ExtractSubgraphFromContractedGraph(const ContractedGraph &other, const std::unordered_set &vertices); + + const ContractedGraph &other_; + const std::unordered_set &vertices_; +}; + +} //contracted_graph diff --git a/src/common/auxiliary_graphs/contracted_graph/contracted_graph_helper.cpp b/src/common/auxiliary_graphs/contracted_graph/contracted_graph_helper.cpp new file mode 100644 index 0000000000..98a295ac61 --- /dev/null +++ b/src/common/auxiliary_graphs/contracted_graph/contracted_graph_helper.cpp @@ -0,0 +1,27 @@ +//*************************************************************************** +//* Copyright (c) 2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. 
+//*************************************************************************** + +#include "contracted_graph_helper.hpp" + +namespace contracted_graph { + +//std::shared_ptr ContractedGraphFactoryHelper::ConstructFromUniqueStorage( +// const UniqueStorage &unique_storage) const { +// std::function edge_predicate = [&unique_storage](debruijn_graph::EdgeId edge) { +// return unique_storage.IsUnique(edge); +// }; +// DBGContractedGraphFactory factory(g_, edge_predicate); +// factory.Construct(); +// return factory.GetGraph(); +//} +std::shared_ptr ContractedGraphFactoryHelper::ExtractContractedSubgraph( + const ContractedGraph &other, + const std::unordered_set &vertices) const { + SubgraphContractedGraphFactory factory(other, vertices); + factory.Construct(); + return factory.GetGraph(); +} +} \ No newline at end of file diff --git a/src/common/auxiliary_graphs/contracted_graph/contracted_graph_helper.hpp b/src/common/auxiliary_graphs/contracted_graph/contracted_graph_helper.hpp new file mode 100644 index 0000000000..6a6618ec84 --- /dev/null +++ b/src/common/auxiliary_graphs/contracted_graph/contracted_graph_helper.hpp @@ -0,0 +1,26 @@ +//*************************************************************************** +//* Copyright (c) 2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. 
+//*************************************************************************** + +#pragma once + +#include "contracted_graph_builder.hpp" + +namespace contracted_graph { + +class ContractedGraphFactoryHelper { + public: + typedef debruijn_graph::VertexId VertexId; + typedef debruijn_graph::Graph Graph; + + explicit ContractedGraphFactoryHelper(const debruijn_graph::Graph &g) : g_(g) {} + + std::shared_ptr ExtractContractedSubgraph(const ContractedGraph &other, + const std::unordered_set &vertices) const; + + private: + const Graph& g_; +}; +} \ No newline at end of file diff --git a/src/common/auxiliary_graphs/contracted_graph/contracted_statistics.cpp b/src/common/auxiliary_graphs/contracted_graph/contracted_statistics.cpp new file mode 100644 index 0000000000..cd48e2a372 --- /dev/null +++ b/src/common/auxiliary_graphs/contracted_graph/contracted_statistics.cpp @@ -0,0 +1,68 @@ +//*************************************************************************** +//* Copyright (c) 2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. 
+//*************************************************************************** + +#include "contracted_statistics.hpp" + +#include "contracted_graph_builder.hpp" + +namespace contracted_graph { +size_t ContractedStatisticsExtractor::CountLoops(const ContractedGraph &graph) const { + size_t result = 0; + for (const auto &vertex: graph) { + for (const auto &entry: graph.OutcomingEntries(vertex)) { + auto next_vertex = entry.first; + if (vertex == next_vertex) { + ++result; + } + } + } + return result; +} +double ContractedStatisticsExtractor::GetMeanWeight(const ContractedGraph &graph) const { + size_t total_capacity = 0; + size_t non_isolated = 0; + for (const auto &vertex: graph) { + if (graph.GetOutDegree(vertex) > 0 or graph.GetInDegree(vertex) > 0) { + total_capacity += graph.GetCapacity(vertex); + ++non_isolated; + } + } + INFO("Total capacity: " << total_capacity << ", Non isolated: " << non_isolated); + return static_cast(total_capacity) / static_cast(non_isolated); +} +void ContractedStatisticsExtractor::GetMeanWeights(std::vector thresholds, + const std::string &output_path) const { + std::map threshold_to_mean_weight; + for (const size_t threshold: thresholds) { + INFO("Constructing graph for " << threshold); + auto length_predicate = [this, threshold](const debruijn_graph::EdgeId &edge) { + return assembly_graph_.length(edge) >= threshold; + }; + DBGContractedGraphFactory factory(assembly_graph_, length_predicate); + factory.Construct(); + INFO("Constructed graph"); + auto graph = factory.GetGraph(); + double mean_weight = GetMeanWeight(*graph); + INFO("Mean weight " << mean_weight); + threshold_to_mean_weight.insert({threshold, mean_weight}); + } + std::ofstream fout(output_path); + for (const auto &entry: threshold_to_mean_weight) { + fout << entry.first << " " << entry.second << "\n"; + } +} +ContractedStatisticsExtractor::ContractedStatisticsExtractor(const Graph &assembly_graph) : + assembly_graph_(assembly_graph) {} +size_t 
ContractedStatisticsExtractor::CountNonIsolated(const ContractedGraph &graph) const { + size_t result = 0; + for (const auto &vertex: graph) { + if (graph.GetInDegree(vertex) > 0 or graph.GetOutDegree(vertex) > 0) { + ++result; + } + } + return result; +} +} diff --git a/src/common/auxiliary_graphs/contracted_graph/contracted_statistics.hpp b/src/common/auxiliary_graphs/contracted_graph/contracted_statistics.hpp new file mode 100644 index 0000000000..3190280a79 --- /dev/null +++ b/src/common/auxiliary_graphs/contracted_graph/contracted_statistics.hpp @@ -0,0 +1,27 @@ +//*************************************************************************** +//* Copyright (c) 2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. +//*************************************************************************** + +#pragma once + +#include "contracted_graph.hpp" + +namespace contracted_graph { +class ContractedStatisticsExtractor { + public: + using Graph = debruijn_graph::Graph; + + explicit ContractedStatisticsExtractor(const Graph &assembly_graph); + + size_t CountLoops(const ContractedGraph &graph) const; + size_t CountNonIsolated(const ContractedGraph &graph) const; + double GetMeanWeight(const ContractedGraph &graph) const; + + void GetMeanWeights(std::vector thresholds, const std::string &output_path) const; + + private: + const Graph &assembly_graph_; +}; +} \ No newline at end of file diff --git a/src/common/auxiliary_graphs/contracted_graph/graph_condensation.cpp b/src/common/auxiliary_graphs/contracted_graph/graph_condensation.cpp new file mode 100644 index 0000000000..0938ead43d --- /dev/null +++ b/src/common/auxiliary_graphs/contracted_graph/graph_condensation.cpp @@ -0,0 +1,69 @@ +//*************************************************************************** +//* Copyright (c) 2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. 
+//***************************************************************************
+
+#include "graph_condensation.hpp"
+
+namespace contracted_graph {
+// Maximal simple paths over edges joined by vertices of in- and out-degree 1: open chains, then cycles.
+std::vector<UnbranchingPathExtractor::SimplePath> UnbranchingPathExtractor::ExtractUnbranchingPaths(
+        const ContractedGraph &graph) const {
+    std::unordered_map<ScaffoldVertex, ScaffoldVertex> edge_to_next;
+    std::set<ScaffoldVertex> unbranching_vertices;
+    std::set<ScaffoldVertex> starts;
+    for (const auto &vertex: graph) {
+        if (graph.GetOutDegree(vertex) == 1 and graph.GetInDegree(vertex) == 1) {
+            auto incoming_edge = *(graph.in_edge_begin(vertex));
+            auto outcoming_edge = *(graph.out_edge_begin(vertex));
+            if (incoming_edge != outcoming_edge) {
+                edge_to_next[incoming_edge] = outcoming_edge;
+                unbranching_vertices.insert(incoming_edge);
+                unbranching_vertices.insert(outcoming_edge);
+                starts.insert(incoming_edge);
+            }
+        }
+    }
+    // an edge with a predecessor cannot open a chain; erasing after the scan is order-independent
+    for (const auto &link: edge_to_next) {
+        starts.erase(link.second);
+    }
+    std::vector<SimplePath> result;
+    std::set<ScaffoldVertex> visited;
+    size_t inserted = 0;
+    for (const auto &start: starts) {
+        SimplePath path;
+        path.push_back(start);
+        ++inserted;
+        visited.insert(start);
+        ScaffoldVertex curr_vertex = start;
+        while(edge_to_next.find(curr_vertex) != edge_to_next.end()) {
+            curr_vertex = edge_to_next.at(curr_vertex);
+            path.push_back(curr_vertex);
+            visited.insert(curr_vertex);
+            ++inserted;
+        }
+        result.push_back(path);
+    }
+    INFO("Inserted " << inserted << " out of " << unbranching_vertices.size() << " unbranching vertices");
+    INFO(result.size() << " unbranching simple paths");
+    for (const auto &vertex: unbranching_vertices) {
+        if (visited.find(vertex) == visited.end()) {
+            INFO("Unvisited");
+            SimplePath cycle;
+            visited.insert(vertex);
+            cycle.push_back(vertex);
+            ScaffoldVertex curr_vertex = edge_to_next.at(vertex);  // unvisited edges lie on a cycle
+            while (curr_vertex != vertex) {
+                visited.insert(curr_vertex);
+                cycle.push_back(curr_vertex);
+                curr_vertex = edge_to_next.at(curr_vertex);  // step along the cycle until it closes
+            }
+            result.push_back(cycle);
+        }
+    }
+    INFO(result.size() << " total simple paths");
+    return result;
+}
+}
diff --git
a/src/common/auxiliary_graphs/contracted_graph/graph_condensation.hpp b/src/common/auxiliary_graphs/contracted_graph/graph_condensation.hpp new file mode 100644 index 0000000000..6f569f6f02 --- /dev/null +++ b/src/common/auxiliary_graphs/contracted_graph/graph_condensation.hpp @@ -0,0 +1,20 @@ +//*************************************************************************** +//* Copyright (c) 2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. +//*************************************************************************** + +#pragma once + +#include "contracted_graph.hpp" + +namespace contracted_graph { + +class UnbranchingPathExtractor { + public: + typedef scaffold_graph::ScaffoldVertex ScaffoldVertex; + typedef std::vector SimplePath; + std::vector ExtractUnbranchingPaths(const ContractedGraph &graph) const; +}; + +} \ No newline at end of file diff --git a/src/common/modules/path_extend/scaffolder2015/scaffold_graph.cpp b/src/common/auxiliary_graphs/scaffold_graph/scaffold_graph.cpp similarity index 56% rename from src/common/modules/path_extend/scaffolder2015/scaffold_graph.cpp rename to src/common/auxiliary_graphs/scaffold_graph/scaffold_graph.cpp index e1bcb713e5..f071dcb9c2 100644 --- a/src/common/modules/path_extend/scaffolder2015/scaffold_graph.cpp +++ b/src/common/auxiliary_graphs/scaffold_graph/scaffold_graph.cpp @@ -7,12 +7,26 @@ #include "scaffold_graph.hpp" - -namespace path_extend { namespace scaffold_graph { std::atomic ScaffoldGraph::ScaffoldEdge::scaffold_edge_id_{0}; +bool ScaffoldGraph::ScaffoldEdge::operator<(const ScaffoldGraph::ScaffoldEdge& rhs) const { + return id_ < rhs.id_; +} +bool ScaffoldGraph::ScaffoldEdge::operator>(const ScaffoldGraph::ScaffoldEdge& rhs) const { + return rhs < *this; +} +bool ScaffoldGraph::ScaffoldEdge::operator<=(const ScaffoldGraph::ScaffoldEdge& rhs) const { + return !(rhs < *this); +} +bool ScaffoldGraph::ScaffoldEdge::operator>=(const ScaffoldGraph::ScaffoldEdge& 
rhs) const { + return !(*this < rhs); +} +bool ScaffoldGraph::ScaffoldEdge::operator==(const ScaffoldGraph::ScaffoldEdge &e) const { + return color_ == e.color_ && weight_ == e.weight_ && start_ == e.start_ && end_ == e.end_; +} + void ScaffoldGraph::AddEdgeSimple(const ScaffoldGraph::ScaffoldEdge &e) { edges_.emplace(e.getId(), e); outgoing_edges_.emplace(e.getStart(), e.getId()); @@ -21,23 +35,27 @@ void ScaffoldGraph::AddEdgeSimple(const ScaffoldGraph::ScaffoldEdge &e) { void ScaffoldGraph::DeleteOutgoing(const ScaffoldGraph::ScaffoldEdge &e) { auto e_range = outgoing_edges_.equal_range(e.getStart()); - for (auto edge_id = e_range.first; edge_id != e_range.second; ++edge_id) { + for (auto edge_id = e_range.first; edge_id != e_range.second;) { if (edges_.at(edge_id->second) == e) { - outgoing_edges_.erase(edge_id); + edge_id = outgoing_edges_.erase(edge_id); + } else { + ++edge_id; } } } void ScaffoldGraph::DeleteIncoming(const ScaffoldGraph::ScaffoldEdge &e) { auto e_range = incoming_edges_.equal_range(e.getEnd()); - for (auto edge_id = e_range.first; edge_id != e_range.second; ++edge_id) { + for (auto edge_id = e_range.first; edge_id != e_range.second;) { if (edges_.at(edge_id->second) == e) { - incoming_edges_.erase(edge_id); + edge_id = incoming_edges_.erase(edge_id); + } else { + ++edge_id; } } } -void ScaffoldGraph::DeleteAllOutgoingEdgesSimple(ScaffoldGraph::ScaffoldVertex v) { +void ScaffoldGraph::DeleteAllOutgoingEdgesSimple(ScaffoldVertex v) { auto e_range = outgoing_edges_.equal_range(v); for (auto edge_id = e_range.first; edge_id != e_range.second; ++edge_id) { DeleteIncoming(edges_.at(edge_id->second)); @@ -50,7 +68,7 @@ void ScaffoldGraph::DeleteEdgeFromStorage(const ScaffoldGraph::ScaffoldEdge &e) edges_.erase(e.getId()); } -void ScaffoldGraph::DeleteAllIncomingEdgesSimple(ScaffoldGraph::ScaffoldVertex v) { +void ScaffoldGraph::DeleteAllIncomingEdgesSimple(ScaffoldVertex v) { auto e_range = incoming_edges_.equal_range(v); for (auto edge_id = 
e_range.first; edge_id != e_range.second; ++edge_id) { DeleteOutgoing(edges_.at(edge_id->second)); @@ -58,7 +76,7 @@ void ScaffoldGraph::DeleteAllIncomingEdgesSimple(ScaffoldGraph::ScaffoldVertex v incoming_edges_.erase(v); } -bool ScaffoldGraph::Exists(ScaffoldGraph::ScaffoldVertex assembly_graph_edge) const { +bool ScaffoldGraph::Exists(ScaffoldVertex assembly_graph_edge) const { return vertices_.count(assembly_graph_edge) != 0; } @@ -72,40 +90,33 @@ bool ScaffoldGraph::Exists(const ScaffoldGraph::ScaffoldEdge &e) const { return false; } -ScaffoldGraph::ScaffoldVertex ScaffoldGraph::conjugate(ScaffoldGraph::ScaffoldVertex assembly_graph_edge) const { - return assembly_graph_.conjugate(assembly_graph_edge); +ScaffoldVertex ScaffoldGraph::conjugate(ScaffoldVertex scaffold_vertex) const { + return scaffold_vertex.GetConjugateFromGraph(assembly_graph_); } ScaffoldGraph::ScaffoldEdge ScaffoldGraph::conjugate(const ScaffoldGraph::ScaffoldEdge &e) const { return ScaffoldEdge(conjugate(e.getEnd()), conjugate(e.getStart()), e.getColor(), e.getWeight()); } -bool ScaffoldGraph::AddVertex(ScaffoldGraph::ScaffoldVertex assembly_graph_edge) { - if (!Exists(assembly_graph_edge)) { - VERIFY(!Exists(conjugate(assembly_graph_edge))); - vertices_.insert(assembly_graph_edge); - vertices_.insert(conjugate(assembly_graph_edge)); +bool ScaffoldGraph::AddVertex(ScaffoldVertex scaffold_vertex) { + if (!Exists(scaffold_vertex)) { + VERIFY(!Exists(conjugate(scaffold_vertex))); + vertices_.insert(scaffold_vertex); + vertices_.insert(conjugate(scaffold_vertex)); return true; } return false; } -void ScaffoldGraph::AddVertices(const std::set &vertices) { - for (auto v : vertices) { - AddVertex(v); - } -} - -bool ScaffoldGraph::AddEdge(ScaffoldGraph::ScaffoldVertex v1, ScaffoldGraph::ScaffoldVertex v2, size_t lib_id, double weight) { +bool ScaffoldGraph::AddEdge(ScaffoldVertex v1, ScaffoldVertex v2, size_t lib_id, double weight, size_t length) { VERIFY(Exists(v1)); VERIFY(Exists(v2)); - 
ScaffoldEdge e(v1, v2, lib_id, weight); + ScaffoldEdge e(v1, v2, lib_id, weight, length); if (Exists(e)) { return false; } - AddEdgeSimple(e); return true; } @@ -113,42 +124,44 @@ bool ScaffoldGraph::AddEdge(ScaffoldGraph::ScaffoldVertex v1, ScaffoldGraph::Sca void ScaffoldGraph::Print(std::ostream &os) const { for (auto v: vertices_) { os << "Vertex " << int_id(v) << " ~ " << int_id(conjugate(v)) - << ": len = " << assembly_graph_.length(v) << ", cov = " << assembly_graph_.coverage(v) << std::endl; + << ": len = " << v.GetLengthFromGraph(assembly_graph_) << ", cov = " + << v.GetCoverageFromGraph(assembly_graph_) << std::endl; } for (auto e_iter = edges_.begin(); e_iter != edges_.end(); ++e_iter) { os << "Edge " << e_iter->second.getId() << ": " << int_id(e_iter->second.getStart()) << " -> " << int_id(e_iter->second.getEnd()) << - ", lib index = " << e_iter->second.getColor() << ", weight " << e_iter->second.getWeight() << std::endl; + ", lib index = " << e_iter->second.getColor() << ", weight " << e_iter->second.getWeight() + << ", length = " << e_iter->second.getLength() << std::endl; } } -ScaffoldGraph::ScaffoldEdge ScaffoldGraph::UniqueIncoming(ScaffoldGraph::ScaffoldVertex assembly_graph_edge) const { +ScaffoldGraph::ScaffoldEdge ScaffoldGraph::UniqueIncoming(ScaffoldVertex assembly_graph_edge) const { VERIFY(HasUniqueIncoming(assembly_graph_edge)); return edges_.at(incoming_edges_.find(assembly_graph_edge)->second); } -ScaffoldGraph::ScaffoldEdge ScaffoldGraph::UniqueOutgoing(ScaffoldGraph::ScaffoldVertex assembly_graph_edge) const { +ScaffoldGraph::ScaffoldEdge ScaffoldGraph::UniqueOutgoing(ScaffoldVertex assembly_graph_edge) const { VERIFY(HasUniqueOutgoing(assembly_graph_edge)); return edges_.at(outgoing_edges_.find(assembly_graph_edge)->second); } -bool ScaffoldGraph::HasUniqueIncoming(ScaffoldGraph::ScaffoldVertex assembly_graph_edge) const { +bool ScaffoldGraph::HasUniqueIncoming(ScaffoldVertex assembly_graph_edge) const { return 
IncomingEdgeCount(assembly_graph_edge) == 1; } -bool ScaffoldGraph::HasUniqueOutgoing(ScaffoldGraph::ScaffoldVertex assembly_graph_edge) const { +bool ScaffoldGraph::HasUniqueOutgoing(ScaffoldVertex assembly_graph_edge) const { return OutgoingEdgeCount(assembly_graph_edge) == 1; } -size_t ScaffoldGraph::IncomingEdgeCount(ScaffoldGraph::ScaffoldVertex assembly_graph_edge) const { +size_t ScaffoldGraph::IncomingEdgeCount(ScaffoldVertex assembly_graph_edge) const { return incoming_edges_.count(assembly_graph_edge); } -size_t ScaffoldGraph::OutgoingEdgeCount(ScaffoldGraph::ScaffoldVertex assembly_graph_edge) const { +size_t ScaffoldGraph::OutgoingEdgeCount(ScaffoldVertex assembly_graph_edge) const { return outgoing_edges_.count(assembly_graph_edge); } -std::vector ScaffoldGraph::IncomingEdges(ScaffoldGraph::ScaffoldVertex assembly_graph_edge) const { +std::vector ScaffoldGraph::IncomingEdges(scaffold_graph::ScaffoldVertex assembly_graph_edge) const { std::vector result; auto e_range = incoming_edges_.equal_range(assembly_graph_edge); for (auto edge_id = e_range.first; edge_id != e_range.second; ++edge_id) { @@ -157,7 +170,7 @@ std::vector ScaffoldGraph::IncomingEdges(ScaffoldGr return result; } -std::vector ScaffoldGraph::OutgoingEdges(ScaffoldGraph::ScaffoldVertex assembly_graph_edge) const { +std::vector ScaffoldGraph::OutgoingEdges(scaffold_graph::ScaffoldVertex assembly_graph_edge) const { std::vector result; auto e_range = outgoing_edges_.equal_range(assembly_graph_edge); for (auto edge_id = e_range.first; edge_id != e_range.second; ++edge_id) { @@ -178,11 +191,11 @@ size_t ScaffoldGraph::VertexCount() const { return vertices_.size(); } -ScaffoldGraph::ScaffoldVertex ScaffoldGraph::EdgeEnd(ScaffoldEdge e) const { +ScaffoldVertex ScaffoldGraph::EdgeEnd(ScaffoldEdge e) const { return e.getEnd(); } -ScaffoldGraph::ScaffoldVertex ScaffoldGraph::EdgeStart(ScaffoldEdge e) const { +ScaffoldVertex ScaffoldGraph::EdgeStart(ScaffoldEdge e) const { return e.getStart(); } @@ 
-190,8 +203,8 @@ size_t ScaffoldGraph::int_id(ScaffoldGraph::ScaffoldEdge e) const { return e.getId(); } -size_t ScaffoldGraph::int_id(ScaffoldGraph::ScaffoldVertex v) const { - return assembly_graph_.int_id(v); +size_t ScaffoldGraph::int_id(ScaffoldVertex v) const { + return v.int_id(); } ScaffoldGraph::ConstScaffoldEdgeIterator ScaffoldGraph::eend() const { @@ -218,27 +231,27 @@ adt::iterator_range ScaffoldGraph::edg return adt::make_range(ebegin(), eend()); } -bool ScaffoldGraph::IsVertexIsolated(ScaffoldGraph::ScaffoldVertex assembly_graph_edge) const { +bool ScaffoldGraph::IsVertexIsolated(ScaffoldVertex assembly_graph_edge) const { bool result = incoming_edges_.count(assembly_graph_edge) == 0 && outgoing_edges_.count(assembly_graph_edge) == 0; return result; } -bool ScaffoldGraph::RemoveVertex(ScaffoldGraph::ScaffoldVertex assembly_graph_edge) { - if (Exists(assembly_graph_edge)) { - VERIFY(Exists(conjugate(assembly_graph_edge))); +bool ScaffoldGraph::RemoveVertex(ScaffoldVertex scaffold_vertex) { + if (Exists(scaffold_vertex)) { + VERIFY(Exists(conjugate(scaffold_vertex))); - DeleteAllOutgoingEdgesSimple(assembly_graph_edge); - DeleteAllIncomingEdgesSimple(assembly_graph_edge); - DeleteAllOutgoingEdgesSimple(conjugate(assembly_graph_edge)); - DeleteAllIncomingEdgesSimple(conjugate(assembly_graph_edge)); + DeleteAllOutgoingEdgesSimple(scaffold_vertex); + DeleteAllIncomingEdgesSimple(scaffold_vertex); + DeleteAllOutgoingEdgesSimple(conjugate(scaffold_vertex)); + DeleteAllIncomingEdgesSimple(conjugate(scaffold_vertex)); - VERIFY(incoming_edges_.count(assembly_graph_edge) == 0); - VERIFY(outgoing_edges_.count(assembly_graph_edge) == 0); - VERIFY(incoming_edges_.count(conjugate(assembly_graph_edge)) == 0); - VERIFY(outgoing_edges_.count(conjugate(assembly_graph_edge)) == 0); + VERIFY(incoming_edges_.count(scaffold_vertex) == 0); + VERIFY(outgoing_edges_.count(scaffold_vertex) == 0); + VERIFY(incoming_edges_.count(conjugate(scaffold_vertex)) == 0); + 
VERIFY(outgoing_edges_.count(conjugate(scaffold_vertex)) == 0); - vertices_.erase(assembly_graph_edge); - vertices_.erase(conjugate(assembly_graph_edge)); + vertices_.erase(scaffold_vertex); + vertices_.erase(conjugate(scaffold_vertex)); return true; } @@ -257,8 +270,38 @@ bool ScaffoldGraph::RemoveEdge(const ScaffoldGraph::ScaffoldEdge &e) { } bool ScaffoldGraph::AddEdge(const ScaffoldGraph::ScaffoldEdge &e) { - return AddEdge(e.getStart(), e.getEnd(), e.getColor(), e.getWeight()); + return AddEdge(e.getStart(), e.getEnd(), e.getColor(), e.getWeight(), e.getLength()); +} +std::string ScaffoldGraph::str(const ScaffoldVertex& vertex) const { + return vertex.str(assembly_graph_); +} +std::string ScaffoldGraph::str(const ScaffoldGraph::ScaffoldEdge& edge) const { + return "(" + std::to_string(edge.getStart().int_id()) + ", " + std::to_string(edge.getEnd().int_id()) + ")"; +} +size_t ScaffoldGraph::length(const ScaffoldGraph::ScaffoldEdge &edge) const { + return edge.getLength(); +} +size_t ScaffoldGraph::length(const ScaffoldVertex &vertex) const { + return vertex.GetLengthFromGraph(assembly_graph_); +} +double ScaffoldGraph::coverage(const ScaffoldVertex &vertex) const { + return vertex.GetCoverageFromGraph(assembly_graph_); +} +void ScaffoldGraph::AddVertices(const std::set &vertices) { + for (const auto& v: vertices) { + AddVertex(v); + } +} +ScaffoldGraph &ScaffoldGraph::operator=(ScaffoldGraph other) { + swap(other); + return *this; +} +void ScaffoldGraph::swap(ScaffoldGraph &other) { + VERIFY(&assembly_graph_ == &other.assembly_graph_); + std::swap(vertices_, other.vertices_); + std::swap(edges_, other.edges_); + std::swap(outgoing_edges_, other.outgoing_edges_); + std::swap(incoming_edges_, other.incoming_edges_); } } //scaffold_graph -} //path_extend diff --git a/src/common/modules/path_extend/scaffolder2015/scaffold_graph.hpp b/src/common/auxiliary_graphs/scaffold_graph/scaffold_graph.hpp similarity index 60% rename from 
src/common/modules/path_extend/scaffolder2015/scaffold_graph.hpp rename to src/common/auxiliary_graphs/scaffold_graph/scaffold_graph.hpp index a45a251eb8..1ee3ffd9d9 100644 --- a/src/common/modules/path_extend/scaffolder2015/scaffold_graph.hpp +++ b/src/common/auxiliary_graphs/scaffold_graph/scaffold_graph.hpp @@ -10,13 +10,14 @@ // #pragma once -#include "utils/logger/logger.hpp" +#include "scaffold_vertex.hpp" #include "assembly_graph/core/graph.hpp" -#include "modules/path_extend/paired_library.hpp" -#include "connection_condition2015.hpp" +#include "utils/logger/logger.hpp" + #include "adt/iterator_range.hpp" -namespace path_extend { +#include + namespace scaffold_graph { //do NOT add "using namespace debruijn_graph" in order not to confuse between EdgeId typdefs @@ -25,7 +26,7 @@ class ScaffoldGraph { public: //EdgeId in de Bruijn graph is vertex in scaffolding graph - typedef debruijn_graph::EdgeId ScaffoldVertex; + typedef ScaffoldVertex ScaffoldGraphVertex; //Unique edge id typedef size_t ScaffoldEdgeIdT; @@ -38,21 +39,30 @@ class ScaffoldGraph { //id counter static std::atomic scaffold_edge_id_; - ScaffoldVertex start_; - ScaffoldVertex end_; + ScaffoldGraphVertex start_; + ScaffoldGraphVertex end_; //color = lib# size_t color_; //read pair weight or anything else double weight_; + //todo discuss (distance between vertices by default) + size_t length_; public: - ScaffoldEdge(ScaffoldVertex start, ScaffoldVertex end, size_t lib_id = (size_t) -1, double weight = 0) : + ScaffoldEdge(ScaffoldVertex start, ScaffoldVertex end, size_t lib_id = (size_t) -1, double weight = 0, size_t length = 0) : id_(scaffold_edge_id_++), start_(start), end_(end), color_(lib_id), - weight_(weight) { + weight_(weight), + length_(length){ } + //for consistency with dijkstra + explicit ScaffoldEdge(size_t ): id_(scaffold_edge_id_++), start_(nullptr), end_(nullptr), + color_((size_t) -1), weight_(0), length_(0) {} + + ScaffoldEdge(): id_(scaffold_edge_id_++), start_(nullptr), 
end_(nullptr), + color_((size_t) -1), weight_(0), length_(0) {} ScaffoldEdgeIdT getId() const { return id_; @@ -67,33 +77,36 @@ class ScaffoldGraph { return weight_; } - const ScaffoldVertex getStart() const { + size_t getLength() const { + return length_; + } + + const ScaffoldGraphVertex getStart() const { return start_; } - const ScaffoldVertex getEnd() const { + const ScaffoldGraphVertex getEnd() const { return end_; } - bool operator==(const ScaffoldEdge &e) const { - return color_ == e.color_ && weight_ == e.weight_ && start_ == e.start_ && end_ == e.end_; - } + bool operator==(const ScaffoldEdge &e) const; - bool operator==(const ScaffoldEdge &e) { - return color_ == e.color_ && weight_ == e.weight_ && start_ == e.start_ && end_ == e.end_; - } + bool operator<(const ScaffoldEdge& rhs) const; + bool operator>(const ScaffoldEdge& rhs) const; + bool operator<=(const ScaffoldEdge& rhs) const; + bool operator>=(const ScaffoldEdge& rhs) const; }; - //typedef for possibility to use in templated graph visualizers + //typedef to use in templated graph algorithms typedef ScaffoldVertex VertexId; typedef ScaffoldEdge EdgeId; //All vertices are stored in set - typedef std::set VertexStorage; + typedef std::set VertexStorage; //Edges are stored in map: Id -> Edge Information typedef std::unordered_map EdgeStorage; //Adjacency list contains vertrx and edge id (instead of whole edge information) - typedef std::unordered_multimap AdjacencyStorage; + typedef std::multimap AdjacencyStorage; struct ConstScaffoldEdgeIterator: public boost::iterator_facade &vertices); //Add edge (and conjugate) if not exists //v1 and v2 must exist - bool AddEdge(ScaffoldVertex v1, ScaffoldVertex v2, size_t lib_id, double weight); + bool AddEdge(ScaffoldGraphVertex v1, ScaffoldGraphVertex v2, size_t lib_id, double weight, size_t length); bool AddEdge(const ScaffoldEdge &e); @@ -178,9 +196,9 @@ class ScaffoldGraph { bool RemoveEdge(const ScaffoldEdge &e); //Remove vertex and all adjacent edges - 
bool RemoveVertex(ScaffoldVertex assembly_graph_edge); + bool RemoveVertex(ScaffoldGraphVertex scaffold_vertex); - bool IsVertexIsolated(ScaffoldVertex assembly_graph_edge) const; + bool IsVertexIsolated(ScaffoldGraphVertex assembly_graph_edge) const; VertexStorage::const_iterator vbegin() const; @@ -194,13 +212,13 @@ class ScaffoldGraph { adt::iterator_range edges() const; - size_t int_id(ScaffoldVertex v) const; + size_t int_id(ScaffoldGraphVertex v) const; size_t int_id(ScaffoldEdge e) const; - ScaffoldVertex EdgeStart(ScaffoldEdge e) const; + ScaffoldGraphVertex EdgeStart(ScaffoldEdge e) const; - ScaffoldVertex EdgeEnd(ScaffoldEdge e) const; + ScaffoldGraphVertex EdgeEnd(ScaffoldEdge e) const; size_t VertexCount() const; @@ -212,22 +230,33 @@ class ScaffoldGraph { std::vector IncomingEdges(ScaffoldVertex assembly_graph_edge) const; - size_t OutgoingEdgeCount(ScaffoldVertex assembly_graph_edge) const; + size_t OutgoingEdgeCount(ScaffoldGraphVertex assembly_graph_edge) const; - size_t IncomingEdgeCount(ScaffoldVertex assembly_graph_edge) const; + size_t IncomingEdgeCount(ScaffoldGraphVertex assembly_graph_edge) const; - bool HasUniqueOutgoing(ScaffoldVertex assembly_graph_edge) const; + bool HasUniqueOutgoing(ScaffoldGraphVertex assembly_graph_edge) const; - bool HasUniqueIncoming(ScaffoldVertex assembly_graph_edge) const; + bool HasUniqueIncoming(ScaffoldGraphVertex assembly_graph_edge) const; - ScaffoldEdge UniqueOutgoing(ScaffoldVertex assembly_graph_edge) const; + ScaffoldEdge UniqueOutgoing(ScaffoldGraphVertex assembly_graph_edge) const; - ScaffoldEdge UniqueIncoming(ScaffoldVertex assembly_graph_edge) const; + ScaffoldEdge UniqueIncoming(ScaffoldGraphVertex assembly_graph_edge) const; void Print(std::ostream &os) const; + std::string str(const ScaffoldGraphVertex &vertex) const; + + std::string str(const ScaffoldGraph::ScaffoldEdge &edge) const; + + size_t length(const ScaffoldGraphVertex &vertex) const; + size_t length(const ScaffoldGraph::ScaffoldEdge 
&edge) const; + + double coverage(const ScaffoldGraphVertex &vertex) const; + + ScaffoldGraph& operator =(ScaffoldGraph other); + + void swap(ScaffoldGraph& other); }; } //scaffold_graph -} //path_extend
diff --git a/src/common/auxiliary_graphs/scaffold_graph/scaffold_vertex.cpp b/src/common/auxiliary_graphs/scaffold_graph/scaffold_vertex.cpp
new file mode 100644
index 0000000000..fd6dd85d6f
--- /dev/null
+++ b/src/common/auxiliary_graphs/scaffold_graph/scaffold_vertex.cpp
@@ -0,0 +1,250 @@
+//***************************************************************************
+//* Copyright (c) 2019 Saint Petersburg State University
+//* All Rights Reserved
+//* See file LICENSE for details.
+//***************************************************************************
+
+#include "scaffold_vertex.hpp"
+
+#include "assembly_graph/paths/bidirectional_path_io/io_support.hpp"
+
+namespace scaffold_graph {
+
+// EdgeIdVertex: a scaffold vertex that wraps a single assembly graph edge.
+size_t EdgeIdVertex::GetId() const {
+    return edge_.int_id();
+}
+size_t EdgeIdVertex::GetLengthFromGraph(const debruijn_graph::Graph &g) const {
+    return g.length(edge_);
+}
+EdgeIdVertex::EdgeIdVertex(EdgeId edge_) : edge_(edge_) {}
+std::shared_ptr<InnerScaffoldVertex> EdgeIdVertex::GetConjugateFromGraph(const debruijn_graph::Graph &g) const {
+    return std::make_shared<EdgeIdVertex>(g.conjugate(get()));
+}
+debruijn_graph::EdgeId EdgeIdVertex::get() const {
+    return edge_;
+}
+ScaffoldVertexT EdgeIdVertex::GetType() const {
+    return Edge;
+}
+debruijn_graph::VertexId EdgeIdVertex::GetStartGraphVertex(const debruijn_graph::Graph &g) const {
+    return g.EdgeStart(edge_);
+}
+debruijn_graph::VertexId EdgeIdVertex::GetEndGraphVertex(const debruijn_graph::Graph &g) const {
+    return g.EdgeEnd(edge_);
+}
+std::string EdgeIdVertex::str(const debruijn_graph::Graph &g) const {
+    return g.str(edge_);
+}
+double EdgeIdVertex::GetCoverageFromGraph(const debruijn_graph::Graph &g) const {
+    return g.coverage(edge_);
+}
+
+path_extend::BidirectionalPath* EdgeIdVertex::ToPath(const debruijn_graph::Graph &g) const {
+    //fixme think of a better way
+    VERIFY(false);
+//    path_extend::BidirectionalPath* result = new path_extend::BidirectionalPath (g);
+//    path_extend::Gap gap(0);
+//    result->PushBack(edge_, gap);
+    // conversion is not implemented; return a defined value rather than an uninitialized pointer
+    return nullptr;
+}
+debruijn_graph::EdgeId EdgeIdVertex::GetLastEdge() const {
+    return edge_;
+}
+debruijn_graph::EdgeId EdgeIdVertex::GetFirstEdge() const {
+    return edge_;
+}
+boost::optional<debruijn_graph::EdgeId> EdgeIdVertex::GetLastEdgeWithPredicate(
+        const func::TypedPredicate<debruijn_graph::EdgeId> &pred) const {
+    boost::optional<debruijn_graph::EdgeId> result;
+    if (pred(edge_)) {
+        result = edge_;
+    }
+    return result;
+}
+boost::optional<debruijn_graph::EdgeId> EdgeIdVertex::GetFirstEdgeWithPredicate(
+        const func::TypedPredicate<debruijn_graph::EdgeId> &pred) const {
+    boost::optional<debruijn_graph::EdgeId> result;
+    if (pred(edge_)) {
+        result = edge_;
+    }
+    return result;
+}
+std::unordered_set<debruijn_graph::EdgeId> EdgeIdVertex::GetAllEdges() const {
+    std::unordered_set<debruijn_graph::EdgeId> result;
+    result.insert(edge_);
+    return result;
+}
+std::string EdgeIdVertex::GetSequence(const debruijn_graph::Graph &g) const {
+    return g.EdgeNucls(edge_).str();
+}
+
+size_t EdgeIdVertex::GetSize() const {
+    return 1;
+}
+
+// PathVertex: a scaffold vertex that wraps a BidirectionalPath it does not own.
+size_t PathVertex::GetId() const {
+    return path_->GetId();
+}
+size_t PathVertex::GetLengthFromGraph(const debruijn_graph::Graph &/*g*/) const {
+    return path_->Length();
+}
+std::shared_ptr<InnerScaffoldVertex> PathVertex::GetConjugateFromGraph(const debruijn_graph::Graph &/*g*/) const {
+    return std::make_shared<PathVertex>(get()->GetConjPath());
+}
+PathVertex::PathVertex(path_extend::BidirectionalPath *path_) : path_(path_) {}
+path_extend::BidirectionalPath *PathVertex::get() const {
+    return path_;
+}
+ScaffoldVertexT PathVertex::GetType() const {
+    return Path;
+}
+debruijn_graph::VertexId PathVertex::GetEndGraphVertex(const debruijn_graph::Graph &g) const {
+    VERIFY(path_->Size() > 0);
+    return g.EdgeEnd(path_->Back());
+}
+debruijn_graph::VertexId PathVertex::GetStartGraphVertex(const debruijn_graph::Graph &g) const {
+    VERIFY(path_->Size() > 0);
+    return g.EdgeStart(path_->Front());
+}
+std::string PathVertex::str(const debruijn_graph::Graph &/*g*/) const {
+    return path_->str();
+}
+double PathVertex::GetCoverageFromGraph(const debruijn_graph::Graph &/*g*/) const {
+    return path_->Coverage();
+}
+path_extend::BidirectionalPath* PathVertex::ToPath(const debruijn_graph::Graph &/*g*/) const {
+    return path_;
+}
+debruijn_graph::EdgeId PathVertex::GetLastEdge() const {
+    const size_t path_size = path_->Size();
+    VERIFY(path_size > 0);
+    return path_->Back();
+}
+debruijn_graph::EdgeId PathVertex::GetFirstEdge() const {
+    const size_t path_size = path_->Size();
+    VERIFY(path_size > 0);
+    return path_->Front();
+}
+boost::optional<debruijn_graph::EdgeId> PathVertex::GetLastEdgeWithPredicate(
+        const func::TypedPredicate<debruijn_graph::EdgeId> &pred) const {
+    boost::optional<debruijn_graph::EdgeId> result;
+    // walk from the back; i is one past the inspected position, so the unsigned index cannot wrap
+    for (size_t i = path_->Size(); i > 0; --i) {
+        EdgeId current = path_->At(i - 1);
+        if (pred(current)) {
+            result = current;
+            return result;
+        }
+    }
+    return result;
+}
+boost::optional<debruijn_graph::EdgeId> PathVertex::GetFirstEdgeWithPredicate(
+        const func::TypedPredicate<debruijn_graph::EdgeId> &pred) const {
+    boost::optional<debruijn_graph::EdgeId> result;
+    for (size_t i = 0; i < path_->Size(); ++i) {
+        EdgeId current = path_->At(i);
+        if (pred(current)) {
+            result = current;
+            return result;
+        }
+    }
+    return result;
+}
+std::unordered_set<debruijn_graph::EdgeId> PathVertex::GetAllEdges() const {
+    std::unordered_set<debruijn_graph::EdgeId> result;
+    for (const auto &edge: *path_) {
+        result.insert(edge);
+    }
+    return result;
+}
+std::string PathVertex::GetSequence(const debruijn_graph::Graph &g) const {
+    path_extend::ScaffoldSequenceMaker sequence_maker(g);
+    return sequence_maker.MakeSequence(*path_);
+}
+
+size_t PathVertex::GetSize() const {
+    return path_->Size();
+}
+
+// ScaffoldVertex: copyable handle that forwards every query to the shared inner vertex.
+ScaffoldVertex::ScaffoldVertex(std::shared_ptr<InnerScaffoldVertex> vertex_ptr_) : vertex_ptr_(vertex_ptr_) {}
+size_t ScaffoldVertex::int_id() const {
+    return vertex_ptr_->GetId();
+}
+size_t ScaffoldVertex::GetLengthFromGraph(const debruijn_graph::Graph &g) const {
+    return vertex_ptr_->GetLengthFromGraph(g);
+}
+ScaffoldVertex ScaffoldVertex::GetConjugateFromGraph(const debruijn_graph::Graph &g) const {
+    auto inner_vertex = vertex_ptr_->GetConjugateFromGraph(g);
+    ScaffoldVertex result(inner_vertex);
+    return result;
+}
+ScaffoldVertexT ScaffoldVertex::GetType() const {
+    return vertex_ptr_->GetType();
+}
+debruijn_graph::VertexId ScaffoldVertex::GetStartGraphVertex(const debruijn_graph::Graph &g) const {
+    return vertex_ptr_->GetStartGraphVertex(g);
+}
+debruijn_graph::VertexId ScaffoldVertex::GetEndGraphVertex(const debruijn_graph::Graph &g) const {
+    return vertex_ptr_->GetEndGraphVertex(g);
+}
+ScaffoldVertex::ScaffoldVertex(EdgeId edge) : vertex_ptr_(std::make_shared<EdgeIdVertex>(edge)) {}
+ScaffoldVertex::ScaffoldVertex(path_extend::BidirectionalPath *path) : vertex_ptr_(std::make_shared<PathVertex>(path)) {}
+// identity is the (type, id) pair; handles to distinct inner objects with equal type and id compare equal
+bool ScaffoldVertex::operator==(const ScaffoldVertex &rhs) const {
+    return GetType() == rhs.GetType() and int_id() == rhs.int_id();
+}
+bool ScaffoldVertex::operator!=(const ScaffoldVertex &rhs) const {
+    return !(rhs == *this);
+}
+bool ScaffoldVertex::operator<(const ScaffoldVertex &rhs) const {
+    return GetType() < rhs.GetType() or (GetType() == rhs.GetType() and int_id() < rhs.int_id());
+}
+bool ScaffoldVertex::operator>(const ScaffoldVertex &rhs) const {
+    return rhs < *this;
+}
+bool ScaffoldVertex::operator<=(const ScaffoldVertex &rhs) const {
+    return !(rhs < *this);
+}
+bool ScaffoldVertex::operator>=(const ScaffoldVertex &rhs) const {
+    return !(*this < rhs);
+}
+std::string ScaffoldVertex::str(const debruijn_graph::Graph &g) const {
+    return vertex_ptr_->str(g);
+}
+double ScaffoldVertex::GetCoverageFromGraph(const debruijn_graph::Graph &g) const {
+    return vertex_ptr_->GetCoverageFromGraph(g);
+}
+std::shared_ptr<InnerScaffoldVertex> ScaffoldVertex::GetInnerVertex() const {
+    return vertex_ptr_;
+}
+path_extend::BidirectionalPath* ScaffoldVertex::ToPath(const debruijn_graph::Graph &g) const {
+    return vertex_ptr_->ToPath(g);
+}
+debruijn_graph::EdgeId ScaffoldVertex::GetFirstEdge() const {
+    return vertex_ptr_->GetFirstEdge();
+}
+debruijn_graph::EdgeId ScaffoldVertex::GetLastEdge() const {
+    return vertex_ptr_->GetLastEdge();
+}
+// no inner vertex: every forwarding method dereferences vertex_ptr_, so such a vertex must not be queried
+ScaffoldVertex::ScaffoldVertex(): vertex_ptr_(nullptr) {}
+boost::optional<debruijn_graph::EdgeId> ScaffoldVertex::GetLastEdgeWithPredicate(const func::TypedPredicate<debruijn_graph::EdgeId> &pred) const {
+    return vertex_ptr_->GetLastEdgeWithPredicate(pred);
+}
+boost::optional<debruijn_graph::EdgeId> ScaffoldVertex::GetFirstEdgeWithPredicate(const func::TypedPredicate<debruijn_graph::EdgeId> &pred) const {
+    return vertex_ptr_->GetFirstEdgeWithPredicate(pred);
+}
+std::unordered_set<debruijn_graph::EdgeId> ScaffoldVertex::GetAllEdges() const {
+    return vertex_ptr_->GetAllEdges();
+}
+std::string ScaffoldVertex::GetSequence(const debruijn_graph::Graph &g) const {
+    return vertex_ptr_->GetSequence(g);
+}
+size_t ScaffoldVertex::GetSize() const {
+    return vertex_ptr_->GetSize();
+}
+}
diff --git a/src/common/auxiliary_graphs/scaffold_graph/scaffold_vertex.hpp b/src/common/auxiliary_graphs/scaffold_graph/scaffold_vertex.hpp
new file mode 100644
index 0000000000..9968753ef9
--- /dev/null
+++ b/src/common/auxiliary_graphs/scaffold_graph/scaffold_vertex.hpp
@@ -0,0 +1,182 @@
+//***************************************************************************
+//* Copyright (c) 2019 Saint Petersburg State University
+//* All Rights Reserved
+//* See file LICENSE for details.
+//*************************************************************************** + +#pragma once + +#include "assembly_graph/paths/bidirectional_path.hpp" +#include "assembly_graph/core/graph.hpp" + +#include "boost/optional/optional.hpp" + +#include + +namespace scaffold_graph { + +enum ScaffoldVertexT { Edge = 0, Path = 1 }; + +class InnerScaffoldVertex { + public: + using EdgeId = debruijn_graph::EdgeId; + + virtual ~InnerScaffoldVertex() = default; + + virtual size_t GetId() const = 0; + virtual ScaffoldVertexT GetType() const = 0; + virtual size_t GetLengthFromGraph(const debruijn_graph::Graph &g) const = 0; + virtual double GetCoverageFromGraph(const debruijn_graph::Graph &g) const = 0; + virtual std::shared_ptr GetConjugateFromGraph(const debruijn_graph::Graph &g) const = 0; + virtual debruijn_graph::VertexId GetEndGraphVertex(const debruijn_graph::Graph &g) const = 0; + virtual debruijn_graph::VertexId GetStartGraphVertex(const debruijn_graph::Graph &g) const = 0; + virtual boost::optional GetLastEdgeWithPredicate(const func::TypedPredicate &pred) const = 0; + virtual boost::optional GetFirstEdgeWithPredicate(const func::TypedPredicate &pred) const = 0; + virtual std::string GetSequence(const debruijn_graph::Graph &g) const = 0; + virtual size_t GetSize() const = 0; + + virtual debruijn_graph::EdgeId GetLastEdge() const = 0; + virtual EdgeId GetFirstEdge() const = 0; + virtual std::unordered_set GetAllEdges() const = 0; + + virtual path_extend::BidirectionalPath* ToPath(const debruijn_graph::Graph &g) const = 0; + virtual std::string str(const debruijn_graph::Graph& g) const = 0; +}; + +class EdgeIdVertex : public InnerScaffoldVertex { + private: + debruijn_graph::EdgeId edge_; + + public: + explicit EdgeIdVertex(EdgeId edge_); + + size_t GetId() const override; + ScaffoldVertexT GetType() const override; + size_t GetLengthFromGraph(const debruijn_graph::Graph &g) const override; + double GetCoverageFromGraph(const debruijn_graph::Graph &g) const override; + 
std::shared_ptr GetConjugateFromGraph(const debruijn_graph::Graph &g) const override; + debruijn_graph::VertexId GetEndGraphVertex(const debruijn_graph::Graph &g) const override; + debruijn_graph::VertexId GetStartGraphVertex(const debruijn_graph::Graph &g) const override; + boost::optional GetLastEdgeWithPredicate(const func::TypedPredicate &pred) const override; + boost::optional GetFirstEdgeWithPredicate(const func::TypedPredicate &pred) const override; + std::string GetSequence(const debruijn_graph::Graph &g) const override; + size_t GetSize() const override; + + EdgeId GetLastEdge() const override; + EdgeId GetFirstEdge() const override; + std::unordered_set GetAllEdges() const override; + + std::string str(const debruijn_graph::Graph &g) const override; + path_extend::BidirectionalPath* ToPath(const debruijn_graph::Graph &g) const override; + + EdgeId get() const; +}; + +class PathVertex : public InnerScaffoldVertex { + private: + path_extend::BidirectionalPath *path_; + + public: + using VertexId = debruijn_graph::VertexId; + using InnerScaffoldVertex::EdgeId; + + explicit PathVertex(path_extend::BidirectionalPath *path_); + + size_t GetId() const override; + ScaffoldVertexT GetType() const override; + size_t GetLengthFromGraph(const debruijn_graph::Graph &g) const override; + double GetCoverageFromGraph(const debruijn_graph::Graph &g) const override; + std::shared_ptr GetConjugateFromGraph(const debruijn_graph::Graph &g) const override; + VertexId GetEndGraphVertex(const debruijn_graph::Graph &g) const override; + VertexId GetStartGraphVertex(const debruijn_graph::Graph &g) const override; + boost::optional GetLastEdgeWithPredicate(const func::TypedPredicate &pred) const override; + boost::optional GetFirstEdgeWithPredicate(const func::TypedPredicate &pred) const override; + std::string GetSequence(const debruijn_graph::Graph &g) const override; + size_t GetSize() const override; + + EdgeId GetLastEdge() const override; + EdgeId GetFirstEdge() const 
override; + std::unordered_set GetAllEdges() const override; + + std::string str(const debruijn_graph::Graph &g) const override; + path_extend::BidirectionalPath* ToPath(const debruijn_graph::Graph &g) const override; + + path_extend::BidirectionalPath *get() const; +}; + +class ScaffoldVertex { + std::shared_ptr vertex_ptr_; + + public: + using EdgeId = debruijn_graph::EdgeId; + using VertexId = debruijn_graph::VertexId; + + explicit ScaffoldVertex(std::shared_ptr vertex_ptr_); + + ScaffoldVertex(const ScaffoldVertex& other) = default; + + //make implicit for easy scaffold edge construction + ScaffoldVertex(EdgeId edge); + ScaffoldVertex(path_extend::BidirectionalPath *path); + + ScaffoldVertex(); + + //deviate from surrounding style to make compatible with generic graph algorithms + size_t int_id() const; + + ScaffoldVertexT GetType() const; + size_t GetLengthFromGraph(const debruijn_graph::Graph &g) const; + double GetCoverageFromGraph(const debruijn_graph::Graph &g) const; + ScaffoldVertex GetConjugateFromGraph(const debruijn_graph::Graph &g) const; + VertexId GetEndGraphVertex(const debruijn_graph::Graph &g) const; + VertexId GetStartGraphVertex(const debruijn_graph::Graph &g) const; + boost::optional GetLastEdgeWithPredicate(const func::TypedPredicate &pred) const; + boost::optional GetFirstEdgeWithPredicate(const func::TypedPredicate &pred) const; + std::string GetSequence(const debruijn_graph::Graph &g) const; + size_t GetSize() const; + + EdgeId GetLastEdge() const; + EdgeId GetFirstEdge() const; + std::unordered_set GetAllEdges() const; + + std::string str(const debruijn_graph::Graph &g) const; + path_extend::BidirectionalPath* ToPath(const debruijn_graph::Graph &g) const; + + std::shared_ptr GetInnerVertex() const; + + bool operator==(const ScaffoldVertex &rhs) const; + bool operator!=(const ScaffoldVertex &rhs) const; + bool operator<(const ScaffoldVertex &rhs) const; + bool operator>(const ScaffoldVertex &rhs) const; + bool operator<=(const 
ScaffoldVertex &rhs) const; + bool operator>=(const ScaffoldVertex &rhs) const; +}; + +class EdgeGetter { + public: + debruijn_graph::EdgeId GetEdgeFromScaffoldVertex(const ScaffoldVertex& vertex) { + VERIFY_DEV(vertex.GetType() == Edge); + auto inner_vertex = std::static_pointer_cast(vertex.GetInnerVertex()); + return inner_vertex->get(); + } +}; + +} + +namespace std { +template<> +struct hash { + size_t operator()(const scaffold_graph::ScaffoldVertex& vertex) const { + return vertex.int_id(); + } +}; + +template<> +struct less { + bool operator()(const scaffold_graph::ScaffoldVertex& lhs, + const scaffold_graph::ScaffoldVertex& rhs) const { + return lhs < rhs; + } +}; + +} diff --git a/src/common/barcode_index/barcode_index.hpp b/src/common/barcode_index/barcode_index.hpp new file mode 100644 index 0000000000..14dac6f1f6 --- /dev/null +++ b/src/common/barcode_index/barcode_index.hpp @@ -0,0 +1,823 @@ +//*************************************************************************** +//* Copyright (c) 2017-2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. +//*************************************************************************** + +#pragma once + +#include "assembly_graph/core/graph.hpp" +#include "assembly_graph/index/edge_index_builders.hpp" +#include "assembly_graph/paths/mapping_path.hpp" +#include "io/binary/binary.hpp" +#include "io/reads/paired_readers.hpp" +#include "sequence/range.hpp" + +#include +#include + +using std::string; +using std::istringstream; +using namespace omnigraph; + +namespace barcode_index { +typedef RtSeq Kmer; + +class FrameBarcodeIndexBuilder; +template +class BarcodeIndexInfoExtractor; +typedef uint64_t BarcodeId; + +/** + This class provides partial interface to BarcodeIndex. 
+*/ +template +class AbstractBarcodeIndex { +public: + typedef typename Graph::EdgeId EdgeId; + +protected: + const Graph& g_; +public: + AbstractBarcodeIndex (const Graph &g) : + g_(g) {} + virtual ~AbstractBarcodeIndex() {} + + //Number of entries in the barcode map. Currently equals to the number of edges. + virtual size_t size() const = 0; + + //Number of barcodes on the beginning/end of the edge + virtual size_t GetBarcodeNumber(const EdgeId &edge) const = 0; + + virtual void ReadEntry(std::ifstream& fin, const EdgeId& edge) = 0; + virtual void WriteEntry(std::ofstream& fin, const EdgeId& edge) = 0; + + //Remove low abundant barcodes + virtual void Filter(size_t abundancy_threshold, size_t gap_threshold) = 0; + + virtual bool IsEmpty() = 0; + +}; + +template +class ConcurrentBarcodeIndexBuffer { + public: + typedef typename Graph::EdgeId EdgeId; + typedef libcuckoo::cuckoohash_map StorageMap; + + ConcurrentBarcodeIndexBuffer(const Graph &g) : g_(g), edge_to_entry_() {} + virtual ~ConcurrentBarcodeIndexBuffer() {clear();} + + void clear() { + edge_to_entry_.clear(); + } + + typename StorageMap::locked_table lock_table() { + return edge_to_entry_.lock_table(); + } + + void InsertEntry(EdgeEntryT &&entry) { + edge_to_entry_.insert(entry); + } + + void InsertBarcode(const BarcodeId &barcode, const EdgeId &edge, size_t count, const Range &range) { + edge_to_entry_.update_fn(edge, + [&](EdgeEntryT &second) { + second.InsertBarcode(barcode, count, range); + }); + } + protected: + const Graph &g_; + StorageMap edge_to_entry_; +}; + +/** + * BarcodeIndex stores information provided by alignment of read clouds to the graph. + * For every edge we store barcoded reads which are contained on the edge along with additional info. + * Read cloud is represented by its barcode + * The edge contains the cloud if there is a read barcoded by cloud's barcode which is aligned to the edge. 
+ * Info example: FrameBarcodeInfo + */ +template +class BarcodeIndex: public AbstractBarcodeIndex { +friend class BarcodeIndexInfoExtractor; + +public: + typedef typename Graph::EdgeId EdgeId; + typedef typename Graph::VertexId VertexId; + typedef typename omnigraph::IterationHelper edge_it_helper; + typedef std::unordered_map barcode_map_t; + + BarcodeIndex (const Graph &g) : + AbstractBarcodeIndex(g), + edge_to_entry_(), + number_of_barcodes_(0) + {} + + BarcodeIndex (const BarcodeIndex& other) = default; + + virtual ~BarcodeIndex() {} + + size_t size() const { + return edge_to_entry_.size(); + } + + bool empty() const { + return size() == 0; + } + + typename barcode_map_t::iterator begin() noexcept { + return edge_to_entry_.begin(); + } + typename barcode_map_t::iterator end() noexcept { + return edge_to_entry_.end(); + } + typename barcode_map_t::iterator begin() const noexcept { + return edge_to_entry_.begin(); + } + typename barcode_map_t::iterator end() const noexcept { + return edge_to_entry_.end(); + } + typename barcode_map_t::const_iterator cbegin() const noexcept { + return edge_to_entry_.cbegin(); + } + typename barcode_map_t::const_iterator cend() const noexcept { + return edge_to_entry_.cend(); + } + + size_t GetBarcodeNumber(const EdgeId &edge) const override { + return GetEntry(edge).Size(); + } + + bool IsEmpty() override { + return size() == 0; + } + + //Delete low abundant barcodes from every edge + void Filter(size_t trimming_threshold, size_t gap_threshold) override { + for (auto entry = edge_to_entry_.begin(); entry != edge_to_entry_.end(); ++entry) { + entry->second.Filter(trimming_threshold, gap_threshold); + } + } + + void ReadEntry (std::ifstream& fin, const EdgeId& edge) override { + DEBUG("Reading entry") + DEBUG("Edge: " << edge.int_id()); + DEBUG("Length: " << g_.length(edge)); + edge_to_entry_[edge].Deserialize(fin); + } + void WriteEntry (std::ofstream& fout, const EdgeId& edge) override { + fout << g_.int_id(edge) << std::endl; + 
GetEntry(edge).Serialize(fout); + } + + virtual void BinRead(std::istream &str) { + using io::binary::BinRead; + + edge_to_entry_.clear(); + size_t size; + BinRead(str, size); + for (size_t i = 0; i < size; ++i) { + EdgeId edge_id = BinRead(str); + auto entry = BinRead(str); + edge_to_entry_.insert({std::move(edge_id), std::move(entry)}); + } + } + virtual void BinWrite(std::ostream &str) const { + using io::binary::BinWrite; + BinWrite(str, edge_to_entry_.size()); + for (const auto &edge_and_entry: edge_to_entry_) { + BinWrite(str, edge_and_entry.first.int_id(), edge_and_entry.second); + } + } + + typename barcode_map_t::const_iterator GetEntryTailsIterator(const EdgeId& edge) const { + return edge_to_entry_.find(g_.conjugate(edge)); + } + typename barcode_map_t::const_iterator GetEntryHeadsIterator(const EdgeId& edge) const { + return edge_to_entry_.find(edge); + } + + void InsertEntry(const EdgeId &edge, const EdgeEntryT &entry) { + edge_to_entry_.insert({edge, entry}); + } + const EdgeEntryT& GetEntry(const EdgeId &edge) const { + return edge_to_entry_.at(edge); + } + + void MoveAssign(ConcurrentBarcodeIndexBuffer &from) { + this->edge_to_entry_.clear(); + auto locked_table = from.lock_table(); + DEBUG(locked_table.size()); + for (auto& kvpair : locked_table) { + TRACE(kvpair.first.int_id()); + TRACE("Length: " << g_.length(kvpair.first)); + TRACE(kvpair.second.Size() << " barcodes"); + this->edge_to_entry_[kvpair.first] = std::move(kvpair.second); + } + } + + void MoveUpdate(ConcurrentBarcodeIndexBuffer &from) { + auto locked_table = from.lock_table(); + for (auto &kvpair: locked_table) { + this->edge_to_entry_[kvpair.first].MoveUpdate(kvpair.second); + } + } + + void Update(ConcurrentBarcodeIndexBuffer &from) { + auto locked_table = from.lock_table(); + for (const auto &kvpair: locked_table) { + this->edge_to_entry_[kvpair.first].Update(kvpair.second); + } + } + + void SetNumberOfBarcodes(size_t number_of_barcodes) { + number_of_barcodes_ = 
number_of_barcodes; + } + size_t GetNumberOfBarcodes() { + return number_of_barcodes_; + } + + const Graph& GetGraph() const { + return g_; + } + + protected: + using AbstractBarcodeIndex::g_; + barcode_map_t edge_to_entry_; + size_t number_of_barcodes_; + + DECL_LOGGER("BarcodeIndex"); +}; + +class SimpleBarcodeInfo { + size_t count_; + Range range_; +public: + SimpleBarcodeInfo(): count_(0), range_() {} + SimpleBarcodeInfo(size_t count, const Range& range): count_(count), range_(range) {} + + void Update(size_t count, const Range& range) { + count_ += count; + range_.start_pos = std::min(range_.start_pos, range.start_pos); + range_.end_pos = std::max(range_.end_pos, range.end_pos); + } + + void Update(const SimpleBarcodeInfo& other) { + count_ += other.GetCount(); + Range range; + range_.start_pos = std::min(range_.start_pos, other.GetRange().start_pos); + range_.end_pos = std::max(range_.end_pos, other.GetRange().end_pos); + } + + size_t GetCount() const { + return count_; + } + + Range GetRange() const { + return range_; + } + friend std::ostream& operator <<(std::ostream& os, const SimpleBarcodeInfo& info); + friend std::istream& operator >>(std::istream& is, SimpleBarcodeInfo& info); +}; + +inline std::ostream& operator <<(std::ostream& os, const SimpleBarcodeInfo& info) +{ + os << info.count_ << " " << info.range_.start_pos << " " << info.range_.end_pos; + return os; +} + +inline std::istream& operator >>(std::istream& os, SimpleBarcodeInfo& info) +{ + size_t range_start; + size_t range_end; + os >> info.count_; + os >> range_start; + os >> range_end; + info.range_ = Range(range_start, range_end); + return os; +} + +/** + * FrameBarcodeInfo approximates the read cloud defined by the barcode and the edge. + * The edge is split into several bins. + * Bin is barcoded iff there is at least one barcoded read which aligns to the bin. + * + * We store the set of barcoded bins and the number of reads aligned to the edge. 
+ */ +class FrameBarcodeInfo { +public: + /** + * + * @param frames Number of bin in the edge + * @return empty info + */ + FrameBarcodeInfo(size_t frames = 0): count_(0), covered_bins_(), leftmost_index_(frames), rightmost_index_(0) {} + + void Update(size_t count, size_t left_frame, size_t right_frame) { + count_ += count; + for (size_t i = left_frame; i <= right_frame; ++i) { + covered_bins_.insert(i); + } + leftmost_index_ = std::min(left_frame, leftmost_index_); + rightmost_index_ = std::max(right_frame, rightmost_index_); + } + + void Update(const FrameBarcodeInfo& other) { + TRACE(count_); + TRACE(other.count_); + TRACE(covered_bins_.size()); + TRACE(other.covered_bins_.size()); + for (const auto &frame: other.covered_bins_) { + covered_bins_.insert(frame); + } + leftmost_index_ = std::min(leftmost_index_, other.leftmost_index_); + rightmost_index_ = std::max(rightmost_index_, other.rightmost_index_); + count_ += other.count_; + } + + /** + * @return number of barcoded reads aligned to the edge + */ + size_t GetCount() const { + return count_; + } + + /** + * @return Leftmost barcoded bin + */ + size_t GetLeftMost() const { + return leftmost_index_; + } + + /** + * @return Rightmost barcoded bin + */ + size_t GetRightMost() const { + return rightmost_index_; + } + + /** + * + * @return number of barcoded bins + */ + size_t GetCovered() const { + return covered_bins_.size(); + } + + void SetCount(size_t count) { + count_ = count; + } + + void SetLeftMost(size_t index) { + leftmost_index_ = index; + } + + void SetRightMost(size_t index) { + rightmost_index_ = index; + } + + void BinRead(std::istream &str) { + using io::binary::BinRead; + auto count = BinRead(str); + SetCount(count); + + auto num_positions = BinRead(str); + size_t min_pos = std::numeric_limits::max(); + size_t max_pos = 0; + for (size_t i = 0; i < num_positions; ++i) { + auto pos = BinRead(str); + TRACE("Position: " << pos); + covered_bins_.insert(pos); + if (pos < min_pos) { + min_pos = pos; + 
} + if (pos > max_pos) { + max_pos = pos; + } + } + + SetLeftMost(min_pos); + SetRightMost(max_pos); + TRACE("Leftmost: " << GetLeftMost()); + TRACE("Rightmost: " << GetRightMost()); + } + + void BinWrite(std::ostream &str) const { + using io::binary::BinWrite; + BinWrite(str, GetCount()); + BinWrite(str, GetCovered()); + + for (const size_t &pos: covered_bins_) { + BinWrite(str, pos); + } + } + + + friend std::ostream& operator <<(std::ostream& os, const FrameBarcodeInfo& info); + friend std::istream& operator >>(std::istream& is, FrameBarcodeInfo& info); + + private: + /** + * Number of reads aligned to the edge + */ + size_t count_; + /** + * Bins covered by the barcode + */ + std::unordered_set covered_bins_; + /** + * Leftmost barcoded bin + */ + size_t leftmost_index_; + /** + * Rightmost barcoded bin + */ + size_t rightmost_index_; + + DECL_LOGGER("FrameBarcodeInfo"); +}; + +inline std::ostream& operator <<(std::ostream& os, const FrameBarcodeInfo& info) +{ + os << info.count_ << " " << info.covered_bins_.size(); + for (const auto &bin: info.covered_bins_) { + os << bin << " "; + } + return os; +} + +inline std::istream& operator >>(std::istream& is, FrameBarcodeInfo& info) +{ + using io::binary::BinRead; + size_t count; + is >> count; + info.SetCount(count); + + size_t num_of_bins; + is >> num_of_bins; + size_t min_pos = std::numeric_limits::max(); + size_t max_pos = 0; + for (size_t i = 0; i < num_of_bins; ++i) { + size_t pos = 0; + is >> pos; + TRACE("Position: " << pos); + if (pos < min_pos) { + min_pos = pos; + } + if (pos > max_pos) { + max_pos = pos; + } + info.covered_bins_.insert(pos); + } + info.SetLeftMost(min_pos); + info.SetRightMost(max_pos); + TRACE("Leftmost: " << info.GetLeftMost()); + TRACE("Rightmost: " << info.GetRightMost()); + return is; +} + +template +class EdgeEntry { +public: + typedef typename Graph::EdgeId EdgeId; + typedef std::map barcode_distribution_t; + typedef EntryInfoT barcode_info_t; + + EdgeEntry(): + edge_(), 
barcode_distribution_() {}; + EdgeEntry(const EdgeId& edge) : + edge_(edge), barcode_distribution_() {} + + virtual ~EdgeEntry() {} + + const barcode_distribution_t& GetDistribution() const { + return barcode_distribution_; + } + + EdgeId GetEdge() const { + return edge_; + } + + size_t Size() const { + return barcode_distribution_.size(); + } + + virtual void Serialize(std::ofstream& fout) const { + SerializeDistribution(fout); + } + + virtual void Deserialize(std::ifstream& fin) { + DeserializeDistribution(fin); + } + + typename barcode_distribution_t::const_iterator begin() const { + return barcode_distribution_.begin(); + } + + typename barcode_distribution_t::const_iterator end() const { + return barcode_distribution_.end(); + } + + typename barcode_distribution_t::const_iterator cbegin() const { + return barcode_distribution_.cbegin(); + } + + typename barcode_distribution_t::const_iterator cend() const { + return barcode_distribution_.cend(); + } + + bool has_barcode(const BarcodeId& barcode) const { + return barcode_distribution_.find(barcode) != barcode_distribution_.end(); + } + + typename barcode_distribution_t::const_iterator get_barcode(const BarcodeId& barcode) const { + return barcode_distribution_.find(barcode); + } + + virtual void BinRead(std::istream &str) { + barcode_distribution_ = io::binary::BinRead(str); + } + + virtual void BinWrite(std::ostream &str) const { + io::binary::BinWrite(str, barcode_distribution_); + } + +protected: + void SerializeDistribution(std::ofstream &fout) const { + fout << barcode_distribution_.size() << std::endl; + for (auto entry : barcode_distribution_) { + fout << entry.first << ' ' << entry.second << std::endl; + } + } + + void DeserializeDistribution(std::ifstream &fin) { + size_t distr_size; + fin >> distr_size; + for (size_t i = 0; i < distr_size; ++i) { + uint64_t int_id; + EntryInfoT info; + fin >> int_id >> info; + BarcodeId bid(int_id); + InsertInfo(bid, info); + } + } + + void InsertInfo(const BarcodeId 
&barcode, const barcode_info_t &info) { + auto barcode_result = barcode_distribution_.find(barcode); + if (barcode_result == barcode_distribution_.end()) { + barcode_distribution_.insert({barcode, info}); + } + else { + barcode_result->second.Update(info); + } + } + virtual void InsertBarcode(const BarcodeId &code, const size_t count, const Range &range) = 0; + + EdgeId edge_; + barcode_distribution_t barcode_distribution_; +}; + +template +class SimpleEdgeEntry : public EdgeEntry { + friend class BarcodeIndex; + friend class BarcodeIndexInfoExtractor; +protected: + typedef typename Graph::EdgeId EdgeId; + using EdgeEntry::barcode_distribution_t; + using EdgeEntry::barcode_distribution_; + using EdgeEntry::edge_; + +public: + SimpleEdgeEntry(): + EdgeEntry() {} + SimpleEdgeEntry(const EdgeId& edge) : + EdgeEntry(edge) {} + + ~SimpleEdgeEntry() {} + + void Filter(size_t trimming_threshold, size_t gap_threshold) { + for (auto it = barcode_distribution_.begin(); it != barcode_distribution_.end() ;) { + if (IsLowReadCount(trimming_threshold, it->second) or + IsFarFromEdgeHead(gap_threshold, it->second)) { + barcode_distribution_.erase(it++); + } + else { + ++it; + } + } + } + +protected: + void InsertBarcode(const BarcodeId& barcode, const size_t count, const Range& range) { + if (barcode_distribution_.find(barcode) == barcode_distribution_.end()) { + SimpleBarcodeInfo info(count, range); + barcode_distribution_.insert({barcode, info}); + } + else { + barcode_distribution_.at(barcode).Update(count, range); + } + } + + bool IsFarFromEdgeHead(size_t gap_threshold, const SimpleBarcodeInfo& info) { + return info.GetRange().start_pos > gap_threshold; + } + + bool IsLowReadCount(size_t trimming_threshold, const SimpleBarcodeInfo& info) { + return info.GetCount() < trimming_threshold; + } +}; + +template +class FrameEdgeEntry : public EdgeEntry { + friend class BarcodeIndex; + friend class FrameBarcodeIndexBuilder; + friend class BarcodeIndexInfoExtractor; + friend class 
ConcurrentBarcodeIndexBuffer; +protected: + typedef typename Graph::EdgeId EdgeId; + using EdgeEntry::barcode_distribution_t; + using EdgeEntry::barcode_distribution_; + using EdgeEntry::edge_; + size_t edge_length_; + size_t frame_size_; + size_t number_of_frames_; + +public: + FrameEdgeEntry(): + EdgeEntry(), + edge_length_(0), + frame_size_(0), + number_of_frames_(0) {} + FrameEdgeEntry(const EdgeId& edge, size_t edge_length, size_t frame_size) : + EdgeEntry(edge), + edge_length_(edge_length), + frame_size_(frame_size), + number_of_frames_(edge_length / frame_size + 1) {} + + ~FrameEdgeEntry() {} + + void Filter(size_t trimming_threshold, size_t gap_threshold) { + for (auto it = barcode_distribution_.begin(); it != barcode_distribution_.end() ;) { + if (IsLowReadCount(trimming_threshold, it->second) or + IsFarFromEdgeHead(gap_threshold, it->second)) { + barcode_distribution_.erase(it++); + } + else { + ++it; + } + } + } + + size_t GetFrameSize() const { + return frame_size_; + } + + size_t GetNumberOfFrames() const { + return number_of_frames_; + } + + void BinRead(std::istream &str) override { + using io::binary::BinRead; + edge_length_ = BinRead(str); + frame_size_ = BinRead(str); + number_of_frames_ = BinRead(str); + + barcode_distribution_.clear(); + size_t size; + BinRead(str, size); + for (size_t i = 0; i < size; ++i) { + BarcodeId barcode; + FrameBarcodeInfo info(number_of_frames_); + BinRead(str, barcode, info); + barcode_distribution_.insert({std::move(barcode), std::move(info)}); + } + } + + void BinWrite(std::ostream &str) const override { + using io::binary::BinWrite; + BinWrite(str, edge_length_); + BinWrite(str, frame_size_); + BinWrite(str, number_of_frames_); + + size_t size = barcode_distribution_.size(); + BinWrite(str, size); + for (const auto &entry : barcode_distribution_) { + BinWrite(str, entry.first, entry.second); + } + } + +protected: + void InsertBarcode(const BarcodeId& barcode, const size_t count, const Range& range) override { + 
DEBUG("Inserting barcode"); + if (barcode_distribution_.find(barcode) == barcode_distribution_.end()) { + FrameBarcodeInfo info(number_of_frames_); + barcode_distribution_.insert({barcode, info}); + } + size_t left_frame = GetFrameFromPos(range.start_pos); + size_t right_frame = GetFrameFromPos(range.end_pos); + DEBUG("Range: " << range); + DEBUG("Frames: " << left_frame << " " << right_frame); + DEBUG("Count: " << count); + VERIFY_DEV(barcode_distribution_.find(barcode) != barcode_distribution_.end()); + barcode_distribution_.at(barcode).Update(count, left_frame, right_frame); + } + + void MoveUpdate(FrameEdgeEntry &other) { + for (auto it = other.begin(); it != other.end(); ++it) { + barcode_distribution_[it->first] = std::move(it->second); + } + } + + void Update(const FrameEdgeEntry &other) { + for (auto it = other.begin(); it != other.end(); ++it) { + barcode_distribution_[it->first] = it->second; + } + } + + bool IsFarFromEdgeHead(size_t gap_threshold, const FrameBarcodeInfo& info) { + return info.GetLeftMost() > gap_threshold / frame_size_; + } + + bool IsLowReadCount(size_t trimming_threshold, const FrameBarcodeInfo& info) { + return info.GetCount() < trimming_threshold; + } + + void SetFrameSize(size_t frame_size) { + frame_size_ = frame_size; + } + +private: + //fixme last frame is larger than the others + size_t GetFrameFromPos(size_t pos) { + return pos / frame_size_; + } + + DECL_LOGGER("FrameEdgeEntry"); +}; + +template +class FrameConcurrentBarcodeIndexBuffer: public ConcurrentBarcodeIndexBuffer> { + public: + FrameConcurrentBarcodeIndexBuffer(const Graph &g, size_t frame_size): + ConcurrentBarcodeIndexBuffer>(g), frame_size_(frame_size) { + } + + void InitialFillMap() { + VERIFY_DEV(frame_size_ != 0); + VERIFY_DEV(edge_to_entry_.empty()); + for (const debruijn_graph::EdgeId &edge: g_.canonical_edges()) { + edge_to_entry_.insert(edge, FrameEdgeEntry(edge, g_.length(edge), frame_size_)); + debruijn_graph::EdgeId conj = g_.conjugate(edge); + 
edge_to_entry_.insert(conj, FrameEdgeEntry(conj, g_.length(edge), frame_size_)); + } + } + + size_t GetFrameSize() { + return frame_size_; + } + + private: + using ConcurrentBarcodeIndexBuffer>::g_; + using ConcurrentBarcodeIndexBuffer>::edge_to_entry_; + size_t frame_size_; +}; + +template +class FrameBarcodeIndex: public BarcodeIndex> { + friend class FrameBarcodeIndexBuilder; + friend class BarcodeIndexInfoExtractor>; + public: + using BarcodeIndex>::barcode_map_t; + typedef typename Graph::EdgeId EdgeId; + typedef typename omnigraph::IterationHelper edge_it_helper; + + FrameBarcodeIndex(const Graph &g, size_t frame_size): + BarcodeIndex>(g), frame_size_(frame_size) { + } + + size_t GetFrameSize() const { + return frame_size_; + } + + void SetFrameSize(size_t frame_size) { + VERIFY_DEV(frame_size_ == 0); + frame_size_ = frame_size; + } + + void InitialFillMap() { + VERIFY_DEV(frame_size_ != 0); + VERIFY_DEV(edge_to_entry_.empty()); + edge_it_helper helper(g_); + for (auto it = helper.begin(); it != helper.end(); ++it) { + FrameEdgeEntry entry(*it, g_.length(*it), frame_size_); + this->InsertEntry(*it, entry); + } + } + + private: + using BarcodeIndex>::g_; + using BarcodeIndex>::edge_to_entry_; + size_t frame_size_; +}; +} //barcode_index diff --git a/src/common/barcode_index/barcode_index_builder.hpp b/src/common/barcode_index/barcode_index_builder.hpp new file mode 100644 index 0000000000..3194da6f5d --- /dev/null +++ b/src/common/barcode_index/barcode_index_builder.hpp @@ -0,0 +1,273 @@ +//*************************************************************************** +//* Copyright (c) 2017-2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. 
+//*************************************************************************** + +#pragma once + +#include "barcode_index.hpp" + +#include "io/dataset_support/dataset_readers.hpp" +#include "io/reads/read_processor.hpp" +#include "alignment/sequence_mapper_notifier.hpp" +#include "alignment/sequence_mapper.hpp" + +#include +#include +#include +#include + +namespace barcode_index { + +//todo templatize +class ConcurrentBufferFiller { + public: + using EdgeId = debruijn_graph::EdgeId; + using Graph = debruijn_graph::Graph; + using FrameConcurrentBuffer = FrameConcurrentBarcodeIndexBuffer; + using SequenceMapper = debruijn_graph::SequenceMapper; + using MappingPath = omnigraph::MappingPath; + using MappingRange = omnigraph::MappingRange; + + class BarcodeEncoder { + public: + BarcodeEncoder(size_t start): + codes_(), + start_(start) + { } + + BarcodeId add(const std::string &barcode) { + std::unique_lock lock(mutex_); + size_t encoder_size = codes_.size(); + codes_[barcode] = encoder_size + start_; + return encoder_size; + } + + auto begin() const { + return codes_.begin(); + } + auto end() { + std::shared_lock lock(mutex_); + return codes_.end(); + } + auto find(const string& barcode) { + std::shared_lock lock(mutex_); + return codes_.find(barcode); + } + size_t size() const { + return codes_.size(); + } + private: + std::unordered_map codes_; + std::shared_timed_mutex mutex_; + const size_t start_; + }; + + ConcurrentBufferFiller(const Graph &g, + FrameConcurrentBuffer &buf, + const SequenceMapper &mapper, + const std::vector &barcode_prefices, + size_t starting_barcode, bool is_tellseq) : g_(g), + buf_(buf), + mapper_(mapper), + barcode_prefices_(barcode_prefices), + encoder_(starting_barcode), + is_tellseq_(is_tellseq) { Init(); } + + bool operator()(std::unique_ptr r) { + const Sequence& read1 = r->first().sequence(); + const Sequence& read2 = r->second().sequence(); + MappingPath path1 = mapper_.MapSequence(read1); + MappingPath path2 = 
mapper_.MapSequence(read2); + auto barcode = GetBarcode(std::move(r)); + if (barcode.empty()) { + TRACE("Empty barcode") + return false; + } + if (path1.size() > 0 and path2.size() > 0) { + TRACE("non-empty pair"); + } + + ProcessPairedRead(barcode, path1, path2); + return false; + } + + size_t GetNumberOfBarcodes() const { + return encoder_.size(); + } + + FrameConcurrentBuffer& GetBuffer() { + return buf_; + } + + private: + void ProcessPairedRead(const std::string &barcode_string, + const MappingPath& path1, + const MappingPath& path2) { + + auto code_result = encoder_.find(barcode_string); + BarcodeId barcode; + if (code_result == encoder_.end()) { + barcode = encoder_.add(barcode_string); + } else { + barcode = code_result->second; + } + InsertMappingPath(barcode, path1); + InsertMappingPath(barcode, path2); + } + void Init() { + buf_.InitialFillMap(); + } + + std::string GetBarcode(std::unique_ptr r) { + if (not is_tellseq_) { + TRACE("Getting barcode") + auto left_barcode_string = GetTenXBarcodeFromRead(r->first().comment(), barcode_prefices_); + auto right_barcode_string = GetTenXBarcodeFromRead(r->second().comment(), barcode_prefices_); + + if (left_barcode_string.empty() or left_barcode_string != right_barcode_string) { + TRACE(left_barcode_string); + TRACE(right_barcode_string); + std::string empty; + return empty; + } + return left_barcode_string; + } else { + io::PairedRead* paired = r.get(); + return static_cast(paired)->aux().sequence().str(); + } + } + + string GetTenXBarcodeFromRead(const std::string &read_name, const std::vector& barcode_prefixes) { + for (const auto& prefix: barcode_prefixes) { + size_t prefix_len = prefix.size(); + size_t start_pos = read_name.find(prefix); + if (start_pos != string::npos) { + string barcode = GetBarcodeFromStartPos(start_pos + prefix_len, read_name); + TRACE(barcode); + return barcode; + } + } + return ""; + } + string GetBarcodeFromStartPos(const size_t start_pos, const string& read_id) { + string result = ""; 
+ for (auto it = read_id.begin() + start_pos; it != read_id.end(); ++it) { + if (not is_nucl(*it)) { + return result; + } + result.push_back(*it); + } + return result; + } + + void InsertMappingPath(BarcodeId &barcode, const MappingPath &path) { + for (size_t i = 0; i < path.size(); i++) { + //todo restore tail threshold if needed + EdgeId edge = path[i].first; + const auto &range = path[i].second.mapped_range; + buf_.InsertBarcode(barcode, edge, 1, range); + buf_.InsertBarcode(barcode, g_.conjugate(edge), 1, range.Invert(g_.length(edge))); + } + } + + const Graph& g_; + FrameConcurrentBuffer &buf_; + const SequenceMapper &mapper_; + const std::vector barcode_prefices_; + BarcodeEncoder encoder_; + bool is_tellseq_; + + DECL_LOGGER("ConcurrentBufferFiller"); +}; + +class FrameBarcodeIndexBuilder { + public: + using EdgeId = debruijn_graph::EdgeId; + using Graph = debruijn_graph::Graph; + using SequenceMapper = debruijn_graph::SequenceMapper; + + FrameBarcodeIndexBuilder(const Graph &g, + const SequenceMapper &mapper, + const std::vector &barcode_prefices, + size_t frame_size, + size_t num_threads) : + g_(g), + mapper_(mapper), + barcode_prefices_(barcode_prefices), + frame_size_(frame_size), + num_threads_(num_threads) {} + + template + void ConstructBarcodeIndex(io::ReadStreamList read_streams, + FrameBarcodeIndex &barcode_index, + const io::SequencingLibraryBase &lib, + bool is_tellseq); + + void DownsampleBarcodeIndex(FrameBarcodeIndex &downsampled_index, FrameBarcodeIndex &original_index, double sampling_factor) { + std::unordered_set barcodes; + for (auto it = original_index.begin(); it != original_index.end(); ++it) { + const auto &barcode_distribution = it->second.GetDistribution(); + for (const auto &entry: barcode_distribution) { + BarcodeId current_barcode = entry.first; + barcodes.insert(current_barcode); + } + } + INFO("Number of encountered barcodes: " << barcodes.size()); + std::random_device rd; + std::mt19937 gen(rd()); + 
std::uniform_real_distribution<> distr(.0, 1.0); + std::unordered_set passed_barcodes; + for (const auto &barcode: barcodes) { + if (math::le(distr(gen), sampling_factor)) { + passed_barcodes.insert(barcode); + } + } + INFO("Passed barcodes: " << passed_barcodes.size()); + + downsampled_index.InitialFillMap(); + auto barcode_filter = [&passed_barcodes](const auto &barcode_entry) { + return passed_barcodes.find(barcode_entry.first) != passed_barcodes.end(); + }; + for (auto it = original_index.begin(); it != original_index.end(); ++it) { + auto &to = downsampled_index.edge_to_entry_[it->first].barcode_distribution_; + auto &from = it->second.barcode_distribution_; + std::copy_if(std::make_move_iterator(from.begin()), std::make_move_iterator(from.end()), + std::inserter(to, to.end()), barcode_filter); + } + } + + private: + const Graph& g_; + const SequenceMapper &mapper_; + const std::vector barcode_prefices_; + size_t frame_size_; + size_t num_threads_; +}; +/** + * Processes every stream of read_streams with a fresh concurrent buffer and merges each buffer into barcode_index. + * @param read_streams streams of barcoded reads, processed one after another + * @param barcode_index index to fill; its frame size is set to frame_size_ and its map is initialised first + * @param lib not used by this implementation + * @param is_tellseq forwarded to ConcurrentBufferFiller to select how barcodes are extracted + */ +template +void FrameBarcodeIndexBuilder::ConstructBarcodeIndex(io::ReadStreamList read_streams, + FrameBarcodeIndex &barcode_index, + const io::SequencingLibraryBase &lib, + bool is_tellseq) { + { + size_t starting_barcode = 0; + size_t counter = 0; + barcode_index.SetFrameSize(frame_size_); + barcode_index.InitialFillMap(); + for (auto &stream: read_streams) { + DEBUG("Processing stream " << counter << " , currently " << starting_barcode << " barcodes"); + FrameConcurrentBarcodeIndexBuffer buffer(g_, frame_size_); + ConcurrentBufferFiller buffer_filler(g_, buffer, mapper_, barcode_prefices_, starting_barcode, is_tellseq); + hammer::ReadProcessor read_processor(static_cast(num_threads_)); + read_processor.Run(stream, buffer_filler); + starting_barcode += buffer_filler.GetNumberOfBarcodes(); + DEBUG("Update"); + barcode_index.Update(buffer); + DEBUG("Finished update"); + /* advance the stream number reported by the DEBUG message above */ + ++counter; + } + INFO(starting_barcode << " total barcodes in the barcode index"); + } + +} +} //namespace barcode index diff --git
a/src/common/barcode_index/barcode_info_extractor.hpp b/src/common/barcode_index/barcode_info_extractor.hpp new file mode 100644 index 0000000000..41f0d4254f --- /dev/null +++ b/src/common/barcode_index/barcode_info_extractor.hpp @@ -0,0 +1,509 @@ +//*************************************************************************** +//* Copyright (c) 2017-2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. +//*************************************************************************** + +#pragma once + +#include "barcode_index.hpp" + +namespace barcode_index { + + /** + * BarcodeIndexInfoExtractor extracts information from BarcodeIndex + */ + template + class BarcodeIndexInfoExtractor { + public: + typedef typename BarcodeEntryT::barcode_distribution_t distribution_t; + typedef typename BarcodeEntryT::barcode_info_t barcode_info_t; + typedef typename distribution_t::key_type barcode_info_key_t; + typedef typename distribution_t::mapped_type barcode_info_value_t; + typedef typename distribution_t::value_type barcode_info_pair_t; + typedef typename barcode_index::BarcodeIndex BarcodeIndexT; + typedef typename Graph::EdgeId EdgeId; + typedef typename omnigraph::IterationHelper edge_it_helper; + protected: + const BarcodeIndexT &index_; + const Graph &g_; + public: + /** + * @param index barcode index to query (stored by reference, must outlive the extractor) + * @param g graph the index refers to (stored by reference, must outlive the extractor) + */ + BarcodeIndexInfoExtractor(const BarcodeIndexT &index, const Graph &g) : + index_(index), g_(g) {} + + /** + * Virtual because the class declares virtual members and is used as a base class. + */ + virtual ~BarcodeIndexInfoExtractor() = default; + + /** + * + * @param edge + * @return Number of barcodes contained by the edge + */ + size_t GetNumberOfBarcodes(const EdgeId &edge) const { + return index_.GetEntry(edge).Size(); + } + + /** + * + * @param edge1 + * @param edge2 + * @return List of barcodes shared by edge1 and edge2 + */ + std::vector GetSharedBarcodes (const EdgeId &edge1, const EdgeId &edge2) const { + std::vector intersection; + for (auto it = intersection_iterator_begin(edge1, edge2); it != intersection_iterator_end(edge1, edge2); ++it) { + intersection.push_back((*it).key_); + } + return
intersection; + } + + /** + * + * @param edge1 + * @param edge2 + * @return Number of barcodes shared by edge1 and edge2 + */ + size_t GetNumberOfSharedBarcodes (const EdgeId &edge1, const EdgeId &edge2) const { + size_t result = 0; + for (auto it = intersection_iterator_begin(edge1, edge2); it != intersection_iterator_end(edge1, edge2); ++it) { + ++result; + } + return result; + } + + /** + * @param edge + * @param barcode + * @return True if the edge contains the barcode + */ + bool HasBarcode(const EdgeId &edge, const BarcodeId& barcode) const { + return index_.GetEntry(edge).has_barcode(barcode); + } + + /** + * + * @param length_threshold only edges strictly longer than this value are counted as long + * @return Average number of barcodes contained on the long edges of the graph, or 0 if the graph has no long edges + */ + double AverageBarcodeCoverage(size_t length_threshold) const { + edge_it_helper helper(g_); + size_t barcodes_overall = 0; + size_t long_edges = 0; + for (auto it = helper.begin(); it != helper.end(); ++it) { + if (g_.length(*it) > length_threshold) { + long_edges++; + barcodes_overall += GetNumberOfBarcodes(*it); + } + } + DEBUG("tails: " + std::to_string(barcodes_overall)); + /* stream the counter: adding an integer to a string literal is pointer arithmetic, not concatenation */ + DEBUG("Long edges: " << long_edges); + /* avoid 0/0 when no edge passes the length threshold, like the normalized getters below */ + if (long_edges == 0) { + return 0; + } + return static_cast (barcodes_overall) / static_cast (long_edges); + } + + /** + * @return number of shared barcodes divided by GetUnionSize(edge1, edge2), or 0 if the union is empty + */ + double GetIntersectionSizeNormalizedByUnion(const EdgeId &edge1, const EdgeId &edge2) const { + if (GetUnionSize(edge1, edge2)) { + return static_cast (GetNumberOfSharedBarcodes(edge1, edge2)) / + static_cast (GetUnionSize(edge1, edge2)); + } + return 0; + } + + /** + * @return number of shared barcodes divided by the number of barcodes of edge2, or 0 if edge2 has none + */ + virtual double GetIntersectionSizeNormalizedBySecond(const EdgeId &edge1, const EdgeId &edge2) const { + if (GetNumberOfBarcodes(edge2) > 0) { + return static_cast (GetNumberOfSharedBarcodes(edge1, edge2)) / + static_cast (GetNumberOfBarcodes(edge2)); + } + return 0; + } + + /** + * @return union size reported by the tails entry of edge1 for the heads entry of edge2 + */ + size_t GetUnionSize(const EdgeId &edge1, const EdgeId &edge2) const { + auto it_tail = index_.GetEntryTailsIterator(edge1); + auto it_head = index_.GetEntryHeadsIterator(edge2); + return (it_tail->second).GetUnionSize(it_head->second); + } + + std::vector
GetBarcodes(const EdgeId& edge) const { + /* Collects the key (barcode id) of every entry between barcode_iterator_begin and barcode_iterator_end of the edge. */ + std::vector result; + auto copy_barcode_id = [&result](const barcode_info_pair_t& entry) + {result.push_back(entry.first); }; + std::for_each(barcode_iterator_begin(edge), barcode_iterator_end(edge), copy_barcode_id); + return result; + } + + /** + * @return iterator to the first (barcode, info) pair of the heads entry of the edge + */ + typename distribution_t::const_iterator barcode_iterator_begin(const EdgeId &edge) const { + auto entry_it = index_.GetEntryHeadsIterator(edge); + return entry_it->second.begin(); + } + + /** + * @return past-the-end iterator of the heads entry of the edge + */ + typename distribution_t::const_iterator barcode_iterator_end(const EdgeId &edge) const { + auto entry_it = index_.GetEntryHeadsIterator(edge); + return entry_it->second.end(); + } + + /** + * Proxy class representing a pair of references to BarcodeInfo + */ + struct IntersectionData { + /** + * BarcodeId of the shared barcode + */ + const barcode_info_key_t key_; + /** + * Info corresponding to the shared barcode and the first edge + */ + const barcode_info_value_t& info_first_; + + /** + * Info corresponding to the shared barcode and the second edge + */ + const barcode_info_value_t& info_second_; + + IntersectionData(const barcode_info_key_t key, + const barcode_info_value_t& info_first, + const barcode_info_value_t& info_second) : + key_(key), info_first_(info_first), info_second_(info_second) {} + }; + + /** + * Iterator over shared barcodes of two edges. + * Dereferencing returns proxy object of type IntersectionData + * @note Since it is not an iterator over a container there is no -> operator.
+ * @note As constructed by intersection_iterator_begin and intersection_iterator_end for non-empty edges, + * first_end_ and second_end_ point to the last element of each distribution (the decremented end iterator), not one past it. + */ + class const_intersection_iterator { + public: + typedef IntersectionData value_type; + typedef IntersectionData reference; + typedef IntersectionData* pointer; + typedef int difference_type; + typedef std::input_iterator_tag iterator_category; + typedef typename distribution_t::const_iterator entry_iterator; + + public: + const_intersection_iterator(entry_iterator first, entry_iterator second, + entry_iterator first_end, entry_iterator second_end) : + first_(first), second_(second), first_end_(first_end), second_end_(second_end) {} + + /** + * Moves to the next pair of entries with equal keys. If both cursors already stand on + * first_end_ and second_end_, both are advanced once more and the iterator is returned as is. + * NOTE(review): returns a copy rather than a reference, unlike the conventional prefix increment. + */ + //todo optimize with lower bounds? + const_intersection_iterator operator++() { + if (first_ == first_end_ and second_ == second_end_) { + ++first_; + ++second_; + return *this; + } + if (get_first_key() == get_second_key()) { + if (second_ != second_end_) { + ++second_; + } else { + ++first_; + } + } + while (get_first_key() != get_second_key() and (first_ != first_end_ or second_ != second_end_)) { + while (get_first_key() < get_second_key() and first_ != first_end_) { + ++first_; + } + while (get_second_key() < get_first_key() and second_ != second_end_) { + ++second_; + } + if ((first_ == first_end_ and get_second_key() > get_first_key()) or + (second_ == second_end_ and get_first_key() > get_second_key())) { + first_ = first_end_; + second_ = second_end_; + } + } + if (get_first_key() == get_second_key()) { + return *this; + } + VERIFY(first_ == first_end_ and second_ == second_end_); + if (get_first_key() != get_second_key()) { + ++first_; + ++second_; + } + return *this; + } + + /** + * Postfix increment: advances the iterator and returns its previous state. + */ + const_intersection_iterator operator++(int) { + const_intersection_iterator result = *this; + ++(*this); + return result; + } + + /** + * @return proxy holding the shared key and references to the infos of both cursors; + * VERIFY fails if the keys of the two cursors differ + */ + reference operator* () { + VERIFY(get_first_key() == get_second_key()); + return IntersectionData(get_first_key(), first_->second, second_->second); + } + + bool operator== (const const_intersection_iterator& other) { + return first_ == other.first_ and second_ == other.second_ and + first_end_ == other.first_end_ and
second_end_ == other.second_end_; + } + + bool operator!= (const const_intersection_iterator& other) { + return not(*this == other); + } + + private: + entry_iterator first_; + entry_iterator second_; + entry_iterator first_end_; + entry_iterator second_end_; + + /* Key of the entry the first cursor points to; the cursor must be dereferenceable. */ + inline barcode_info_key_t get_first_key() { + return first_->first; + } + /* Key of the entry the second cursor points to; the cursor must be dereferenceable. */ + inline barcode_info_key_t get_second_key() { + return second_->first; + } + }; + + /** + * @return iterator positioned on the first barcode shared by the two edges, + * or the end iterator if either edge has no barcodes. + * The decremented end iterators (last elements) are passed to the iterator as its bounds. + */ + //todo remove end decrement? + const_intersection_iterator intersection_iterator_begin(const EdgeId& first, const EdgeId& second) const { + if (GetNumberOfBarcodes(first) == 0 or GetNumberOfBarcodes(second) == 0) { + return intersection_iterator_end(first, second); + } + auto first_begin = barcode_iterator_begin(first); + auto first_end = barcode_iterator_end(first); + auto second_begin = barcode_iterator_begin(second); + auto second_end = barcode_iterator_end(second); + const_intersection_iterator prebegin(first_begin, second_begin, --first_end, --second_end); + if (barcode_iterator_begin(first)->first == barcode_iterator_begin(second)->first) + return prebegin; + return ++prebegin; + } + + /** + * @return end iterator: both cursors are the past-the-end iterators of the two edges. + * The bounds are the decremented ends, unless either edge has no barcodes, + * in which case the undecremented ends are used for the bounds as well. + */ + const_intersection_iterator intersection_iterator_end(const EdgeId& first, const EdgeId& second) const { + auto first_end = barcode_iterator_end(first); + auto second_end = barcode_iterator_end(second); + if (GetNumberOfBarcodes(first) == 0 or GetNumberOfBarcodes(second) == 0) { + return const_intersection_iterator(first_end, second_end, first_end, second_end); + } + auto first_end_copy = first_end; + auto second_end_copy = second_end; + const_intersection_iterator result(first_end_copy, second_end_copy, --first_end, --second_end); + return result; + } + + protected: + + /** + * + * @param edge + * @param barcode + * @return barcode info corresponding to given edge + * @note VERIFY fails if the edge does not contain the barcode + */ + const barcode_info_t& GetInfo(const EdgeId& edge, const BarcodeId& barcode) const { + VERIFY(HasBarcode(edge, barcode)); + const BarcodeEntryT& entry = GetEntry(edge); + return
entry.get_barcode(barcode)->second; + } + + /** + * @param edge + * @return entry of the index that corresponds to the edge + */ + const BarcodeEntryT& GetEntry(const EdgeId& edge) const { + return index_.GetEntry(edge); + } + + }; + +/** + * Specialization of BarcodeIndexInfoExtractor for FrameBarcodeInfo type. + * Implemented as a class derived from BarcodeIndexInfoExtractor. + * @see FrameBarcodeInfo + */ +template +class FrameBarcodeIndexInfoExtractorTemplate : public BarcodeIndexInfoExtractor> { +public: + typedef typename Graph::EdgeId EdgeId; + typedef typename barcode_index::FrameBarcodeIndex FrameBarcodeIndexT; + + /** + * Forwards index and graph to the base extractor. + */ + FrameBarcodeIndexInfoExtractorTemplate(const FrameBarcodeIndexT &index, const Graph &g) : + BarcodeIndexInfoExtractor>(index, g) {} + + /** + * + * @param edge + * @param barcode + * @return number of barcoded reads aligned to the edge + */ + size_t GetNumberOfReads(const EdgeId &edge, const BarcodeId &barcode) const { + return this->GetInfo(edge, barcode).GetCount(); + } + + /** + * + * @param edge + * @param barcode + * @return leftmost barcoded bin of the edge + */ + size_t GetLeftBin(const EdgeId &edge, const BarcodeId &barcode) const { + return this->GetInfo(edge, barcode).GetLeftMost(); + } + + /** + * + * @param edge + * @param barcode + * @return rightmost barcoded bin of the edge + */ + size_t GetRightBin(const EdgeId &edge, const BarcodeId &barcode) const { + return this->GetInfo(edge, barcode).GetRightMost(); + } + + /** + * @param edge + * @return length of the bin + */ + size_t GetBinLength(const EdgeId &edge) const { + return this->GetEntry(edge).GetFrameSize(); + } + + /** + * + * @param edge + * @return number of bins on the edge + */ + size_t GetNumberOfBins(const EdgeId& edge) const { + return this->GetEntry(edge).GetNumberOfFrames(); + } + + /** + * @param first first edge + * @param second second edge + * @param shared_threshold minimal number of barcodes shared by first and second + * @param count_threshold edge contains barcode iff there are at least count_threshold reads aligned to the edge + * @param gap_threshold clouds located at the beginning of the first or at the end
of the second edge are discarded. + * Cloud is located in the beginning of the edge if it is not aligned to the last gap_threshold nucleotides of the edge. + * @return true if there are at least shared_threshold barcodes which pass requirements determined by count_threshold and gap_threshold. + * @note The loop returns true as soon as the number of passing barcodes is strictly greater than shared_threshold. + * NOTE(review): this differs by one from the "at least" wording above; confirm which of the two is intended. + */ + bool AreEnoughSharedBarcodesWithFilter (const EdgeId &first, + const EdgeId &second, + size_t shared_threshold, + size_t count_threshold, + size_t gap_threshold) const { + size_t current = 0; + /* the iterator factories live in the dependent base class, so they are reached through this-> as in GetSharedBarcodesWithFilter */ + for (auto it = this->intersection_iterator_begin(first, second); it != this->intersection_iterator_end(first, second); ++it) { + BarcodeId barcode = (*it).key_; + bool is_in_the_end_of_first = g_.length(first) <= gap_threshold or + GetMaxPos(first, barcode) + gap_threshold > g_.length(first); + bool is_in_the_beginning_of_second = g_.length(second) <= gap_threshold or + GetMinPos(second, barcode) < gap_threshold; + bool enough_count = (*it).info_first_.GetCount() >= count_threshold and + (*it).info_second_.GetCount() >= count_threshold; + if (is_in_the_end_of_first and is_in_the_beginning_of_second and enough_count) { + ++current; + } + if (current > shared_threshold) { + return true; + } + } + return false; + } + + /** + * @param first first edge + * @param second second edge + * @param count_threshold edge contains barcode iff there are at least count_threshold reads aligned to the edge + * @param gap_threshold clouds located at the beginning of the first or at the end of the second edge are discarded. + * Cloud is located in the beginning of the edge if it is not aligned to the last gap_threshold nucleotides of the edge. + * @return number of barcodes which pass requirements determined by count_threshold and gap_threshold.
+ */ + size_t CountSharedBarcodesWithFilter (const EdgeId &first, + const EdgeId &second, + size_t count_threshold, + size_t gap_threshold) const { + return GetSharedBarcodesWithFilter(first, second, count_threshold, gap_threshold).size(); + } + + /** + * Returns the shared barcodes of first and second that pass the filter. + * A shared barcode is kept when all of the following hold: + * first is not longer than gap_threshold, or GetMaxPos(first, barcode) + gap_threshold exceeds the length of first; + * second is not longer than gap_threshold, or GetMinPos(second, barcode) is below gap_threshold; + * both edges have at least count_threshold reads for the barcode. + */ + std::vector GetSharedBarcodesWithFilter(const EdgeId& first, const EdgeId& second, + size_t count_threshold, size_t gap_threshold) const { + std::vector intersection; + for (auto it = this->intersection_iterator_begin(first, second); it != this->intersection_iterator_end(first, second); ++it) { + auto barcode = (*it).key_; + bool is_in_the_end_of_first = g_.length(first) <= gap_threshold or + GetMaxPos(first, barcode) + gap_threshold > g_.length(first); + bool is_in_the_beginning_of_second = g_.length(second) <= gap_threshold or + GetMinPos(second, barcode) < gap_threshold; + bool enough_count = (*it).info_first_.GetCount() >= count_threshold and + (*it).info_second_.GetCount() >= count_threshold; + if (is_in_the_end_of_first and is_in_the_beginning_of_second and enough_count) { + intersection.push_back(barcode); + } + } + return intersection; + } + + /** + * @param edge + * @param count_threshold minimal number of reads a barcode must have on the edge + * @param right largest accepted leftmost position (leftmost bin multiplied by the bin length) + * @return barcodes of the edge with at least count_threshold reads whose leftmost position is <= right + */ + std::vector GetBarcodesFromHead(const EdgeId& edge, size_t count_threshold, size_t right) const { + std::vector barcodes; + size_t bin_length = GetBinLength(edge); + for (auto it = this->barcode_iterator_begin(edge); it != this->barcode_iterator_end(edge); ++it) { + BarcodeId barcode = it->first; + size_t left_pos = it->second.GetLeftMost() * bin_length; + size_t reads = it->second.GetCount(); + if (left_pos <= right and reads >= count_threshold) { + barcodes.push_back(barcode); + } + } + return barcodes; + } + + /** + * Same selection as GetBarcodesFromHead, but every barcode is returned together with its read count. + */ + std::vector> GetBarcodesAndCountsFromHead(const EdgeId& edge, + size_t count_threshold, + size_t right) const { + std::vector> barcodes; + size_t bin_length = GetBinLength(edge); + for (auto it = this->barcode_iterator_begin(edge); it != this->barcode_iterator_end(edge); ++it) { + BarcodeId barcode = it->first; + size_t left_pos = it->second.GetLeftMost() *
bin_length; + size_t reads = it->second.GetCount(); + if (left_pos <= right and reads >= count_threshold) { + barcodes.emplace_back(barcode, reads); + } + } + return barcodes; + }; + + /** + * @param edge + * @param count_threshold minimal number of reads a barcode must have on the edge + * @param left left bound of the queried range + * @param right right bound of the queried range + * @return barcodes with at least count_threshold reads whose leftmost position is <= right and whose + * rightmost position is >= left (positions are bin indices multiplied by the bin length) + */ + std::vector GetBarcodesFromRange(const EdgeId& edge, size_t count_threshold, + size_t left, size_t right) const { + std::vector barcodes; + size_t bin_length = GetBinLength(edge); + for (auto it = this->barcode_iterator_begin(edge); it != this->barcode_iterator_end(edge); ++it) { + BarcodeId barcode = it->first; + size_t left_pos = it->second.GetLeftMost() * bin_length; + size_t right_pos = it->second.GetRightMost() * bin_length; + TRACE("Bin length: " << bin_length); + TRACE("Left raw: " << left_pos); + TRACE("Leftmost position: " << left_pos); + TRACE("Rightmost position: " << right_pos); + size_t reads = it->second.GetCount(); + TRACE("Reads: " << reads); + if (left_pos <= right and right_pos >= left and reads >= count_threshold) { + barcodes.push_back(barcode); + } + } + return barcodes; + } + + /** + * + * @param edge + * @param barcode + * @return Estimated first position of the cloud defined by the barcode and the edge (not the first bin, but the first nucleotide) + * @note Computed as the leftmost bin index multiplied by the frame size of the entry + */ + size_t GetMinPos(const EdgeId &edge, const BarcodeId& barcode) const { + const FrameEdgeEntry &entry = this->GetEntry(edge); + const FrameBarcodeInfo& info = this->GetInfo(edge, barcode); + size_t frame_size = entry.GetFrameSize(); + return info.GetLeftMost() * frame_size; + } + + /** + * + * @param edge + * @param barcode + * @return Estimated last position of the cloud defined by the barcode and the edge (not the last bin, but the last nucleotide) + * @note Computed as the rightmost bin index multiplied by the frame size of the entry + */ + size_t GetMaxPos(const EdgeId &edge, const BarcodeId& barcode) const { + const FrameEdgeEntry &entry = this->GetEntry(edge); + const FrameBarcodeInfo& info = this->GetInfo(edge, barcode); + size_t frame_size = entry.GetFrameSize(); + return info.GetRightMost() * frame_size; + } + + private: + using BarcodeIndexInfoExtractor>::g_; + +}; +typedef
FrameBarcodeIndexInfoExtractorTemplate FrameBarcodeIndexInfoExtractor; +} \ No newline at end of file diff --git a/src/common/barcode_index/cluster_storage/barcode_cluster.hpp b/src/common/barcode_index/cluster_storage/barcode_cluster.hpp new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/common/barcode_index/scaffold_vertex_index.hpp b/src/common/barcode_index/scaffold_vertex_index.hpp new file mode 100644 index 0000000000..039e02bfef --- /dev/null +++ b/src/common/barcode_index/scaffold_vertex_index.hpp @@ -0,0 +1,239 @@ +//*************************************************************************** +//* Copyright (c) 2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. +//*************************************************************************** + +#pragma once + +#include "barcode_info_extractor.hpp" +#include "auxiliary_graphs/scaffold_graph/scaffold_vertex.hpp" +#include "assembly_graph/core/graph.hpp" +#include "adt/iterator_range.hpp" + +namespace barcode_index { + +template +class ScaffoldVertexIndexBuilder; + +/** + * Maps scaffold vertices to entries of type VertexEntryT. + * Head accessors look up the vertex itself; tail accessors look up its conjugate obtained from the graph. + * Lookups use at(), so they throw for a vertex that has no entry. + */ +template +class ScaffoldVertexIndex { + friend class ScaffoldVertexIndexBuilder; + public: + typedef scaffold_graph::ScaffoldVertex ScaffoldVertex; + typedef typename VertexEntryT::const_iterator const_iterator; + typedef debruijn_graph::Graph Graph; + + /** + * @param g graph used to obtain conjugate vertices (stored by reference) + */ + ScaffoldVertexIndex(const Graph &g): g_(g), vertex_to_entry_() {} + + /** + * @return entry stored for the vertex + */ + const VertexEntryT& GetHeadEntry(const ScaffoldVertex& vertex) const { + return vertex_to_entry_.at(vertex); + } + /** + * @return entry stored for the conjugate of the vertex + */ + const VertexEntryT& GetTailEntry(const ScaffoldVertex& vertex) const { + return vertex_to_entry_.at(vertex.GetConjugateFromGraph(g_)); + } + + const_iterator GetHeadBegin(const ScaffoldVertex& vertex) const { + return vertex_to_entry_.at(vertex).begin(); + } + const_iterator GetHeadEnd(const ScaffoldVertex& vertex) const { + return vertex_to_entry_.at(vertex).end(); + } + adt::iterator_range GetHeadRange(const ScaffoldVertex& vertex) const { + return
adt::make_range(GetHeadBegin(vertex), GetHeadEnd(vertex)); + } + /** + * @return begin iterator of the entry stored for the conjugate of the vertex + */ + const_iterator GetTailBegin(const ScaffoldVertex& vertex) const { + return vertex_to_entry_.at(vertex.GetConjugateFromGraph(g_)).begin(); + } + /** + * @return end iterator of the entry stored for the conjugate of the vertex + */ + const_iterator GetTailEnd(const ScaffoldVertex& vertex) const { + return vertex_to_entry_.at(vertex.GetConjugateFromGraph(g_)).end(); + } + /** + * @return range [GetTailBegin(vertex), GetTailEnd(vertex)) + * @note GetTailBegin and GetTailEnd already switch to the conjugate vertex, so the vertex is passed + * unchanged; conjugating it here as well would address the head entry instead of the tail entry. + */ + adt::iterator_range GetTailRange(const ScaffoldVertex& vertex) const { + return adt::make_range(GetTailBegin(vertex), + GetTailEnd(vertex)); + } + + /** + * @return true if an entry is stored for the vertex itself (not its conjugate) + */ + bool Contains(const ScaffoldVertex &vertex) const { + return vertex_to_entry_.find(vertex) != vertex_to_entry_.end(); + } + private: + /** + * Stores the entry for the vertex; an existing entry for the same vertex is left untouched (insert semantics). + * NOTE(review): entry is a named rvalue reference and is therefore copied into the map, not moved. + */ + void InsertEntry(const ScaffoldVertex& vertex, VertexEntryT&& entry) { + vertex_to_entry_.insert({vertex, entry}); + } + + const Graph& g_; + std::unordered_map vertex_to_entry_; +}; + +typedef std::set SimpleVertexEntry; + +typedef ScaffoldVertexIndex SimpleScaffoldVertexIndex; + +/** + * Interface for querying head/tail entry sizes and intersection sizes of scaffold vertices. + */ +class ScaffoldVertexIndexInfoExtractor { + public: + typedef typename scaffold_graph::ScaffoldVertex ScaffoldVertex; + public: + /** + * Virtual destructor: implementations may be destroyed through a pointer to this interface. + */ + virtual ~ScaffoldVertexIndexInfoExtractor() = default; + + virtual size_t GetHeadSize(const ScaffoldVertex &vertex) const = 0; + virtual size_t GetTailSize(const ScaffoldVertex &vertex) const = 0; + + virtual size_t GetIntersectionSize(const ScaffoldVertex &first, const ScaffoldVertex &second) const = 0; + + /** + * @note second is supposed to be between first and third + */ + virtual size_t GetIntersectionSize(const ScaffoldVertex &first, const ScaffoldVertex &second, + const ScaffoldVertex &third) const = 0; +}; + +template +class IntersectingScaffoldVertexIndexInfoExtractor: public ScaffoldVertexIndexInfoExtractor { + public: + using ScaffoldVertexIndexInfoExtractor::ScaffoldVertex; + + public: + virtual SimpleVertexEntry GetIntersection(const VertexEntryT &first, const VertexEntryT &second) const = 0; + virtual SimpleVertexEntry GetIntersection(const ScaffoldVertex &first, const ScaffoldVertex &second) const = 0; + /** + * @note second is
supposed to be between first and third + * NOTE(review): this note mirrors the three-vertex overload of the base class; the overload declared below + * takes a single vertex (middle) and an already computed entry. + */ + virtual size_t GetIntersectionSize(const ScaffoldVertex &middle, const VertexEntryT &entry) const = 0; + /** + * @return size of GetIntersection(first, second) + * NOTE(review): not declared const although it only calls a const member. + */ + size_t GetIntersectionSize(const VertexEntryT &first, const VertexEntryT &second) { + return GetIntersection(first, second).size(); + } + + virtual SimpleVertexEntry GetHeadEntry(const ScaffoldVertex &vertex) = 0; + virtual SimpleVertexEntry GetTailEntry(const ScaffoldVertex &vertex) = 0; +}; + +/** + * Answers scaffold vertex queries directly from a barcode extractor, using the first and last edges + * of the scaffold vertices, instead of a precomputed vertex index. + */ +class BarcodeIndexInfoExtractorWrapper: public IntersectingScaffoldVertexIndexInfoExtractor { + public: + using Graph = debruijn_graph::Graph; + + BarcodeIndexInfoExtractorWrapper(const Graph &g, std::shared_ptr barcode_index_) + : g_(g), barcode_extractor_(barcode_index_) {} + + /** + * @return number of barcodes on the first edge of the vertex + */ + size_t GetHeadSize(const ScaffoldVertex &vertex) const override { + return barcode_extractor_->GetNumberOfBarcodes(vertex.GetFirstEdge()); + } + /** + * @return number of barcodes on the first edge of the conjugate vertex + */ + size_t GetTailSize(const ScaffoldVertex &vertex) const override { + return barcode_extractor_->GetNumberOfBarcodes(vertex.GetConjugateFromGraph(g_).GetFirstEdge()); + } + /** + * @return number of barcodes shared by the last edge of first and the first edge of second + */ + size_t GetIntersectionSize(const ScaffoldVertex &first, const ScaffoldVertex &second) const override { + return barcode_extractor_->GetNumberOfSharedBarcodes(first.GetLastEdge(), second.GetFirstEdge()); + } + /** + * Intersects the barcodes shared by first and second with the barcodes of third. + */ + size_t GetIntersectionSize(const ScaffoldVertex &first, + const ScaffoldVertex &second, + const ScaffoldVertex &third) const override { + return GetIntersectionSize(third, GetIntersection(first, second)); + } + + /** + * @return set intersection of the two entries + */ + SimpleVertexEntry GetIntersection(const SimpleVertexEntry &first, + const SimpleVertexEntry &second) const override { + SimpleVertexEntry result; + std::set_intersection(first.begin(), first.end(), second.begin(), second.end(), + std::inserter(result, result.end())); + return result; + } + /** + * @return barcodes shared by the last edge of first and the first edge of second, as a set + */ + SimpleVertexEntry GetIntersection(const ScaffoldVertex &first, const ScaffoldVertex &second) const override { + auto intersection = barcode_extractor_->GetSharedBarcodes(first.GetLastEdge(), second.GetFirstEdge()); + std::set result; +
std::copy(intersection.begin(), intersection.end(), std::inserter(result, result.begin())); + return result; + } + /** + * @return number of barcodes of the first edge of middle that are also contained in entry + */ + size_t GetIntersectionSize(const ScaffoldVertex &middle, const SimpleVertexEntry &entry) const override { + auto barcodes = barcode_extractor_->GetBarcodes(middle.GetFirstEdge()); + SimpleVertexEntry intersection; + /* NOTE(review): std::set_intersection requires both ranges to be sorted; this assumes GetBarcodes returns barcodes in ascending order - confirm. */ + std::set_intersection(barcodes.begin(), barcodes.end(), entry.begin(), entry.end(), + std::inserter(intersection, intersection.begin())); + return intersection.size(); + } + /** + * Not supported by this class: the VERIFY_MSG below is given a false condition on every call. + */ + SimpleVertexEntry GetHeadEntry(const ScaffoldVertex &/*vertex*/) override { + VERIFY_MSG(false, "Head entry extractor from BarcodeIndexInfoExtractorWrapper is currently not supported"); + SimpleVertexEntry result; + return result; + } + /** + * Delegates to GetHeadEntry and is therefore not supported either. + */ + SimpleVertexEntry GetTailEntry(const ScaffoldVertex &vertex) override { + return GetHeadEntry(vertex); + } + + private: + const Graph &g_; + std::shared_ptr barcode_extractor_; +}; + +/** + * Extractor that answers queries from a precomputed scaffold vertex index held through index_. + */ +class SimpleScaffoldVertexIndexInfoExtractor: public IntersectingScaffoldVertexIndexInfoExtractor { + public: + typedef scaffold_graph::ScaffoldVertex ScaffoldVertex; + + explicit SimpleScaffoldVertexIndexInfoExtractor(std::shared_ptr> index_) + : index_(index_) {} + + /** + * @return intersection of the tail entry of first with the head entry of second + */ + SimpleVertexEntry GetIntersection(const ScaffoldVertex &first, const ScaffoldVertex &second) const override { + SimpleVertexEntry result; + auto first_begin = index_->GetTailBegin(first); + auto first_end = index_->GetTailEnd(first); + auto second_begin = index_->GetHeadBegin(second); + auto second_end = index_->GetHeadEnd(second); + std::set_intersection(first_begin, first_end, second_begin, second_end, std::inserter(result, result.end())); + return result; + } + /** + * @return number of elements common to the tail entry of first and the head entry of second, + * counted without building the intersection + */ + size_t GetIntersectionSize(const ScaffoldVertex &first, const ScaffoldVertex &second) const override { + auto first_begin = index_->GetTailBegin(first); + auto first_end = index_->GetTailEnd(first); + auto second_begin = index_->GetHeadBegin(second); + auto second_end = index_->GetHeadEnd(second); + auto first_it = first_begin; +
auto second_it = second_begin; + size_t current_intersection = 0; + while (first_it != first_end and second_it != second_end) { + if (*first_it == *second_it) { + ++current_intersection; + ++first_it; + ++second_it; + } else if (*first_it < *second_it) { + ++first_it; + } else { + ++second_it; + } + } + return current_intersection; + } + size_t GetIntersectionSize(const ScaffoldVertex &middle, const SimpleVertexEntry &entry) const override { + auto middle_begin = index_->GetHeadBegin(middle); + auto middle_end = index_->GetHeadEnd(middle); + SimpleVertexEntry intersection; + std::set_intersection(entry.begin(), entry.end(), middle_begin, middle_end, std::inserter(intersection, intersection.end())); + return intersection.size(); + } + size_t GetIntersectionSize(const ScaffoldVertex &first, + const ScaffoldVertex &second, + const ScaffoldVertex &third) const override { + const auto& entry = GetIntersection(first, third); + return GetIntersectionSize(second, entry); + } + + size_t GetHeadSize(const ScaffoldVertex &vertex) const override { + return (index_->GetHeadEntry(vertex)).size(); + } + size_t GetTailSize(const ScaffoldVertex &vertex) const override { + return (index_->GetTailEntry(vertex)).size(); + } + SimpleVertexEntry GetIntersection(const SimpleVertexEntry &first, + const SimpleVertexEntry &second) const override { + SimpleVertexEntry result; + std::set_intersection(first.begin(), first.end(), second.begin(), second.end(), std::inserter(result, result.end())); + return result; + } + SimpleVertexEntry GetHeadEntry(const ScaffoldVertex &vertex) override { + return index_->GetHeadEntry(vertex); + } + SimpleVertexEntry GetTailEntry(const ScaffoldVertex &vertex) override { + return index_->GetTailEntry(vertex); + } + + private: + std::shared_ptr> index_; +}; + +typedef IntersectingScaffoldVertexIndexInfoExtractor SimpleIntersectingScaffoldVertexExtractor; +} \ No newline at end of file diff --git a/src/common/barcode_index/scaffold_vertex_index_builder.hpp 
b/src/common/barcode_index/scaffold_vertex_index_builder.hpp new file mode 100644 index 0000000000..498bd4780e --- /dev/null +++ b/src/common/barcode_index/scaffold_vertex_index_builder.hpp @@ -0,0 +1,238 @@ +//*************************************************************************** +//* Copyright (c) 2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. +//*************************************************************************** + +#pragma once + +#include "scaffold_vertex_index.hpp" +#include "barcode_info_extractor.hpp" + +namespace barcode_index { + + template + class AbstractScaffoldVertexEntryExtractor { + public: + virtual EdgeEntryT ExtractEntry(const scaffold_graph::ScaffoldVertex &vertex) const = 0; + }; + + class TailThresholdGetter { + public: + virtual size_t GetTailThreshold(const scaffold_graph::ScaffoldVertex &vertex) const = 0; + }; + + class ConstTailThresholdGetter: public TailThresholdGetter { + public: + explicit ConstTailThresholdGetter(const size_t tail_threshold_) : tail_threshold_(tail_threshold_) {} + size_t GetTailThreshold(const scaffold_graph::ScaffoldVertex &/*vertex*/) const override { + return tail_threshold_; + } + private: + const size_t tail_threshold_; + }; + + class FractionTailThresholdGetter: public TailThresholdGetter { + public: + typedef debruijn_graph::Graph Graph; + + FractionTailThresholdGetter(const Graph &g_, const double edge_length_fraction_) + : g_(g_), edge_length_fraction_(edge_length_fraction_) {} + + size_t GetTailThreshold(const scaffold_graph::ScaffoldVertex &vertex) const override { + return static_cast(static_cast(vertex.GetLengthFromGraph(g_)) * edge_length_fraction_); + } + private: + const Graph& g_; + const double edge_length_fraction_; + }; + + class ScaffoldVertexSimpleEntryExtractor: public AbstractScaffoldVertexEntryExtractor { + public: + typedef debruijn_graph::Graph Graph; + typedef debruijn_graph::EdgeId EdgeId; + typedef typename 
scaffold_graph::EdgeIdVertex EdgeIdVertex; + typedef typename scaffold_graph::PathVertex PathVertex; + + ScaffoldVertexSimpleEntryExtractor(const Graph &g_, + const FrameBarcodeIndexInfoExtractor &barcode_extractor_, + std::shared_ptr tail_threshold_getter, + const size_t count_threshold_, + const size_t length_threshold_) + : g_(g_), + barcode_extractor_(barcode_extractor_), + tail_threshold_getter_(tail_threshold_getter), + count_threshold_(count_threshold_), + length_threshold_(length_threshold_) {} + + SimpleVertexEntry ExtractEntry(const scaffold_graph::ScaffoldVertex &vertex) const override { + auto inner_vertex = vertex.GetInnerVertex(); + + SimpleVertexEntry empty; + auto type = vertex.GetType(); + switch (type) { + case scaffold_graph::ScaffoldVertexT::Edge: { + auto edge_vertex = std::static_pointer_cast(inner_vertex); + return ExtractEntryInner(edge_vertex); + } + case scaffold_graph::ScaffoldVertexT::Path: { + auto path_vertex = std::static_pointer_cast(inner_vertex); + return ExtractEntryInner(path_vertex); + } + } + WARN("ScaffoldVertex of unknown type"); + return empty; + } + + private: + SimpleVertexEntry ExtractEntryInner(std::shared_ptr simple_edge_vertex) const { + SimpleVertexEntry result; + TRACE("Extracting entry from edge"); + auto edge = simple_edge_vertex->get(); + size_t tail_threshold = tail_threshold_getter_->GetTailThreshold(edge); + TRACE("Tail threshold: " << tail_threshold); + auto entry = barcode_extractor_.GetBarcodesFromHead(edge, count_threshold_, tail_threshold); + std::copy(entry.begin(), entry.end(), std::inserter(result, result.end())); + TRACE("Entry size: " << entry.size()); + return result; + } + + //fixme optimize later + SimpleVertexEntry ExtractEntryInner(std::shared_ptr path_vertex) const { + TRACE("Extracting entry from path"); + size_t current_prefix = 0; + path_extend::BidirectionalPath* path = path_vertex->get(); + size_t path_size = path->Size(); + SimpleVertexEntry result; + const size_t global_count_threshold = 
5; + std::unordered_map barcode_to_count; + size_t tail_threshold = tail_threshold_getter_->GetTailThreshold(path_vertex->get()); + TRACE("Tail threshold: " << tail_threshold); + for (size_t i = 0; i < path_size and current_prefix <= tail_threshold; ++i) { + EdgeId current_edge = path->At(i); + if (g_.length(current_edge) < length_threshold_) { + current_prefix += g_.length(current_edge); + continue; + } + size_t current_tail = tail_threshold - current_prefix; + TRACE("Current tail: " << current_tail); + const auto ¤t_entry = barcode_extractor_.GetBarcodesAndCountsFromHead(current_edge, + count_threshold_, + current_tail); + for (const auto& barcode_and_reads: current_entry) { + barcode_to_count[barcode_and_reads.first] += barcode_and_reads.second; + } + TRACE("Current entry size: " << barcode_to_count.size()); + current_prefix += g_.length(current_edge); + } + for (const auto& barcode_and_count: barcode_to_count) { + if (barcode_and_count.second >= global_count_threshold) { + result.insert(barcode_and_count.first); + } + } + TRACE("Result size: " << result.size()); + return result; + } + + const debruijn_graph::Graph &g_; + const FrameBarcodeIndexInfoExtractor& barcode_extractor_; + std::shared_ptr tail_threshold_getter_; + const size_t count_threshold_; + const size_t length_threshold_; + + DECL_LOGGER("ScaffoldVertexSimpleEntryExtractor"); + }; + + template + class ScaffoldVertexIndexBuilder { + public: + typedef debruijn_graph::Graph Graph; + typedef debruijn_graph::EdgeId EdgeId; + typedef scaffold_graph::ScaffoldVertex ScaffoldVertex; + typedef std::shared_ptr> EntryExtractorPtr; + + ScaffoldVertexIndexBuilder(const Graph &g, EntryExtractorPtr vertex_entry_extractor, size_t max_threads) + : g_(g), vertex_entry_extractor_(vertex_entry_extractor), + index_(std::make_shared>(g_)), max_threads_(max_threads) {} + + template + std::shared_ptr> GetConstructedIndex(const ContainerT& vertex_container) { + + //todo make parallel using iterator chunks + 
DEBUG("Constructing long edge index in " << max_threads_ << " threads"); +// size_t counter = 0; +// size_t block_size = vertex_container.size() / 10; + for (const auto& vertex: vertex_container) + { + auto entry = vertex_entry_extractor_->ExtractEntry(vertex); + TRACE("Entry size: " << entry.size()); + { + index_->InsertEntry(vertex, std::move(entry)); +// ++counter; + } +// if (counter % block_size == 0) { +// INFO("Processed " << counter << " edges out of " << vertex_container.size()); +// } + } + DEBUG("Constructed long edge index"); + return index_; + } + + private: + const Graph& g_; + EntryExtractorPtr vertex_entry_extractor_; + std::shared_ptr> index_; + size_t max_threads_; + }; + + class SimpleScaffoldVertexIndexBuilderHelper { + public: + typedef debruijn_graph::Graph Graph; + typedef std::shared_ptr ScaffoldIndexPtr; + typedef scaffold_graph::ScaffoldVertex ScaffoldVertex; + + template + ScaffoldIndexPtr ConstructScaffoldVertexIndex(const Graph& g_, const FrameBarcodeIndexInfoExtractor& extractor, + std::shared_ptr tail_threshold_getter, + size_t count_threshold, size_t length_threshold, + size_t max_threads, const ContainerT& vertex_container) { + DEBUG("Building simple long edge barcode index with parameters"); + DEBUG("Count threshold: " << count_threshold); + DEBUG("Length threshold: " << length_threshold); + auto entry_extractor = std::make_shared(g_, extractor, + tail_threshold_getter, + count_threshold, + length_threshold); + ScaffoldVertexIndexBuilder builder(g_, entry_extractor, max_threads); + return builder.GetConstructedIndex(vertex_container); + } + + template + ScaffoldIndexPtr HalfEdgeScaffoldVertexIndex(const Graph& g, const FrameBarcodeIndexInfoExtractor& extractor, + const ContainerT& vertex_container, size_t count_threshold, + size_t max_threads) { + const size_t length_threshold = 1000; + const size_t linkage_distance = 10; + const double EDGE_LENGTH_FRACTION = 0.5; + auto threshold_getter = std::make_shared(g, EDGE_LENGTH_FRACTION); 
+ auto split_scaffold_vertex_index = ConstructScaffoldVertexIndex(g, extractor, + threshold_getter, + count_threshold, length_threshold, + max_threads, vertex_container); + return split_scaffold_vertex_index; + } + + template + ScaffoldIndexPtr TailEdgeScaffoldVertexIndex(const Graph& g, const FrameBarcodeIndexInfoExtractor& extractor, + const ContainerT& vertex_container, size_t count_threshold, + size_t tail_threshold, size_t max_threads) { + const size_t length_threshold = 1000; + auto tail_threshold_getter = std::make_shared(tail_threshold); + auto scaffold_vertex_index = ConstructScaffoldVertexIndex(g, extractor, tail_threshold_getter, + count_threshold, length_threshold, + max_threads, vertex_container); + return scaffold_vertex_index; + } + }; + +} \ No newline at end of file diff --git a/src/common/io/binary/read_cloud.hpp b/src/common/io/binary/read_cloud.hpp new file mode 100644 index 0000000000..ae2afd86dc --- /dev/null +++ b/src/common/io/binary/read_cloud.hpp @@ -0,0 +1,43 @@ +//*************************************************************************** +//* Copyright (c) 2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. 
+//*************************************************************************** + +#pragma once + +#include "io_base.hpp" +#include "barcode_index/barcode_index.hpp" + +namespace io { + +namespace binary { + +template +class BarcodeMapperIO : public IOSingle> { +public: + typedef barcode_index::FrameBarcodeIndex Type; + BarcodeMapperIO() + : IOSingle("barcode index", ".bmap") { + } + + void SaveImpl(BinOStream &str, const Type &mapper) override { + str << mapper.GetFrameSize() << mapper; + } + + void LoadImpl(BinIStream &str, Type &mapper) override { + size_t frame_size; + str >> frame_size; + mapper.SetFrameSize(frame_size); + str >> mapper; + } +}; + +template +struct IOTraits> { + typedef BarcodeMapperIO Type; +}; + +} + +} \ No newline at end of file diff --git a/src/common/kmer_index/kmer_counting.hpp b/src/common/kmer_index/kmer_counting.hpp index 68777161c4..474f21e94e 100644 --- a/src/common/kmer_index/kmer_counting.hpp +++ b/src/common/kmer_index/kmer_counting.hpp @@ -7,11 +7,16 @@ #pragma once +#include "ph_map/storing_traits.hpp" +#include "io/reads/read_processor.hpp" // FIXME: remove use of ReadProcessor + +#include "sequence/rtseq.hpp" +#include "sequence/sequence.hpp" + #include "adt/cyclichash.hpp" #include "adt/hll.hpp" #include "adt/cqf.hpp" -#include "ph_map/storing_traits.hpp" -#include "io/reads/read_processor.hpp" + #include "utils/parallel/openmp_wrapper.h" #include "utils/logger/logger.hpp" diff --git a/src/common/kmer_index/kmer_mph/kmer_index.hpp b/src/common/kmer_index/kmer_mph/kmer_index.hpp index 9679144219..6678382c89 100644 --- a/src/common/kmer_index/kmer_mph/kmer_index.hpp +++ b/src/common/kmer_index/kmer_mph/kmer_index.hpp @@ -7,7 +7,6 @@ //* See file LICENSE for details. 
//*************************************************************************** -#include "kmer_index_traits.hpp" #include "kmer_buckets.hpp" #include diff --git a/src/common/kmer_index/kmer_mph/kmer_index_builder.hpp b/src/common/kmer_index/kmer_mph/kmer_index_builder.hpp index a351b8b697..09b20a1e6f 100644 --- a/src/common/kmer_index/kmer_mph/kmer_index_builder.hpp +++ b/src/common/kmer_index/kmer_mph/kmer_index_builder.hpp @@ -15,10 +15,7 @@ #include "io/binary/binary.hpp" #include "utils/parallel/openmp_wrapper.h" -#include "utils/memory_limit.hpp" #include "utils/logger/logger.hpp" -#include "utils/filesystem/path_helper.hpp" -#include "utils/filesystem/file_limit.hpp" #include "utils/perf/timetracer.hpp" #include "adt/kmer_vector.hpp" diff --git a/src/common/kmer_index/kmer_mph/kmer_splitters.hpp b/src/common/kmer_index/kmer_mph/kmer_splitters.hpp index efb1934e2d..5f62b2c4cf 100644 --- a/src/common/kmer_index/kmer_mph/kmer_splitters.hpp +++ b/src/common/kmer_index/kmer_mph/kmer_splitters.hpp @@ -9,7 +9,9 @@ #pragma once #include "kmer_splitter.hpp" -#include "io/reads/io_helper.hpp" +#include "io/reads/read_stream_vector.hpp" +#include "sequence/rtseq.hpp" +#include "sequence/sequence.hpp" #include "adt/iterator_range.hpp" namespace kmers { diff --git a/src/common/kmer_index/ph_map/cqf_hash_map.hpp b/src/common/kmer_index/ph_map/cqf_hash_map.hpp index 9c71ac8786..adedf2a815 100644 --- a/src/common/kmer_index/ph_map/cqf_hash_map.hpp +++ b/src/common/kmer_index/ph_map/cqf_hash_map.hpp @@ -1,4 +1,3 @@ -#pragma once //*************************************************************************** //* Copyright (c) 2023-2024 SPAdes team //* Copyright (c) 2020-2022 Saint Petersburg State University @@ -6,6 +5,8 @@ //* See file LICENSE for details. 
//*************************************************************************** +#pragma once + #include "perfect_hash_map.hpp" #include "adt/cqf.hpp" diff --git a/src/common/kmer_index/ph_map/kmer_maps.hpp b/src/common/kmer_index/ph_map/kmer_maps.hpp index 1d5b93c12f..8f32b67342 100644 --- a/src/common/kmer_index/ph_map/kmer_maps.hpp +++ b/src/common/kmer_index/ph_map/kmer_maps.hpp @@ -1,4 +1,3 @@ -#pragma once //*************************************************************************** //* Copyright (c) 2023-2024 SPAdes team //* Copyright (c) 2020-2022 Saint Petersburg State University @@ -6,6 +5,8 @@ //* See file LICENSE for details. //*************************************************************************** +#pragma once + #include "perfect_hash_map.hpp" #include "io/kmers/kmer_iterator.hpp" #include "utils/logger/logger.hpp" diff --git a/src/common/kmer_index/ph_map/perfect_hash_map.hpp b/src/common/kmer_index/ph_map/perfect_hash_map.hpp index 41dafcfb3a..bbd4e971a2 100644 --- a/src/common/kmer_index/ph_map/perfect_hash_map.hpp +++ b/src/common/kmer_index/ph_map/perfect_hash_map.hpp @@ -1,4 +1,3 @@ -#pragma once //*************************************************************************** //* Copyright (c) 2023-2024 SPAdes team //* Copyright (c) 2015-2022 Saint Petersburg State University @@ -7,15 +6,18 @@ //* See file LICENSE for details. 
//*************************************************************************** +#pragma once + #include "key_with_hash.hpp" #include "storing_traits.hpp" #include "kmer_index/kmer_mph/kmer_index.hpp" +#include "kmer_index/kmer_mph/kmer_index_traits.hpp" + #include "io/binary/binary.hpp" -#include "utils/verify.hpp" #include "utils/parallel/openmp_wrapper.h" + #include #include -#include namespace kmers { diff --git a/src/common/kmer_index/ph_map/perfect_hash_map_builder.hpp b/src/common/kmer_index/ph_map/perfect_hash_map_builder.hpp index f6362f3746..d29e1b4f46 100644 --- a/src/common/kmer_index/ph_map/perfect_hash_map_builder.hpp +++ b/src/common/kmer_index/ph_map/perfect_hash_map_builder.hpp @@ -1,4 +1,3 @@ -#pragma once //*************************************************************************** //* Copyright (c) 2023-2024 SPAdes team //* Copyright (c) 2016-2022 Saint Petersburg State University @@ -6,6 +5,8 @@ //* See file LICENSE for details. //*************************************************************************** +#pragma once + #include "perfect_hash_map.hpp" #include "kmer_maps.hpp" #include "cqf_hash_map.hpp" diff --git a/src/common/library/library.cpp b/src/common/library/library.cpp index b1416d520f..0622f4b409 100644 --- a/src/common/library/library.cpp +++ b/src/common/library/library.cpp @@ -48,6 +48,7 @@ struct ScalarEnumerationTraits { io.enumCase(value, "path-extend-contigs", LibraryType::PathExtendContigs); io.enumCase(value, "fl-rna", LibraryType::FLRNAReads); io.enumCase(value, "assembly-graph", LibraryType::AssemblyGraph); + io.enumCase(value, "clouds10x", LibraryType::Clouds10x); } }; }} @@ -81,6 +82,7 @@ void SequencingLibraryBase::validate(llvm::yaml::IO &, llvm::StringRef &res) { case LibraryType::MatePairs: case LibraryType::HQMatePairs: case LibraryType::TellSeqReads: + case LibraryType::Clouds10x: if (left_paired_reads_.size() != right_paired_reads_.size()) { res = "Left and right reads lists should have equal length"; return; diff 
--git a/src/common/library/library.hpp b/src/common/library/library.hpp index 601afaf541..a2e497006a 100644 --- a/src/common/library/library.hpp +++ b/src/common/library/library.hpp @@ -197,7 +197,8 @@ class SequencingLibraryBase { bool is_graph_constructable() const { return type_ == io::LibraryType::PairedEnd || type_ == io::LibraryType::SingleReads || - type_ == io::LibraryType::HQMatePairs; + type_ == io::LibraryType::HQMatePairs || + type_ == io::LibraryType::Clouds10x; } bool is_bwa_alignable() const { @@ -215,7 +216,8 @@ class SequencingLibraryBase { bool is_paired() const { return type_ == io::LibraryType::PairedEnd || type_ == io::LibraryType::MatePairs || - type_ == io::LibraryType::HQMatePairs; + type_ == io::LibraryType::HQMatePairs || + type_ == io::LibraryType::Clouds10x; } bool is_single() const { diff --git a/src/common/library/library_data.cpp b/src/common/library/library_data.cpp index ba37dfcf78..4c4a8a3b3a 100644 --- a/src/common/library/library_data.cpp +++ b/src/common/library/library_data.cpp @@ -22,6 +22,10 @@ void MappingTraits::mapping(IO &io, LibraryData::B io.mapRequired("chunk num", info.chunk_num); } +void MappingTraits::mapping(IO &io, LibraryData::ReadCloudInfo &info) { + io.mapRequired("fragment length distribution", info.fragment_length_distribution); +} + void MappingTraits::mapping(IO &io, debruijn_graph::config::LibraryData &data) { io.mapRequired("unmerged read length", data.unmerged_read_length); io.mapRequired("merged read length", data.merged_read_length); @@ -38,6 +42,7 @@ void MappingTraits::mapping(IO &io, debruijn_graph::config::Library io.mapRequired("library index", data.lib_index); io.mapRequired("number of reads", data.read_count); io.mapRequired("total nucleotides", data.total_nucls); + io.mapRequired("read cloud info", data.read_cloud_info); } } } diff --git a/src/common/library/library_data.hpp b/src/common/library/library_data.hpp index 586fb7c096..2f11146426 100644 --- a/src/common/library/library_data.hpp +++ 
b/src/common/library/library_data.hpp @@ -55,6 +55,12 @@ struct LibraryData { size_t chunk_num = 0; } binary_reads_info; + struct ReadCloudInfo { + ReadCloudInfo() + : fragment_length_distribution() {} + std::map fragment_length_distribution; + } read_cloud_info; + void clear() { unmerged_read_length = merged_read_length = 0; mean_insert_size = insert_size_deviation @@ -68,6 +74,7 @@ struct LibraryData { total_nucls = read_count = 0; pi_threshold = 0.0; binary_reads_info = BinaryReadsInfo(); + read_cloud_info = ReadCloudInfo(); } LibraryData() { @@ -86,6 +93,11 @@ struct MappingTraits { static void mapping(IO &io, debruijn_graph::config::LibraryData::BinaryReadsInfo &info); }; +template<> +struct MappingTraits { + static void mapping(IO &io, debruijn_graph::config::LibraryData::ReadCloudInfo &info); +}; + template<> struct MappingTraits { static void mapping(IO &io, debruijn_graph::config::LibraryData &data); diff --git a/src/common/library/library_fwd.hpp b/src/common/library/library_fwd.hpp index bb63d0da36..f2e01f2015 100644 --- a/src/common/library/library_fwd.hpp +++ b/src/common/library/library_fwd.hpp @@ -26,6 +26,7 @@ enum class LibraryType { MatePairs, TrustedContigs, TSLReads, + Clouds10x, PathExtendContigs, UntrustedContigs, FLRNAReads, diff --git a/src/common/modules/path_extend/CMakeLists.txt b/src/common/modules/path_extend/CMakeLists.txt index 4cb85f3198..1c8aa8bfc3 100644 --- a/src/common/modules/path_extend/CMakeLists.txt +++ b/src/common/modules/path_extend/CMakeLists.txt @@ -15,16 +15,19 @@ add_library(path_extend STATIC path_extenders.cpp pe_resolver.cpp overlap_remover.cpp + path_scaffolder.cpp pipeline/launch_support.cpp pipeline/launcher.cpp pipeline/extenders_logic.cpp scaffolder2015/extension_chooser2015.cpp - scaffolder2015/scaffold_graph.cpp scaffolder2015/scaffold_graph_constructor.cpp scaffolder2015/scaffold_graph_visualizer.cpp - scaffolder2015/connection_condition2015.cpp - scaffolder2015/path_polisher.cpp) + 
scaffolder2015/connection_condition2015.cpp + scaffolder2015/path_polisher.cpp + read_cloud_path_extend/scaffold_graph_extractor.cpp + read_cloud_path_extend/scaffold_graph_construction/read_cloud_connection_conditions.cpp + ) -target_link_libraries(path_extend assembly_graph ssw configs) +target_link_libraries(path_extend assembly_graph auxiliary_graphs ssw configs) diff --git a/src/common/modules/path_extend/path_scaffolder.cpp b/src/common/modules/path_extend/path_scaffolder.cpp new file mode 100644 index 0000000000..a61cee54fe --- /dev/null +++ b/src/common/modules/path_extend/path_scaffolder.cpp @@ -0,0 +1,159 @@ +//*************************************************************************** +//* Copyright (c) 2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. +//*************************************************************************** + +#include "path_scaffolder.hpp" + +#include "modules/path_extend/read_cloud_path_extend/scaffold_graph_extractor.hpp" + +namespace path_extend { + +void SimplePathScaffolder::CondenseSimplePaths(const std::vector &scaffold_edges) const { + std::unordered_map merge_connections; + for (const auto &edge: scaffold_edges) { + ScaffoldVertex start = edge.getStart(); + ScaffoldVertex end = edge.getEnd(); + DEBUG(start.int_id() << " -> " << end.int_id()); + DEBUG("Weight: " << edge.getWeight()); + VERIFY(merge_connections.find(start) == merge_connections.end()); + merge_connections.insert({start, end}); + } + + for (const auto &connection: merge_connections) { + auto start = connection.first; + auto end = connection.second; + auto start_conjugate = start.GetConjugateFromGraph(g_); + auto end_conjugate = end.GetConjugateFromGraph(g_); + if (merge_connections.find(end_conjugate) == merge_connections.end() or + merge_connections.at(end_conjugate) != start_conjugate) { + WARN("Conjugate connection does not correspond to direct connection") + merge_connections.at(end_conjugate) = 
start_conjugate; + } else { + merge_connections.insert({end_conjugate, start_conjugate}); + } + } + + StartFinder start_finder(g_); + auto starts = start_finder.GetStarts(merge_connections); + std::unordered_map start_to_distance; + for (const auto &edge: scaffold_edges) { + start_to_distance.insert({edge.getStart(), edge.getLength()}); + start_to_distance.insert({edge.getEnd().GetConjugateFromGraph(g_), edge.getLength()}); + } + for (const auto &connection: merge_connections) { + DEBUG(connection.first.int_id() << " -> " << connection.second.int_id()); + } + INFO(starts.size() << " starts."); + for (const auto &start: starts) { + ScaffoldVertex current = start; + bool next_found = merge_connections.find(current) != merge_connections.end(); + DEBUG("Start: " << current.int_id()); + while (next_found and merge_connections.at(current) != start) { + current = merge_connections.at(current); + next_found = merge_connections.find(current) != merge_connections.end(); + DEBUG(current.int_id()); + } + } + for (const auto &start: starts) { + if (not start.ToPath(g_)->Empty()) { + ExtendPathAlongConnections(start, merge_connections, start_to_distance); + } + } +} + +void SimplePathScaffolder::MergePaths(const ScaffoldGraph &scaffold_graph) const { + INFO(scaffold_graph.VertexCount() << " vertices and " << scaffold_graph.EdgeCount() + << " edges in path scaffold graph"); + for (const ScaffoldVertex &vertex: scaffold_graph.vertices()) { + VERIFY_DEV(vertex.GetType() == scaffold_graph::ScaffoldVertexT::Path); + } + read_cloud::ScaffoldGraphExtractor graph_extractor; + auto reliable_edges = graph_extractor.ExtractReliableEdges(scaffold_graph); + INFO("Found " << reliable_edges.size() << " reliable edges"); + CondenseSimplePaths(reliable_edges); +} + +std::unordered_set StartFinder::GetStarts(const TransitionMap &transition_map) const { + std::unordered_set starts; + std::unordered_set used; + for (const auto &connection: transition_map) { + auto start = connection.first; + auto 
current = start; + auto current_conjugate = current.GetConjugateFromGraph(g_); + if (used.find(current) != used.end()) { + continue; + } + bool prev_found = transition_map.find(current_conjugate) != transition_map.end(); + bool prev_used = false; + while (prev_found) { + used.insert(current); + used.insert(current_conjugate); + auto prev_conjugate = transition_map.at(current_conjugate); + if (used.find(prev_conjugate) != used.end()) { + prev_used = true; + break; + } + current = prev_conjugate.GetConjugateFromGraph(g_); + current_conjugate = current.GetConjugateFromGraph(g_); + prev_found = transition_map.find(current_conjugate) != transition_map.end(); + } + starts.insert(current); + if (not prev_used) { + bool next_found = transition_map.find(current) != transition_map.end(); + while (next_found) { + current = transition_map.at(current); + used.insert(current); + used.insert(current.GetConjugateFromGraph(g_)); + next_found = transition_map.find(current) != transition_map.end(); + } + } else { + VERIFY_DEV(used.find(start) != used.end()); + } + } + return starts; +} + +void SimplePathScaffolder::ExtendPathAlongConnections( + const ScaffoldVertex &start, + const std::unordered_map &merge_connections, + const std::unordered_map &start_to_distance) const { + auto current = start; + bool next_found = merge_connections.find(current) != merge_connections.end(); + auto start_path = start.ToPath(g_); + while (next_found) { + auto next = merge_connections.at(current); + auto next_path = next.ToPath(g_); + if (start_path->GetId() == next_path->GetId()) { + break; + } + DEBUG("First path: " << start_path->GetId() << ", length : " << start_path->Length()); + DEBUG("Second path: " << next_path->GetId() << ", length: " << next_path->Length()); + DEBUG("First conj: " << start_path->GetConjPath()->GetId() << ", length : " + << start_path->GetConjPath()->Length()); + DEBUG( + "Second conj: " << next_path->GetConjPath()->GetId() << ", length: " << 
next_path->GetConjPath()->Length()); + DEBUG("Got paths") + int gap_length = static_cast(start_to_distance.at(current)); + if (gap_length == 0) { + gap_length = default_gap_; + } + Gap path_distance_gap(gap_length); + DEBUG("Push back") + start_path->PushBack(*next_path, path_distance_gap); + DEBUG("Clear"); + next_path->Clear(); + DEBUG("Second path: " << next_path->GetId() << ", length: " << next_path->Length()); + DEBUG(next_path->Empty()); + DEBUG("Conjugate: " << next_path->GetConjPath()->GetId() << ", length: " << next_path->GetConjPath()->Length()); + DEBUG("Conjugate empty: " << next_path->GetConjPath()->Empty()); + current = next; + next_found = merge_connections.find(current) != merge_connections.end(); + } +} +SimplePathScaffolder::SimplePathScaffolder(const Graph &g, int default_gap) : + g_(g), default_gap_(default_gap) {} + +StartFinder::StartFinder(const Graph &g): g_(g) {} +} \ No newline at end of file diff --git a/src/common/modules/path_extend/path_scaffolder.hpp b/src/common/modules/path_extend/path_scaffolder.hpp new file mode 100644 index 0000000000..7bb917c422 --- /dev/null +++ b/src/common/modules/path_extend/path_scaffolder.hpp @@ -0,0 +1,60 @@ +//*************************************************************************** +//* Copyright (c) 2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. 
+//*************************************************************************** + +#pragma once + +#include "assembly_graph/paths/bidirectional_path_container.hpp" +#include "auxiliary_graphs/scaffold_graph/scaffold_graph.hpp" +#include "configs/pe_config_struct.hpp" + +namespace path_extend { + +class StartFinder { + typedef scaffold_graph::ScaffoldVertex ScaffoldVertex; + typedef std::unordered_map TransitionMap; + typedef debruijn_graph::Graph Graph; + + const debruijn_graph::Graph &g_; + public: + StartFinder(const debruijn_graph::Graph &g); + + std::unordered_set GetStarts(const TransitionMap &transition_map) const; +}; + +class PathScaffolder { + public: + typedef scaffold_graph::ScaffoldGraph ScaffoldGraph; + + virtual ~PathScaffolder() = default; + + virtual void MergePaths(const ScaffoldGraph &scaffold_graph) const = 0; +}; + +class SimplePathScaffolder : public PathScaffolder { + public: + using PathScaffolder::ScaffoldGraph; + typedef scaffold_graph::ScaffoldVertex ScaffoldVertex; + typedef ScaffoldGraph::ScaffoldEdge ScaffoldEdge; + typedef debruijn_graph::Graph Graph; + + SimplePathScaffolder(const debruijn_graph::Graph &g, int default_gap); + + void MergePaths(const ScaffoldGraph &scaffold_graph) const override; + + private: + + void CondenseSimplePaths(const std::vector &scaffold_edges) const; + void ExtendPathAlongConnections(const ScaffoldVertex &start, + const std::unordered_map &merge_connections, + const std::unordered_map &start_to_length) const; + + const debruijn_graph::Graph &g_; + const int default_gap_; + + DECL_LOGGER("SimplePathScaffolder"); +}; + +} diff --git a/src/common/modules/path_extend/pipeline/launcher.cpp b/src/common/modules/path_extend/pipeline/launcher.cpp index f336d00894..6f274708e2 100644 --- a/src/common/modules/path_extend/pipeline/launcher.cpp +++ b/src/common/modules/path_extend/pipeline/launcher.cpp @@ -26,6 +26,7 @@ namespace path_extend { using namespace debruijn_graph; using namespace omnigraph::de; +using 
namespace path_extend::scaffolder; std::vector> PathExtendLauncher::ConstructPairedConnectionConditions(const ScaffoldingUniqueEdgeStorage& edge_storage) const { @@ -88,13 +89,23 @@ void PathExtendLauncher::PrintScaffoldGraph(const scaffold_graph::ScaffoldGraph const debruijn_graph::GenomeConsistenceChecker &genome_checker, const std::filesystem::path &filename) const { using namespace scaffold_graph; + using namespace scaffolder; - auto vertex_colorer = std::make_shared(main_edge_set); + std::set scaff_vertex_set; + for (const auto& edge: main_edge_set) { + EdgeId copy = edge; + scaff_vertex_set.insert(copy); + } + auto vertex_colorer = std::make_shared(scaff_vertex_set); auto edge_colorer = std::make_shared(); graph_colorer::CompositeGraphColorer colorer(vertex_colorer, edge_colorer); INFO("Visualizing scaffold graph"); - ScaffoldGraphVisualizer singleVisualizer(scaffold_graph, genome_checker.EdgeLabels()); + std::map scaff_vertex_labels; + for (const auto& entry: genome_checker.EdgeLabels()) { + scaff_vertex_labels.insert({entry.first, entry.second}); + } + ScaffoldGraphVisualizer singleVisualizer(scaffold_graph, scaff_vertex_labels); std::ofstream single_dot; single_dot.open(filename.native() + "_single.dot"); singleVisualizer.Visualize(single_dot, colorer); diff --git a/src/common/modules/path_extend/pipeline/launcher.hpp b/src/common/modules/path_extend/pipeline/launcher.hpp index eff18a64c8..72bbe985ba 100644 --- a/src/common/modules/path_extend/pipeline/launcher.hpp +++ b/src/common/modules/path_extend/pipeline/launcher.hpp @@ -11,8 +11,9 @@ #include "extenders_logic.hpp" #include "launch_support.hpp" +#include "auxiliary_graphs/scaffold_graph/scaffold_graph.hpp" #include "modules/path_extend/pe_resolver.hpp" -#include "modules/path_extend/scaffolder2015/scaffold_graph.hpp" +#include "modules/path_extend/scaffolder2015/connection_condition2015.hpp" #include "modules/genome_consistance_checker.hpp" #include "alignment/rna/ss_coverage.hpp" diff --git 
a/src/common/modules/path_extend/read_cloud_path_extend/contracted_graph_scaffolding/contracted_gfa_writer.cpp b/src/common/modules/path_extend/read_cloud_path_extend/contracted_graph_scaffolding/contracted_gfa_writer.cpp new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/common/modules/path_extend/read_cloud_path_extend/scaffold_graph_construction/read_cloud_connection_conditions.cpp b/src/common/modules/path_extend/read_cloud_path_extend/scaffold_graph_construction/read_cloud_connection_conditions.cpp new file mode 100644 index 0000000000..d5a44bd273 --- /dev/null +++ b/src/common/modules/path_extend/read_cloud_path_extend/scaffold_graph_construction/read_cloud_connection_conditions.cpp @@ -0,0 +1,140 @@ +//*************************************************************************** +//* Copyright (c) 2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. +//*************************************************************************** + +#include "read_cloud_connection_conditions.hpp" + +#include "modules/path_extend/pipeline/launcher.hpp" + +namespace path_extend { +namespace read_cloud { + +double NormalizedBarcodeScoreFunction::GetScore(const scaffold_graph::ScaffoldGraph::ScaffoldEdge &edge) const { + auto first = edge.getStart(); + auto second = edge.getEnd(); + DEBUG("Checking edge " << edge.getStart().int_id() << " -> " << edge.getEnd().int_id()); + size_t first_length = first.GetLengthFromGraph(graph_); + size_t second_length = second.GetLengthFromGraph(graph_); + size_t first_size = barcode_extractor_->GetTailSize(first); + size_t second_size = barcode_extractor_->GetHeadSize(second); + if (first_size == 0 or second_size == 0) { + DEBUG("No barcodes on one of the long edges"); + return 0.0; + } + size_t shared_count = barcode_extractor_->GetIntersectionSize(first, second); + size_t min_size = std::min(first_size, second_size); + double containment_index = static_cast(shared_count) / 
static_cast(min_size); + if (math::ge(containment_index, 0.05)) { + DEBUG("First length: " << first_length); + DEBUG("Second length: " << second_length); + DEBUG("First size: " << first_size); + DEBUG("Second size: " << second_size); + DEBUG("Intersection: " << shared_count); + DEBUG("Score: " << containment_index); + } + VERIFY(math::ge(1.0, containment_index)); +// double first_coverage = first.GetCoverageFromGraph(graph_); +// double second_coverage = second.GetCoverageFromGraph(graph_); + return containment_index; +} +NormalizedBarcodeScoreFunction::NormalizedBarcodeScoreFunction( + const Graph &graph_, + std::shared_ptr barcode_extractor_) : + AbstractBarcodeScoreFunction(graph_, barcode_extractor_) {} + +TransitiveEdgesPredicate::TransitiveEdgesPredicate(const scaffold_graph::ScaffoldGraph &graph, + const Graph &g, + size_t distance_threshold) : + scaffold_graph_(graph), g_(g), distance_threshold_(distance_threshold) {} +bool TransitiveEdgesPredicate::Check(const ScaffoldEdgePredicate::ScaffoldEdge &scaffold_edge) const { + ScaffoldVertex current = scaffold_edge.getStart(); + ScaffoldVertex candidate = scaffold_edge.getEnd(); + //fixme replace with dijkstra and length threshold + DEBUG("Checking edge (" << current.int_id() << ", " << candidate.int_id() << ")"); + SimpleSearcher simple_searcher(scaffold_graph_, g_, distance_threshold_); + auto reachable_vertices = simple_searcher.GetReachableVertices(current, scaffold_edge); + for (const auto &vertex: reachable_vertices) { + if (candidate == vertex) { + DEBUG("Found another path, false"); + return false; + } + } + DEBUG("True"); + return true; +} +SimpleSearcher::SimpleSearcher(const scaffold_graph::ScaffoldGraph &graph, const Graph &g, size_t distance) + : scaff_graph_(graph), g_(g), distance_threshold_(distance) {} +std::vector SimpleSearcher::GetReachableVertices( + const SimpleSearcher::ScaffoldVertex &vertex, + const ScaffoldGraph::ScaffoldEdge &restricted_edge) { + std::vector result; + 
VertexWithDistance new_vertex(vertex, 0); + std::queue vertex_queue; + vertex_queue.push(new_vertex); + std::unordered_set visited; + visited.insert(vertex); + visited.insert(vertex.GetConjugateFromGraph(g_)); + visited.insert(restricted_edge.getEnd().GetConjugateFromGraph(g_)); + while (not vertex_queue.empty()) { + auto current_vertex = vertex_queue.front(); + vertex_queue.pop(); + DEBUG("Id: " << current_vertex.vertex.int_id()); + DEBUG("Distance: " << current_vertex.distance); + if (current_vertex.distance <= distance_threshold_) { + DEBUG("Passed threshold. Processing") + ProcessVertex(vertex_queue, current_vertex, visited, restricted_edge); + DEBUG("Processing finished"); + result.push_back(current_vertex.vertex); + } + } + return result; +} + +void SimpleSearcher::ProcessVertex(std::queue &vertex_queue, + const VertexWithDistance &vertex, + std::unordered_set &visited, + const ScaffoldGraph::ScaffoldEdge &restricted_edge) { + size_t current_distance = vertex.distance; + size_t new_distance = current_distance + 1; + for (const ScaffoldGraph::ScaffoldEdge &edge: scaff_graph_.OutgoingEdges(vertex.vertex)) { + DEBUG("Checking vertex: " << edge.getEnd().int_id()); + DEBUG("Visited: " << (visited.find(edge.getEnd()) != visited.end())); + DEBUG("Edge restricted: " << AreEqual(edge, restricted_edge)); + if (visited.find(edge.getEnd()) == visited.end() and not AreEqual(edge, restricted_edge)) { + DEBUG("Passed"); + vertex_queue.emplace(edge.getEnd(), new_distance); + visited.insert(edge.getEnd()); + } + } +} +bool SimpleSearcher::AreEqual(const scaffold_graph::ScaffoldGraph::ScaffoldEdge &first, + const scaffold_graph::ScaffoldGraph::ScaffoldEdge &second) { + return first.getStart() == second.getStart() and first.getEnd() == second.getEnd(); +} + +SimpleSearcher::VertexWithDistance::VertexWithDistance(const SimpleSearcher::ScaffoldVertex &vertex, size_t distance) + : vertex(vertex), distance(distance) {} + +AbstractBarcodeScoreFunction::AbstractBarcodeScoreFunction( 
+ const Graph &graph_, + const std::shared_ptr barcode_extractor) + : + graph_(graph_), + barcode_extractor_(barcode_extractor) {} +TrivialBarcodeScoreFunction::TrivialBarcodeScoreFunction( + const Graph &graph_, + std::shared_ptr barcode_extractor_, + const size_t read_count_threshold_, + const size_t tail_threshold_) : AbstractBarcodeScoreFunction(graph_, + barcode_extractor_), + read_count_threshold_(read_count_threshold_), + tail_threshold_(tail_threshold_) {} +double TrivialBarcodeScoreFunction::GetScore(const scaffold_graph::ScaffoldGraph::ScaffoldEdge &edge) const { + size_t shared_count = barcode_extractor_->GetIntersectionSize(edge.getStart(), edge.getEnd()); + + return static_cast(shared_count); +} +} +} \ No newline at end of file diff --git a/src/common/modules/path_extend/read_cloud_path_extend/scaffold_graph_construction/read_cloud_connection_conditions.hpp b/src/common/modules/path_extend/read_cloud_path_extend/scaffold_graph_construction/read_cloud_connection_conditions.hpp new file mode 100644 index 0000000000..1ce9fe8865 --- /dev/null +++ b/src/common/modules/path_extend/read_cloud_path_extend/scaffold_graph_construction/read_cloud_connection_conditions.hpp @@ -0,0 +1,122 @@ +//*************************************************************************** +//* Copyright (c) 2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. 
+//*************************************************************************** + +#pragma once + +#include "auxiliary_graphs/scaffold_graph/scaffold_graph.hpp" +#include "barcode_index/scaffold_vertex_index.hpp" +#include "modules/path_extend/path_extender.hpp" +#include "modules/path_extend/pipeline/launch_support.hpp" +#include "modules/path_extend/extension_chooser.hpp" +#include "modules/path_extend/scaffolder2015/connection_condition2015.hpp" + +namespace path_extend { +namespace read_cloud { + +class ScaffoldEdgePredicate : public func::AbstractPredicate { + public: + typedef scaffold_graph::ScaffoldGraph ScaffoldGraph; + typedef scaffold_graph::ScaffoldGraph::ScaffoldEdge ScaffoldEdge; + + virtual ~ScaffoldEdgePredicate() = default; +}; + +class SimpleSearcher { + public: + typedef scaffold_graph::ScaffoldGraph ScaffoldGraph; + typedef ScaffoldGraph::ScaffoldGraphVertex ScaffoldVertex; + + struct VertexWithDistance { + ScaffoldVertex vertex; + size_t distance; + VertexWithDistance(const ScaffoldVertex &vertex, size_t distance); + }; + + SimpleSearcher(const scaffold_graph::ScaffoldGraph &graph_, const Graph &g, size_t distance_); + + std::vector GetReachableVertices(const ScaffoldVertex &vertex, + const ScaffoldGraph::ScaffoldEdge &restricted_edge); + void ProcessVertex(std::queue &vertex_queue, const VertexWithDistance &vertex, + std::unordered_set &visited, const ScaffoldGraph::ScaffoldEdge &restricted_edge); + bool AreEqual(const ScaffoldGraph::ScaffoldEdge &first, const ScaffoldGraph::ScaffoldEdge &second); + + private: + const ScaffoldGraph &scaff_graph_; + const Graph &g_; + size_t distance_threshold_; + + DECL_LOGGER("SimpleSearcher"); +}; + +class TransitiveEdgesPredicate : public ScaffoldEdgePredicate { + public: + using ScaffoldEdgePredicate::ScaffoldEdge; + typedef scaffold_graph::ScaffoldVertex ScaffoldVertex; + + TransitiveEdgesPredicate(const scaffold_graph::ScaffoldGraph &graph, const Graph &g, size_t distance_threshold); + + bool Check(const 
ScaffoldEdge &scaffold_edge) const override; + + private: + const scaffold_graph::ScaffoldGraph scaffold_graph_; + const Graph &g_; + size_t distance_threshold_; + + DECL_LOGGER("TransitiveEdgesPredicate"); +}; + +class ScaffoldEdgeScoreFunction { + public: + typedef scaffold_graph::ScaffoldGraph ScaffoldGraph; + typedef scaffold_graph::ScaffoldGraph::ScaffoldEdge ScaffoldEdge; + virtual double GetScore(const scaffold_graph::ScaffoldGraph::ScaffoldEdge &edge) const = 0; + virtual ~ScaffoldEdgeScoreFunction() = default; +}; + +class AbstractBarcodeScoreFunction : public ScaffoldEdgeScoreFunction { + public: + AbstractBarcodeScoreFunction( + const Graph &graph_, + std::shared_ptr barcode_extractor_); + + protected: + const Graph &graph_; + std::shared_ptr barcode_extractor_; +}; + +class NormalizedBarcodeScoreFunction : public AbstractBarcodeScoreFunction { + public: + NormalizedBarcodeScoreFunction(const Graph &graph_, + std::shared_ptr barcode_extractor_); + + double GetScore(const scaffold_graph::ScaffoldGraph::ScaffoldEdge &edge) const override; + + protected: + using AbstractBarcodeScoreFunction::barcode_extractor_; + using AbstractBarcodeScoreFunction::graph_; + + DECL_LOGGER("NormalizedBarcodeScoreFunction"); +}; + +class TrivialBarcodeScoreFunction : public AbstractBarcodeScoreFunction { + public: + TrivialBarcodeScoreFunction( + const Graph &graph_, + std::shared_ptr barcode_extractor_, + size_t read_count_threshold_, + size_t tail_threshold_); + + double GetScore(const scaffold_graph::ScaffoldGraph::ScaffoldEdge &edge) const override; + + protected: + using AbstractBarcodeScoreFunction::barcode_extractor_; + using AbstractBarcodeScoreFunction::graph_; + const size_t read_count_threshold_; + const size_t tail_threshold_; +}; + +} +} \ No newline at end of file diff --git a/src/common/modules/path_extend/read_cloud_path_extend/scaffold_graph_extractor.cpp b/src/common/modules/path_extend/read_cloud_path_extend/scaffold_graph_extractor.cpp new file mode 100644 
index 0000000000..ece4aea37c --- /dev/null +++ b/src/common/modules/path_extend/read_cloud_path_extend/scaffold_graph_extractor.cpp @@ -0,0 +1,77 @@ +//*************************************************************************** +//* Copyright (c) 2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. +//*************************************************************************** + +#include "scaffold_graph_extractor.hpp" + +namespace path_extend { +namespace read_cloud { + +std::vector ScaffoldGraphExtractor::ExtractReliableEdges( + const ScaffoldGraph &scaffold_graph) const { + std::vector result; + for (const ScaffoldGraph::ScaffoldEdge &edge: scaffold_graph.edges()) { + if (scaffold_graph.HasUniqueOutgoing(edge.getStart()) and scaffold_graph.HasUniqueIncoming(edge.getEnd())) { + result.push_back(edge); + } + } + return result; +} +std::vector ScaffoldGraphExtractor::ExtractMaxScoreEdges( + const ScaffoldGraphExtractor::ScaffoldGraph &scaffold_graph) const { + std::vector result; + std::unordered_map start_to_edge; + std::unordered_map end_to_edge; + size_t edge_counter = 0; + size_t edge_block = 10000; + for (const ScaffoldGraph::ScaffoldEdge &edge: scaffold_graph.edges()) { + double score = edge.getWeight(); + auto start = edge.getStart(); + auto end = edge.getEnd(); + bool is_max_score_edge = true; + for (const auto &in_edge: scaffold_graph.IncomingEdges(end)) { + double in_score = in_edge.getWeight(); + if (math::gr(in_score, score)) { + is_max_score_edge = false; + break; + } + } + if (not is_max_score_edge) { + continue; + } + for (const auto &out_edge: scaffold_graph.OutgoingEdges(start)) { + double out_score = out_edge.getWeight(); + if (math::gr(out_score, score)) { + is_max_score_edge = false; + break; + } + } + if (is_max_score_edge) { + if (start_to_edge.find(start) == start_to_edge.end() and end_to_edge.find(end) == end_to_edge.end()) { + start_to_edge.insert({start, edge}); + end_to_edge.insert({end, 
edge}); + result.push_back(edge); + } + } + ++edge_counter; + if (edge_counter % edge_block == 0) { + INFO("Processed " << edge_counter << " edges out of " << scaffold_graph.EdgeCount()); + } + } + return result; +} +std::unordered_map ScaffoldGraphExtractor::GetFirstEdgeMap( + const ScaffoldGraphExtractor::ScaffoldGraph &scaffold_graph, const func::TypedPredicate &pred) const { + std::unordered_map result; + for (const ScaffoldVertex &vertex: scaffold_graph.vertices()) { + auto first_edge = vertex.GetFirstEdgeWithPredicate(pred); + if (first_edge.is_initialized()) { + result[first_edge.get()].insert(vertex); + } + } + return result; +} +} +} \ No newline at end of file diff --git a/src/common/modules/path_extend/read_cloud_path_extend/scaffold_graph_extractor.hpp b/src/common/modules/path_extend/read_cloud_path_extend/scaffold_graph_extractor.hpp new file mode 100644 index 0000000000..7d4cc2caf9 --- /dev/null +++ b/src/common/modules/path_extend/read_cloud_path_extend/scaffold_graph_extractor.hpp @@ -0,0 +1,29 @@ +//*************************************************************************** +//* Copyright (c) 2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. 
+//*************************************************************************** + +#pragma once + +#include "auxiliary_graphs/scaffold_graph/scaffold_graph.hpp" + +namespace path_extend { +namespace read_cloud { + +class ScaffoldGraphExtractor { + public: + typedef debruijn_graph::EdgeId EdgeId; + typedef scaffold_graph::ScaffoldGraph ScaffoldGraph; + typedef ScaffoldGraph::ScaffoldEdge ScaffoldEdge; + typedef scaffold_graph::ScaffoldVertex ScaffoldVertex; + typedef std::unordered_set VertexSet; + + std::vector ExtractMaxScoreEdges(const ScaffoldGraph &scaffold_graph) const; + std::vector ExtractReliableEdges(const ScaffoldGraph &scaffold_graph) const; + std::unordered_map GetFirstEdgeMap(const ScaffoldGraph &scaffold_graph, + const func::TypedPredicate &pred) const; +}; + +} +} \ No newline at end of file diff --git a/src/common/modules/path_extend/scaffolder2015/connection_condition2015.cpp b/src/common/modules/path_extend/scaffolder2015/connection_condition2015.cpp index 40f58d49bc..e41b33109c 100644 --- a/src/common/modules/path_extend/scaffolder2015/connection_condition2015.cpp +++ b/src/common/modules/path_extend/scaffolder2015/connection_condition2015.cpp @@ -21,6 +21,9 @@ Connections ConnectionCondition::ConnectedWith(debruijn_graph::EdgeId e, } return res; } +bool ConnectionCondition::IsLast() const { + return false; +} PairedLibConnectionCondition::PairedLibConnectionCondition(const debruijn_graph::Graph &graph, std::shared_ptr lib, @@ -276,5 +279,8 @@ size_t AssemblyGraphConnectionCondition::GetLibIndex() const { int AssemblyGraphConnectionCondition::GetMedianGap (debruijn_graph::EdgeId, debruijn_graph::EdgeId) const { return 0; } +bool AssemblyGraphConnectionCondition::IsLast() const { + return true; +} } diff --git a/src/common/modules/path_extend/scaffolder2015/connection_condition2015.hpp b/src/common/modules/path_extend/scaffolder2015/connection_condition2015.hpp index b7027e30e4..836127c3f6 100644 --- 
a/src/common/modules/path_extend/scaffolder2015/connection_condition2015.hpp +++ b/src/common/modules/path_extend/scaffolder2015/connection_condition2015.hpp @@ -58,6 +58,7 @@ class ConnectionCondition { virtual Connections ConnectedWith(EdgeId e, const ScaffoldingUniqueEdgeStorage &storage) const; virtual int GetMedianGap(EdgeId e1, EdgeId e2) const = 0; virtual size_t GetLibIndex() const = 0; + virtual bool IsLast() const; virtual ~ConnectionCondition() { } }; @@ -150,6 +151,7 @@ class AssemblyGraphConnectionCondition : public ConnectionCondition { void AddInterestingEdges(func::TypedPredicate edge_condition); Connections ConnectedWith(EdgeId e) const override; size_t GetLibIndex() const override; + virtual bool IsLast() const override; int GetMedianGap(EdgeId, EdgeId ) const override; }; } diff --git a/src/common/modules/path_extend/scaffolder2015/scaffold_graph_constructor.cpp b/src/common/modules/path_extend/scaffolder2015/scaffold_graph_constructor.cpp index 5fe211c861..17171f84b0 100644 --- a/src/common/modules/path_extend/scaffolder2015/scaffold_graph_constructor.cpp +++ b/src/common/modules/path_extend/scaffolder2015/scaffold_graph_constructor.cpp @@ -9,11 +9,13 @@ // Created by andrey on 04.12.15. 
// +#include "common/modules/path_extend/scaffolder2015/scaffold_graph_dijkstra.hpp" #include "scaffold_graph_constructor.hpp" +#include "scaffold_graph_dijkstra.hpp" namespace path_extend { -namespace scaffold_graph { +namespace scaffolder { void BaseScaffoldGraphConstructor::ConstructFromEdgeConditions(func::TypedPredicate edge_condition, ConnectionConditions &connection_conditions, @@ -28,7 +30,10 @@ void BaseScaffoldGraphConstructor::ConstructFromEdgeConditions(func::TypedPredic void BaseScaffoldGraphConstructor::ConstructFromSet(const EdgeSet &edge_set, ConnectionConditions &connection_conditions, bool use_terminal_vertices_only) { - graph_->AddVertices(edge_set); + for (const auto &v: edge_set) { + graph_->AddVertex(v); + } + INFO("Added vertices") ConstructFromConditions(connection_conditions, use_terminal_vertices_only); } @@ -36,7 +41,7 @@ void BaseScaffoldGraphConstructor::ConstructFromConditions(ConnectionConditions bool use_terminal_vertices_only) { //TODO :: awful. It depends on ordering of connected conditions. 
for (auto condition : connection_conditions) { - if (condition->GetLibIndex() == (size_t) -1) + if (condition->IsLast()) ConstructFromSingleCondition(condition, true); else ConstructFromSingleCondition(condition, use_terminal_vertices_only); @@ -51,7 +56,8 @@ void BaseScaffoldGraphConstructor::ConstructFromSingleCondition(const std::share if (use_terminal_vertices_only && graph_->OutgoingEdgeCount(v) > 0) continue; - auto connected_with = condition->ConnectedWith(v); + EdgeId e = v.GetFirstEdge(); + auto connected_with = condition->ConnectedWith(e); for (const auto& pair : connected_with) { EdgeId connected = pair.first; double w = pair.second; @@ -59,23 +65,287 @@ void BaseScaffoldGraphConstructor::ConstructFromSingleCondition(const std::share if (graph_->Exists(connected)) { if (use_terminal_vertices_only && graph_->IncomingEdgeCount(connected) > 0) continue; - graph_->AddEdge(v, connected, condition->GetLibIndex(), w); + graph_->AddEdge(e, connected, condition->GetLibIndex(), w, 0); } } } } - -std::shared_ptr SimpleScaffoldGraphConstructor::Construct() { +std::shared_ptr SimpleScaffoldGraphConstructor::Construct() { ConstructFromSet(edge_set_, connection_conditions_); return graph_; } -std::shared_ptr DefaultScaffoldGraphConstructor::Construct() { +std::shared_ptr DefaultScaffoldGraphConstructor::Construct() { ConstructFromSet(edge_set_, connection_conditions_); ConstructFromEdgeConditions(edge_condition_, connection_conditions_); return graph_; } +PredicateScaffoldGraphFilter::PredicateScaffoldGraphFilter(const Graph &assembly_graph, + const ScaffoldGraph &old_graph, + std::shared_ptr predicate, + size_t max_threads) + : BaseScaffoldGraphConstructor(assembly_graph), old_graph_(old_graph), + predicate_(predicate), max_threads_(max_threads) {} + +void PredicateScaffoldGraphFilter::ConstructFromGraphAndPredicate(const ScaffoldGraph &old_graph, + std::shared_ptr predicate) { + for (const auto& vertex: old_graph.vertices()) { + graph_->AddVertex(vertex); + } + 
std::vector scaffold_edges; + for (const auto& edge: old_graph.edges()) { + scaffold_edges.push_back(edge); + } + size_t counter = 0; + const size_t block_size = scaffold_edges.size() / 10; + size_t threads = max_threads_; + DEBUG("Number of threads: " << threads); +#pragma omp parallel for num_threads(threads) + for (size_t i = 0; i < scaffold_edges.size(); ++i) { + auto edge = scaffold_edges[i]; + TRACE("Checking"); + bool check_predicate = (*predicate)(edge); + TRACE("Check result: " << check_predicate); +#pragma omp critical + { + if (check_predicate) { + graph_->AddEdge(edge); + } + ++counter; + if (block_size != 0 and counter % block_size == 0) { + DEBUG("Processed " << counter << " edges out of " << scaffold_edges.size()); + } + } + } +} + +std::shared_ptr PredicateScaffoldGraphFilter::Construct() { + ConstructFromGraphAndPredicate(old_graph_, predicate_); + return graph_; +} +ScoreFunctionScaffoldGraphFilter::ScoreFunctionScaffoldGraphFilter(const Graph &assembly_graph, + const ScaffoldGraph &old_graph, + std::shared_ptr score_function, + double score_threshold, size_t num_threads) + : BaseScaffoldGraphConstructor(assembly_graph), old_graph_(old_graph), + score_function_(score_function), score_threshold_(score_threshold), num_threads_(num_threads) {} + +void ScoreFunctionScaffoldGraphFilter::ConstructFromGraphAndScore(const ScaffoldGraph &graph, + std::shared_ptr score_function, + double score_threshold, size_t threads) { + for (const auto& vertex: graph.vertices()) { + graph_->AddVertex(vertex); + } + std::vector scaffold_edges; + for (const auto& edge: graph.edges()) { + scaffold_edges.push_back(edge); + } + size_t counter = 0; + const size_t block_size = scaffold_edges.size() / 25; + #pragma omp parallel for num_threads(threads) + for (size_t i = 0; i < scaffold_edges.size(); ++i) { + ScaffoldGraph::ScaffoldEdge edge = scaffold_edges[i]; + double score = score_function->GetScore(edge); + #pragma omp critical + { + TRACE("Checking edge " << 
edge.getStart().int_id() << " -> " << edge.getEnd().int_id()); + TRACE("Score: " << score); + TRACE("Score threshold: " << score_threshold); + if (math::ge(score, score_threshold)) { + TRACE("Success"); + graph_->AddEdge(edge.getStart(), edge.getEnd(), edge.getColor(), score, edge.getLength()); + } + TRACE("Edge added"); + ++counter; + if (block_size != 0 and counter % block_size == 0) { /* block_size is 0 when there are fewer than 25 edges; skip progress logging instead of dividing by zero */ + INFO("Processed " << counter << " edges out of " << scaffold_edges.size()); + } + } + } +} +std::shared_ptr ScoreFunctionScaffoldGraphFilter::Construct() { + ConstructFromGraphAndScore(old_graph_, score_function_, score_threshold_, num_threads_); + return graph_; +} +std::shared_ptr ScoreFunctionGraphConstructor::Construct() { + ConstructFromScore(score_function_, score_threshold_); + return graph_; +} +ScoreFunctionGraphConstructor::ScoreFunctionGraphConstructor(const Graph &assembly_graph, + std::vector chunks, + std::shared_ptr score_function, + double score_threshold, + size_t num_threads): + BaseScaffoldGraphConstructor(assembly_graph), + chunks_(chunks), + score_function_(score_function), + score_threshold_(score_threshold), + num_threads_(num_threads) {} +void ScoreFunctionGraphConstructor::ConstructFromScore(std::shared_ptr score_function, + double score_threshold) { + for (const auto &chunk: chunks_) { + graph_->AddVertex(chunk.vertex_); + } + size_t approx_block_counter = 0; + const size_t NUM_BLOCKS = 100; + const size_t block_size = chunks_.size() / NUM_BLOCKS; +#pragma omp parallel for schedule(guided) + for (size_t i = 0; i < chunks_.size(); ++i) { + for (auto it = chunks_[i].begin_; it != chunks_[i].end_; ++it) { + const ScaffoldVertex &first = chunks_[i].vertex_; + //todo move this check elsewhere + if (first != *it and first.GetConjugateFromGraph(graph_->AssemblyGraph()) != *it) { + ScaffoldGraph::ScaffoldEdge edge(first, *it, 0, .0, 0); + double score = score_function->GetScore(edge); + if (math::ge(score, score_threshold)) { + #pragma omp critical + { + TRACE("Success"); +
ScaffoldGraph::ScaffoldEdge new_edge(first, *it, 0, score, 0); + graph_->AddEdgeSimple(new_edge); + } + } + } + } + if (block_size != 0 and i != 0 and i % block_size == 0) { /* block_size is 0 when there are fewer than NUM_BLOCKS chunks; check it before taking the modulo */ +#pragma omp critical + { + ++approx_block_counter; + INFO("Processed " << approx_block_counter << " out of " << NUM_BLOCKS << " blocks"); + } + } + } +} + +std::shared_ptr ScaffoldSubgraphConstructor::Construct() { + for (const ScaffoldVertex& vertex: large_graph_.vertices()) { + if (vertex_condition_(vertex)) { + graph_->AddVertex(vertex); + } + } + INFO(graph_->VertexCount() << " vertices"); + + //todo add distance calculation + ScaffoldDijkstraHelper helper; + for (const ScaffoldVertex& vertex: graph_->vertices()) { + auto scaffold_dijkstra = helper.CreatePredicateBasedScaffoldDijkstra(large_graph_, vertex, vertex_condition_); + scaffold_dijkstra.Run(vertex); + for (auto reached: scaffold_dijkstra.ReachedVertices()) { + size_t distance = scaffold_dijkstra.GetDistance(reached); + if (distance < distance_threshold_ and vertex_condition_(reached) and vertex != reached) { + graph_->AddEdge(vertex, reached, (size_t) - 1, 0, distance); + } + } + } + return graph_; +} +ScaffoldSubgraphConstructor::ScaffoldSubgraphConstructor(const Graph &assembly_graph, + const func::TypedPredicate &vertex_condition, + const ScaffoldGraph &large_graph, + const size_t distance_threshold) + : BaseScaffoldGraphConstructor(assembly_graph), + vertex_condition_(vertex_condition), + large_graph_(large_graph), + distance_threshold_(distance_threshold) {} +ScoreFunctionScaffoldGraphConstructor::ScoreFunctionScaffoldGraphConstructor( + const Graph &assembly_graph, + const std::set &scaffold_vertices, + const std::shared_ptr &score_function, + double score_threshold, + size_t num_threads) + : BaseScaffoldGraphConstructor(assembly_graph), + scaffold_vertices_(scaffold_vertices), + score_function_(score_function), + score_threshold_(score_threshold), + num_threads_(num_threads) {} + +std::shared_ptr
ScoreFunctionScaffoldGraphConstructor::Construct() { + for (const auto& vertex: scaffold_vertices_) { + graph_->AddVertex(vertex); + } + //fixme switch to tbb or use chunk splitter + std::vector scaffold_vertex_vec; + for (const auto& vertex: scaffold_vertices_) { + scaffold_vertex_vec.push_back(vertex); + } + size_t counter = 0; + size_t edges_size = scaffold_vertices_.size() * scaffold_vertices_.size(); + const size_t block_size = edges_size / 10; +#pragma omp parallel for num_threads(num_threads_) + for (size_t i = 0; i < scaffold_vertex_vec.size(); ++i) { + for (size_t j = 0; j < scaffold_vertex_vec.size(); ++j) { + const ScaffoldVertex& from = scaffold_vertex_vec[i]; + const ScaffoldVertex& to = scaffold_vertex_vec[j]; + ScaffoldGraph::ScaffoldEdge edge(from, to); + double score = score_function_->GetScore(edge); +#pragma omp critical + { + TRACE("Checking edge " << edge.getStart().int_id() << " -> " << edge.getEnd().int_id()); + TRACE("Score: " << score); + TRACE("Score threshold: " << score_threshold_); + bool are_conjugate = from == to.GetConjugateFromGraph(graph_->AssemblyGraph()); + if (math::ge(score, score_threshold_) and from != to and not are_conjugate) { + TRACE("Success"); + graph_->AddEdge(edge.getStart(), edge.getEnd(), edge.getColor(), score, edge.getLength()); + } + TRACE("Edge added"); + ++counter; + if (block_size != 0 and counter % block_size == 0) { + DEBUG("Processed " << counter << " edges out of " << edges_size); + } + } + } + } + return graph_; +} +std::shared_ptr InternalScoreScaffoldGraphFilter::Construct() { + for (const auto& vertex: old_graph_.vertices()) { + graph_->AddVertex(vertex); + } + for (const ScaffoldVertex &vertex: old_graph_.vertices()) { + auto outgoing = old_graph_.OutgoingEdges(vertex); + auto incoming = old_graph_.IncomingEdges(vertex); + ProcessEdges(incoming); + ProcessEdges(outgoing); + } + return graph_; +} +boost::optional InternalScoreScaffoldGraphFilter::GetWinnerVertex( + std::vector &edges) const { + 
boost::optional result; + if (edges.size() < 2) { + return result; + } + std::sort(edges.begin(), edges.end(), [this](const ScaffoldEdge &first, const ScaffoldEdge &second) { + return math::gr(score_function_->GetScore(first), score_function_->GetScore(second)); + }); + const double top_score = score_function_->GetScore(edges[0]); + const double second_score = score_function_->GetScore(edges[1]); + if (math::gr(top_score, relative_threshold_ * second_score)) { + return edges[0]; + } + return result; +} +void InternalScoreScaffoldGraphFilter::ProcessEdges(std::vector &edges) { + boost::optional incoming_winner = GetWinnerVertex(edges); + if (incoming_winner.is_initialized()) { + graph_->AddEdge(incoming_winner.get()); + } else { + for (const auto &edge: edges) { + graph_->AddEdge(edge); + } + } +} +InternalScoreScaffoldGraphFilter::InternalScoreScaffoldGraphFilter( + const Graph &assembly_graph, + const ScaffoldGraph &old_graph, + std::shared_ptr score_function, + double relative_threshold) + : BaseScaffoldGraphConstructor(assembly_graph), + old_graph_(old_graph), + score_function_(score_function), + relative_threshold_(relative_threshold) {} } //scaffold_graph } //path_extend diff --git a/src/common/modules/path_extend/scaffolder2015/scaffold_graph_constructor.hpp b/src/common/modules/path_extend/scaffolder2015/scaffold_graph_constructor.hpp index 833f66d819..232fd04b61 100644 --- a/src/common/modules/path_extend/scaffolder2015/scaffold_graph_constructor.hpp +++ b/src/common/modules/path_extend/scaffolder2015/scaffold_graph_constructor.hpp @@ -11,24 +11,33 @@ #pragma once -#include "scaffold_graph.hpp" +#include "auxiliary_graphs/scaffold_graph/scaffold_graph.hpp" +#include "connection_condition2015.hpp" +#include "modules/path_extend/read_cloud_path_extend/scaffold_graph_construction/read_cloud_connection_conditions.hpp" + +#include +#include namespace path_extend { -namespace scaffold_graph { +namespace scaffolder { typedef std::vector> ConnectionConditions; 
//Iterface class ScaffoldGraphConstructor { - public: + typedef scaffold_graph::ScaffoldGraph ScaffoldGraph; + virtual std::shared_ptr Construct() = 0; }; //Basic scaffold graph constructor functions class BaseScaffoldGraphConstructor: public ScaffoldGraphConstructor { protected: + using ScaffoldGraphConstructor::ScaffoldGraph; + typedef scaffold_graph::ScaffoldVertex ScaffoldVertex; + std::shared_ptr graph_; BaseScaffoldGraphConstructor(const debruijn_graph::Graph& assembly_graph) { @@ -48,11 +57,15 @@ class BaseScaffoldGraphConstructor: public ScaffoldGraphConstructor { void ConstructFromEdgeConditions(func::TypedPredicate edge_condition, ConnectionConditions &connection_conditions, bool use_terminal_vertices_only = false); + + DECL_LOGGER("BaseScaffoldGraphConstructor"); }; class SimpleScaffoldGraphConstructor: public BaseScaffoldGraphConstructor { protected: + using BaseScaffoldGraphConstructor::ScaffoldGraph; + const EdgeSet &edge_set_; ConnectionConditions &connection_conditions_; @@ -68,6 +81,8 @@ class SimpleScaffoldGraphConstructor: public BaseScaffoldGraphConstructor { class DefaultScaffoldGraphConstructor: public SimpleScaffoldGraphConstructor { protected: + using SimpleScaffoldGraphConstructor::ScaffoldGraph; + func::TypedPredicate edge_condition_; public: @@ -82,6 +97,150 @@ class DefaultScaffoldGraphConstructor: public SimpleScaffoldGraphConstructor { std::shared_ptr Construct() override; }; +class ScaffoldSubgraphConstructor: public BaseScaffoldGraphConstructor { + using BaseScaffoldGraphConstructor::ScaffoldGraph; + + func::TypedPredicate vertex_condition_; + const ScaffoldGraph& large_graph_; + const size_t distance_threshold_; + + public: + ScaffoldSubgraphConstructor(const Graph &assembly_graph, + const func::TypedPredicate &vertex_condition, + const ScaffoldGraph &large_graph, + const size_t distance_threshold); + + std::shared_ptr Construct() override; +}; + +class PredicateScaffoldGraphFilter: public BaseScaffoldGraphConstructor { + public: + 
typedef read_cloud::ScaffoldEdgePredicate EdgePairPredicate; + using BaseScaffoldGraphConstructor::ScaffoldGraph; + using BaseScaffoldGraphConstructor::ScaffoldVertex; + protected: + const ScaffoldGraph& old_graph_; + const std::shared_ptr predicate_; + const size_t max_threads_; + + public: + PredicateScaffoldGraphFilter(const Graph& assembly_graph, + const ScaffoldGraph& old_graph, + std::shared_ptr predicate, + size_t max_threads); + + std::shared_ptr Construct() override; + protected: + void ConstructFromGraphAndPredicate(const ScaffoldGraph& old_graph, std::shared_ptr predicate); + + DECL_LOGGER("PredicateScaffoldGraphFilter"); + +}; + +struct ScaffoldVertexPairChunk { + public: + using ScaffoldVertex = scaffold_graph::ScaffoldVertex; + //todo other containers? + using scaffold_vertex_iterator = std::unordered_set::const_iterator; + + ScaffoldVertexPairChunk(const ScaffoldVertex &vertex, + scaffold_vertex_iterator begin, + scaffold_vertex_iterator end) : vertex_(vertex), begin_(begin), end_(end) {} + + ScaffoldVertex vertex_; + scaffold_vertex_iterator begin_; + scaffold_vertex_iterator end_; +}; + +class ScoreFunctionGraphConstructor: public BaseScaffoldGraphConstructor { + public: + typedef read_cloud::ScaffoldEdgeScoreFunction EdgePairScoreFunction; + using BaseScaffoldGraphConstructor::ScaffoldGraph; + using BaseScaffoldGraphConstructor::ScaffoldVertex; + + ScoreFunctionGraphConstructor(const Graph &assembly_graph, + std::vector chunks, + std::shared_ptr score_function, + double score_threshold, size_t num_threads); + + std::shared_ptr Construct() override; + private: + void ConstructFromScore(std::shared_ptr score_function, + double score_threshold); + + std::vector chunks_; + const std::shared_ptr score_function_; + const double score_threshold_; + const size_t num_threads_; + DECL_LOGGER("ScoreFunctionScaffoldGraphConstructor") +}; + +class ScoreFunctionScaffoldGraphFilter: public BaseScaffoldGraphConstructor { + typedef 
read_cloud::ScaffoldEdgeScoreFunction EdgePairScoreFunction; + using BaseScaffoldGraphConstructor::ScaffoldGraph; + using BaseScaffoldGraphConstructor::ScaffoldVertex; + protected: + const ScaffoldGraph &old_graph_; + const std::shared_ptr score_function_; + const double score_threshold_; + const size_t num_threads_; + public: + ScoreFunctionScaffoldGraphFilter(const Graph& assembly_graph, + const ScaffoldGraph& old_graph, + std::shared_ptr score_function, + double score_threshold, size_t num_threads); + + std::shared_ptr Construct() override; + protected: + void ConstructFromGraphAndScore(const ScaffoldGraph& graph, std::shared_ptr score_function, + double score_threshold, size_t threads); + DECL_LOGGER("ScoreFunctionScaffoldGraphConstructor") +}; + +class InternalScoreScaffoldGraphFilter: public BaseScaffoldGraphConstructor { + typedef read_cloud::ScaffoldEdgeScoreFunction EdgePairScoreFunction; + using BaseScaffoldGraphConstructor::ScaffoldGraph; + using BaseScaffoldGraphConstructor::ScaffoldVertex; + typedef ScaffoldGraph::ScaffoldEdge ScaffoldEdge; + protected: + const ScaffoldGraph &old_graph_; + std::shared_ptr score_function_; + const double relative_threshold_; + public: + InternalScoreScaffoldGraphFilter(const Graph &assembly_graph, + const ScaffoldGraph &old_graph, + std::shared_ptr score_function, + double relative_threshold); + + std::shared_ptr Construct() override; + private: + void ProcessEdges(std::vector &edges); + + boost::optional GetWinnerVertex(std::vector &edges) const; +}; + +class ScoreFunctionScaffoldGraphConstructor: public BaseScaffoldGraphConstructor { + typedef read_cloud::ScaffoldEdgeScoreFunction EdgePairScoreFunction; + using BaseScaffoldGraphConstructor::ScaffoldGraph; + using BaseScaffoldGraphConstructor::ScaffoldVertex; + + protected: + const std::set scaffold_vertices_; + const std::shared_ptr score_function_; + const double score_threshold_; + const size_t num_threads_; + + public: + ScoreFunctionScaffoldGraphConstructor(const 
Graph &assembly_graph, + const std::set &scaffold_vertices, + const std::shared_ptr &score_function, + double score_threshold, + size_t num_threads); + + std::shared_ptr Construct() override; + + DECL_LOGGER("ScoreFunctionScaffoldGraphConstructor"); +}; } //scaffold_graph } //path_extend diff --git a/src/common/modules/path_extend/scaffolder2015/scaffold_graph_dijkstra.hpp b/src/common/modules/path_extend/scaffolder2015/scaffold_graph_dijkstra.hpp new file mode 100644 index 0000000000..5948e84f28 --- /dev/null +++ b/src/common/modules/path_extend/scaffolder2015/scaffold_graph_dijkstra.hpp @@ -0,0 +1,246 @@ +//*************************************************************************** +//* Copyright (c) 2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. +//*************************************************************************** + +#pragma once + +#include "assembly_graph/dijkstra/dijkstra_helper.hpp" +#include "auxiliary_graphs/scaffold_graph/scaffold_graph.hpp" +#include "modules/path_extend/scaffolder2015/scaffold_vertex_predicates.hpp" + +namespace omnigraph { +template<> +class ForwardNeighbourIterator { + typedef scaffold_graph::ScaffoldGraph ScaffoldGraph; + typedef typename ScaffoldGraph::VertexId VertexId; + typedef typename ScaffoldGraph::EdgeId EdgeId; + typedef typename std::vector::const_iterator edge_const_iterator; + std::vector out_edges_; + edge_const_iterator current_; + public: + ForwardNeighbourIterator(const ScaffoldGraph &graph, VertexId vertex) : + out_edges_(graph.OutgoingEdges(vertex)), current_(out_edges_.begin()) {} + + bool HasNext() { + return current_ != out_edges_.end(); + } + + vertex_neighbour Next() { + TRACE("Before increment"); + TRACE(current_->getStart().int_id() << ", " << current_->getEnd().int_id()); + vertex_neighbour res(current_->getEnd(), *current_); + current_++; + return res; + } + + DECL_LOGGER("ScaffoldForwardNeighbourItetator"); +}; + +template<> +class 
BackwardNeighbourIterator { + typedef scaffold_graph::ScaffoldGraph ScaffoldGraph; + typedef typename ScaffoldGraph::VertexId VertexId; + typedef typename ScaffoldGraph::EdgeId EdgeId; + typedef typename std::vector::const_iterator edge_const_iterator; + + std::vector in_edges_; + edge_const_iterator current_; + public: + BackwardNeighbourIterator(const ScaffoldGraph &graph, VertexId vertex) : + in_edges_(graph.IncomingEdges(vertex)), current_(in_edges_.begin()) {} + + bool HasNext() { + return current_ != in_edges_.end(); + } + + vertex_neighbour Next() { + vertex_neighbour res(current_->getStart(), *current_); + current_++; + return res; + } +}; +} + +namespace path_extend { + +namespace scaffolder { + +template +class SimpleScaffoldGraphLengthCalculator { + protected: + typedef typename Graph::EdgeId EdgeId; + typedef typename Graph::VertexId VertexId; + public: + distance_t GetLength(EdgeId) const { + return 1; + } +}; + +template +class DistanceBasedScaffoldGraphLengthCalculator { + protected: + typedef typename Graph::EdgeId EdgeId; + typedef typename Graph::VertexId VertexId; + + const Graph &graph_; + public: + explicit DistanceBasedScaffoldGraphLengthCalculator(const Graph &graph) : graph_(graph) {} + distance_t GetLength(EdgeId edge) const { + return graph_.length(edge) + graph_.length(edge.getEnd()); + } +}; + +template +class ScaffoldBarcodedPathPutChecker { + typedef typename Graph::VertexId VertexId; + typedef typename Graph::EdgeId EdgeId; + + const Graph &g_; + const VertexId first_; + const VertexId second_; + std::shared_ptr predicate_; + + public: + ScaffoldBarcodedPathPutChecker(const Graph &g, const VertexId &first, const VertexId &second, + std::shared_ptr predicate) : + g_(g), + first_(first), + second_(second), + predicate_(predicate) { + TRACE("Construction"); + TRACE("First id: " << first_.int_id()); + TRACE("Second id: " << second_.int_id()); + } + + bool Check(VertexId vertex, EdgeId /*unused*/, distance_t distance) const { + 
TRACE("Checking vertex " << g_.str(vertex)); + TRACE("Id: " << vertex.int_id()); + TRACE("First id: " << first_.int_id()); + TRACE("Second id: " << second_.int_id()); + bool target_reached = distance > 0 and (vertex == first_ or vertex == second_); + if (target_reached) { + TRACE("Target reached"); + return false; + } + TRACE("Checking"); + return predicate_->Check(vertex); + } + DECL_LOGGER("ScaffoldBarcodePutChecker"); +}; + +template +class StartPredicateProcessChecker { + typedef typename Graph::VertexId VertexId; + typedef typename Graph::EdgeId EdgeId; + + const Graph &g_; + const VertexId start_; + const func::TypedPredicate &predicate_; + public: + StartPredicateProcessChecker(const Graph &g, + const VertexId &start, + const func::TypedPredicate &predicate) + : g_(g), start_(start), predicate_(predicate) {} + + bool Check(VertexId vertex, distance_t /*distance*/) const { + return vertex == start_ or not predicate_(vertex); + } +}; + +template +class TrivialScaffoldPutChecker { + typedef typename Graph::VertexId VertexId; + typedef typename Graph::EdgeId EdgeId; + + public: + TrivialScaffoldPutChecker() {} + + bool Check(VertexId /*unused*/, EdgeId /*unused*/, distance_t /*unused*/) const { + return true; + } + DECL_LOGGER("TrivialScaffoldPutChecker"); +}; + +typedef omnigraph::ComposedDijkstraSettings, + StartPredicateProcessChecker, + TrivialScaffoldPutChecker, + omnigraph::ForwardNeighbourIteratorFactory > + PredicateBasedScaffoldDijkstraSettings; + +typedef omnigraph::Dijkstra + PredicateBasedScaffoldDijkstra; + +//forward scaffold dijkstra + +typedef omnigraph::ComposedDijkstraSettings, + omnigraph::BoundedVertexTargetedProcessChecker, + ScaffoldBarcodedPathPutChecker, + omnigraph::ForwardNeighbourIteratorFactory > + ForwardBoundedScaffoldDijkstraSettings; + +typedef omnigraph::Dijkstra + ForwardBoundedScaffoldDijkstra; + +//backward scaffold dijkstra + +typedef omnigraph::ComposedDijkstraSettings, + omnigraph::BoundedVertexTargetedProcessChecker, + 
ScaffoldBarcodedPathPutChecker, + omnigraph::BackwardNeighbourIteratorFactory > + BackwardBoundedScaffoldDijkstraSettings; + +typedef omnigraph::Dijkstra + BackwardBoundedScaffoldDijkstra; + +class ScaffoldDijkstraHelper { + public: + static BackwardBoundedScaffoldDijkstra CreateBackwardBoundedScaffoldDijkstra( + const scaffold_graph::ScaffoldGraph &graph, + const scaffold_graph::ScaffoldVertex first, + const scaffold_graph::ScaffoldVertex second, + size_t length_bound, + std::shared_ptr predicate, + size_t max_vertex_number = -1ul) { + return BackwardBoundedScaffoldDijkstra(graph, BackwardBoundedScaffoldDijkstraSettings( + SimpleScaffoldGraphLengthCalculator(), + omnigraph::BoundedVertexTargetedProcessChecker(first, length_bound), + ScaffoldBarcodedPathPutChecker(graph, first, second, predicate), + omnigraph::BackwardNeighbourIteratorFactory(graph)), + max_vertex_number); + } + + static ForwardBoundedScaffoldDijkstra CreateForwardBoundedScaffoldDijkstra( + const scaffold_graph::ScaffoldGraph &graph, + const scaffold_graph::ScaffoldVertex &first, + const scaffold_graph::ScaffoldVertex &second, + size_t length_bound, + std::shared_ptr predicate, + size_t max_vertex_number = -1ul) { + return ForwardBoundedScaffoldDijkstra(graph, ForwardBoundedScaffoldDijkstraSettings( + SimpleScaffoldGraphLengthCalculator(), + omnigraph::BoundedVertexTargetedProcessChecker(second, length_bound), + ScaffoldBarcodedPathPutChecker(graph, first, second, predicate), + omnigraph::ForwardNeighbourIteratorFactory(graph)), + max_vertex_number); + } + + static PredicateBasedScaffoldDijkstra CreatePredicateBasedScaffoldDijkstra( + const scaffold_graph::ScaffoldGraph &graph, + const scaffold_graph::ScaffoldVertex &vertex, + const func::TypedPredicate &predicate, + size_t max_vertex_number = -1ul) { + return PredicateBasedScaffoldDijkstra(graph, PredicateBasedScaffoldDijkstraSettings( + DistanceBasedScaffoldGraphLengthCalculator(graph), + StartPredicateProcessChecker(graph, vertex, predicate), + 
TrivialScaffoldPutChecker(), + omnigraph::ForwardNeighbourIteratorFactory(graph)), + max_vertex_number); + } +}; +} +} \ No newline at end of file diff --git a/src/common/modules/path_extend/scaffolder2015/scaffold_graph_visualizer.cpp b/src/common/modules/path_extend/scaffolder2015/scaffold_graph_visualizer.cpp index f86a23d29b..1f20eb2369 100644 --- a/src/common/modules/path_extend/scaffolder2015/scaffold_graph_visualizer.cpp +++ b/src/common/modules/path_extend/scaffolder2015/scaffold_graph_visualizer.cpp @@ -13,7 +13,7 @@ namespace path_extend { -namespace scaffold_graph { +namespace scaffolder { const std::map ScaffoldEdgeColorer::color_map = {{(size_t) -1, "black"}, @@ -36,12 +36,12 @@ std::string ScaffoldGraphLabeler::label(VertexId v) const { auto it = additional_vertex_labels_.find(v); std::string additional_label = it == additional_vertex_labels_.end() ? "" : it->second + "\n"; return "ID: " + std::to_string(graph_.int_id(v)) + - "\\n Len: " + std::to_string(graph_.AssemblyGraph().length(v)) + - "\\n Cov: " + std::to_string(graph_.AssemblyGraph().coverage(v)) + "\n" + + "\\n Len: " + std::to_string(graph_.length(v)) + + "\\n Cov: " + std::to_string(graph_.coverage(v)) + "\n" + additional_label; } -void ScaffoldGraphVisualizer::Visualize(graph_printer::GraphPrinter &printer) { +void ScaffoldGraphVisualizer::Visualize(graph_printer::GraphPrinter &printer) { printer.open(); printer.AddVertices(graph_.vbegin(), graph_.vend()); for (const auto& e : graph_.edges()) { @@ -51,15 +51,15 @@ void ScaffoldGraphVisualizer::Visualize(graph_printer::GraphPrinter &colorer) { + graph_colorer::CompositeGraphColorer &colorer) { ScaffoldGraphLabeler labeler(graph_, additional_vertex_labels_); - vertex_linker::EmptyGraphLinker linker; + vertex_linker::EmptyGraphLinker linker; - graph_printer::SingleGraphPrinter printer(graph_, os, labeler, colorer, linker); + graph_printer::SingleGraphPrinter printer(graph_, os, labeler, colorer, linker); Visualize(printer); } -std::string 
ScaffoldEdgeColorer::GetValue(ScaffoldGraph::EdgeId e) const { +std::string ScaffoldEdgeColorer::GetValue(scaffold_graph::ScaffoldGraph::EdgeId e) const { auto it = color_map.find(e.getColor()); if (it != color_map.end()) { return it->second; @@ -67,7 +67,7 @@ std::string ScaffoldEdgeColorer::GetValue(ScaffoldGraph::EdgeId e) const { return default_color; } -std::string ScaffoldVertexSetColorer::GetValue(ScaffoldGraph::VertexId v) const { +std::string ScaffoldVertexSetColorer::GetValue(scaffold_graph::ScaffoldGraph::VertexId v) const { if (vertex_set_.count(v) > 0) return "white"; return "yellow"; diff --git a/src/common/modules/path_extend/scaffolder2015/scaffold_graph_visualizer.hpp b/src/common/modules/path_extend/scaffolder2015/scaffold_graph_visualizer.hpp index 8005c65db0..b1bec1248f 100644 --- a/src/common/modules/path_extend/scaffolder2015/scaffold_graph_visualizer.hpp +++ b/src/common/modules/path_extend/scaffolder2015/scaffold_graph_visualizer.hpp @@ -12,27 +12,27 @@ #ifndef PROJECT_SCAFFOLD_GRAPH_VISUALIZER_HPP #define PROJECT_SCAFFOLD_GRAPH_VISUALIZER_HPP -#include "scaffold_graph.hpp" +#include "auxiliary_graphs/scaffold_graph/scaffold_graph.hpp" #include "visualization/graph_colorer.hpp" -#include "visualization/graph_labeler.hpp" #include "visualization/graph_printer.hpp" namespace path_extend { -namespace scaffold_graph { +namespace scaffolder { using namespace visualization; -class ScaffoldGraphLabeler : public graph_labeler::GraphLabeler { + class ScaffoldGraphLabeler : public graph_labeler::GraphLabeler { private: - const ScaffoldGraph &graph_; + const scaffold_graph::ScaffoldGraph &graph_; const std::map &additional_vertex_labels_; public: - ScaffoldGraphLabeler(const ScaffoldGraph &graph, const std::map &additional_vertex_labels): + ScaffoldGraphLabeler(const scaffold_graph::ScaffoldGraph &graph, + const std::map &additional_vertex_labels): graph_(graph), additional_vertex_labels_(additional_vertex_labels) { } @@ -42,44 +42,44 @@ class 
ScaffoldGraphLabeler : public graph_labeler::GraphLabeler { }; -class ScaffoldEdgeColorer : public graph_colorer::ElementColorer { +class ScaffoldEdgeColorer : public graph_colorer::ElementColorer { private: static const std::map color_map; static const std::string default_color; public: - std::string GetValue(ScaffoldGraph::EdgeId e) const; + std::string GetValue(scaffold_graph::ScaffoldGraph::EdgeId e) const; }; -class ScaffoldVertexSetColorer : public graph_colorer::ElementColorer { +class ScaffoldVertexSetColorer : public graph_colorer::ElementColorer { private: - std::set vertex_set_; + std::set vertex_set_; public: - ScaffoldVertexSetColorer(const std::set &vertex_set): vertex_set_(vertex_set) { + ScaffoldVertexSetColorer(const std::set &vertex_set): vertex_set_(vertex_set) { } - std::string GetValue(ScaffoldGraph::VertexId v) const; + std::string GetValue(scaffold_graph::ScaffoldGraph::VertexId v) const; }; class ScaffoldGraphVisualizer { private: - const ScaffoldGraph &graph_; + const scaffold_graph::ScaffoldGraph &graph_; - const std::map &additional_vertex_labels_; + const std::map &additional_vertex_labels_; private: - void Visualize(graph_printer::GraphPrinter &printer); + void Visualize(graph_printer::GraphPrinter &printer); public: - ScaffoldGraphVisualizer(const ScaffoldGraph &graph, - const std::map &additional_vertex_labels) : + ScaffoldGraphVisualizer(const scaffold_graph::ScaffoldGraph &graph, + const std::map &additional_vertex_labels) : graph_(graph), additional_vertex_labels_(additional_vertex_labels){ } - void Visualize(std::ostream &os, graph_colorer::CompositeGraphColorer &colorer); + void Visualize(std::ostream &os, graph_colorer::CompositeGraphColorer &colorer); }; } //scaffold_graph diff --git a/src/common/modules/path_extend/scaffolder2015/scaffold_vertex_predicates.cpp b/src/common/modules/path_extend/scaffolder2015/scaffold_vertex_predicates.cpp new file mode 100644 index 0000000000..94feaf12db --- /dev/null +++ 
b/src/common/modules/path_extend/scaffolder2015/scaffold_vertex_predicates.cpp @@ -0,0 +1,26 @@ +//*************************************************************************** +//* Copyright (c) 2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. +//*************************************************************************** + +#include "scaffold_vertex_predicates.hpp" + +namespace path_extend { +namespace scaffolder { + +LengthChecker::LengthChecker(size_t length_threshold, const Graph &g) + : length_threshold_(length_threshold), g_(g) {} +bool LengthChecker::Check(const ScaffoldVertex &vertex) const { + return vertex.GetLengthFromGraph(g_) < length_threshold_; +} + +AndPredicate::AndPredicate(std::shared_ptr first, + std::shared_ptr second) : + first_(first), second_(second) {} +bool AndPredicate::Check(const ScaffoldVertex &scaffold_vertex) const { + return first_->Check(scaffold_vertex) and second_->Check(scaffold_vertex); +} + +} +} \ No newline at end of file diff --git a/src/common/modules/path_extend/scaffolder2015/scaffold_vertex_predicates.hpp b/src/common/modules/path_extend/scaffolder2015/scaffold_vertex_predicates.hpp new file mode 100644 index 0000000000..dd9134068a --- /dev/null +++ b/src/common/modules/path_extend/scaffolder2015/scaffold_vertex_predicates.hpp @@ -0,0 +1,51 @@ +//*************************************************************************** +//* Copyright (c) 2019 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. 
+//*************************************************************************** + +#pragma once + +#include "auxiliary_graphs/scaffold_graph/scaffold_vertex.hpp" +#include "assembly_graph/core/graph.hpp" + +#include + +namespace path_extend { +namespace scaffolder { + +class ScaffoldVertexPredicate + : public func::AbstractPredicate { + public: + virtual ~ScaffoldVertexPredicate() = default; +}; + +class LengthChecker : public ScaffoldVertexPredicate { + public: + typedef scaffold_graph::ScaffoldVertex ScaffoldVertex; + typedef debruijn_graph::Graph Graph; + + LengthChecker(size_t length_threshold, const Graph &g); + + bool Check(const ScaffoldVertex &vertex) const override; + + private: + const size_t length_threshold_; + const Graph &g_; +}; + +class AndPredicate : public ScaffoldVertexPredicate { + public: + typedef scaffold_graph::ScaffoldVertex ScaffoldVertex; + + AndPredicate(std::shared_ptr first, std::shared_ptr second); + + bool Check(const ScaffoldVertex &scaffold_vertex) const override; + + private: + std::shared_ptr first_; + std::shared_ptr second_; +}; + +} +} \ No newline at end of file diff --git a/src/common/sequence/sequence.hpp b/src/common/sequence/sequence.hpp index b4b3af3788..00f5a567b9 100755 --- a/src/common/sequence/sequence.hpp +++ b/src/common/sequence/sequence.hpp @@ -76,7 +76,7 @@ class Sequence { size_t bytes_size = DataSize(size_); ST *bytes = data_->data(); - VERIFY(is_dignucl(s[0]) || is_nucl(s[0])); +// VERIFY(is_dignucl(s[0]) || is_nucl(s[0])); // Which symbols does our string contain : 0123 or ACGT? 
bool digit_str = is_dignucl(s[0]); diff --git a/src/projects/corrector/dataset_processor.cpp b/src/projects/corrector/dataset_processor.cpp index 11fb5bd341..1fe7d2baab 100644 --- a/src/projects/corrector/dataset_processor.cpp +++ b/src/projects/corrector/dataset_processor.cpp @@ -207,7 +207,8 @@ void DatasetProcessor::ProcessDataset() { for (size_t i = 0; i < corr_cfg::get().dataset.lib_count(); ++i) { const auto& dataset = corr_cfg::get().dataset[i]; auto lib_type = dataset.type(); - if (lib_type == io::LibraryType::PairedEnd || lib_type == io::LibraryType::HQMatePairs || lib_type == io::LibraryType::SingleReads) { + if (lib_type == io::LibraryType ::Clouds10x || lib_type == io::LibraryType::PairedEnd || + lib_type == io::LibraryType::HQMatePairs || lib_type == io::LibraryType::SingleReads) { for (auto iter = dataset.paired_begin(); iter != dataset.paired_end(); iter++) { handle_one_lib({iter->first, iter->second}, "paired", lib_type); } diff --git a/src/projects/hammer/kmer_data.hpp b/src/projects/hammer/kmer_data.hpp index d95da23dd9..b9dc74b786 100644 --- a/src/projects/hammer/kmer_data.hpp +++ b/src/projects/hammer/kmer_data.hpp @@ -10,10 +10,11 @@ #define __HAMMER_KMER_DATA_HPP__ #include "kmer_stat.hpp" -#include "adt/array_vector.hpp" #include "kmer_index/kmer_mph/kmer_index.hpp" +#include "kmer_index/kmer_mph/kmer_index_traits.hpp" #include "utils/logger/logger.hpp" +#include "adt/array_vector.hpp" #include diff --git a/src/projects/ionhammer/kmer_data.hpp b/src/projects/ionhammer/kmer_data.hpp index 30ca4e2af4..2cc3fe1c36 100644 --- a/src/projects/ionhammer/kmer_data.hpp +++ b/src/projects/ionhammer/kmer_data.hpp @@ -12,6 +12,7 @@ #include "config_struct.hpp" #include "kmer_index/kmer_mph/kmer_index.hpp" +#include "kmer_index/kmer_mph/kmer_index_traits.hpp" #include "utils/logger/logger.hpp" #include "hkmer.hpp" diff --git a/src/projects/mts/contig_abundance.cpp b/src/projects/mts/contig_abundance.cpp index d006aa5c28..97d60232d7 100644 --- 
a/src/projects/mts/contig_abundance.cpp +++ b/src/projects/mts/contig_abundance.cpp @@ -6,8 +6,8 @@ //*************************************************************************** #include "contig_abundance.hpp" -#include "kmer_index/kmer_mph/kmer_splitters.hpp" #include "math/xmath.h" +#include "utils/stl_utils.hpp" namespace debruijn_graph { diff --git a/src/projects/splitter/CMakeLists.txt b/src/projects/splitter/CMakeLists.txt new file mode 100644 index 0000000000..7288ae468b --- /dev/null +++ b/src/projects/splitter/CMakeLists.txt @@ -0,0 +1,19 @@ +############################################################################ +# Copyright (c) 2021-2023 Saint Petersburg State University +# All Rights Reserved +# See file LICENSE for details. +############################################################################ + +project(splitter CXX) + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) + +add_executable(splitter + main.cpp barcode_index_construction.cpp graph_resolver.cpp graph_resolver_io.cpp + path_extractor.cpp scaffold_graph_helper.cpp) + +target_link_libraries(splitter spades-stages graphio toolchain common_modules ${COMMON_LIBRARIES} auxiliary_graphs) + +install(TARGETS splitter + DESTINATION bin + COMPONENT splitter) diff --git a/src/projects/splitter/barcode_index_construction.cpp b/src/projects/splitter/barcode_index_construction.cpp new file mode 100644 index 0000000000..84cb208bc2 --- /dev/null +++ b/src/projects/splitter/barcode_index_construction.cpp @@ -0,0 +1,82 @@ +//*************************************************************************** +//* Copyright (c) 2021-2023 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. 
+//*************************************************************************** + +#include "alignment/bwa_sequence_mapper.hpp" +#include "alignment/kmer_sequence_mapper.hpp" + +#include "barcode_index_construction.hpp" +#include "barcode_index/barcode_index_builder.hpp" + +#include "utils/verify.hpp" + +namespace cont_index { + +using namespace barcode_index; + +void ConstructBarcodeIndex(barcode_index::FrameBarcodeIndex &barcode_index, + paired_info::SequencingLib &lib, + const debruijn_graph::Graph &graph, + const std::filesystem::path &workdir, + unsigned nthreads, + size_t frame_size, + unsigned mapping_k, + bool bin_load, + bool bin_save) { + if (!bin_load) { + const std::vector barcode_prefices = {"BC:Z:", "BX:Z:"}; +// alignment::BWAReadMapper mapper(graph); + const unsigned min_occ = 2; + alignment::ShortKMerReadMapper mapper(graph, workdir, mapping_k, min_occ); + FrameConcurrentBarcodeIndexBuffer buffer(graph, frame_size); + FrameBarcodeIndexBuilder barcode_index_builder(graph, mapper, barcode_prefices, frame_size, nthreads); + bool is_tellseq = lib.type() == io::LibraryType::TellSeqReads; + if (not is_tellseq) { + barcode_index_builder.ConstructBarcodeIndex(io::paired_easy_readers(lib, false, 0), barcode_index, lib, is_tellseq); + } + if (is_tellseq) { + INFO("Constructing from tellseq lib"); + barcode_index_builder.ConstructBarcodeIndex(io::tellseq_easy_readers(lib, false, 0), barcode_index, lib, is_tellseq); + } + INFO("Barcode index construction finished."); + + if (bin_save) { + INFO("Saving barcode index"); + io::binary::Save((workdir / "barcode_index").string(), barcode_index); + } + } else { + INFO("Loading barcode index"); + io::binary::Load((workdir / "barcode_index").string(), barcode_index); + } + INFO("Barcode index size: " << barcode_index.size()); + using BarcodeExtractor = barcode_index::FrameBarcodeIndexInfoExtractor; + auto barcode_extractor_ptr = std::make_shared(barcode_index, graph); + size_t total_reads = 0; + for (const auto &edge: 
graph.edges()) { + auto begin = barcode_extractor_ptr->barcode_iterator_begin(edge); + auto end = barcode_extractor_ptr->barcode_iterator_end(edge); + for (auto it = begin; it != end; ++it) { + total_reads += it->second.GetCount(); + } + } + INFO(total_reads << " total reads in the barcode index"); +} + +void DownsampleBarcodeIndex(const debruijn_graph::Graph &graph, + unsigned nthreads, + barcode_index::FrameBarcodeIndex &barcode_index, + barcode_index::FrameBarcodeIndex &downsampled_index, + double sampling_factor) { + VERIFY_DEV(math::ls(sampling_factor, 1.0)); + const size_t mapping_k = 31; + const std::vector barcode_prefices = {"BC:Z:", "BX:Z:"}; + debruijn_graph::Graph empty_graph(mapping_k); + alignment::BWAReadMapper mapper(empty_graph); + FrameBarcodeIndexBuilder barcode_index_builder(graph, mapper, barcode_prefices, barcode_index.GetFrameSize(), nthreads); + barcode_index_builder.DownsampleBarcodeIndex(downsampled_index, barcode_index, sampling_factor); +} + +} + diff --git a/src/projects/splitter/barcode_index_construction.hpp b/src/projects/splitter/barcode_index_construction.hpp new file mode 100644 index 0000000000..da660b4093 --- /dev/null +++ b/src/projects/splitter/barcode_index_construction.hpp @@ -0,0 +1,39 @@ +//*************************************************************************** +//* Copyright (c) 2021-2023 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. 
+//*************************************************************************** + +#pragma once + +#include "assembly_graph/core/graph.hpp" +#include "barcode_index/barcode_info_extractor.hpp" +#include "paired_info/paired_info_utils.hpp" + +#include "io/binary/read_cloud.hpp" +#include "io/dataset_support/read_converter.hpp" + +#include +#include + +namespace cont_index { + +typedef io::DataSet DataSet; +typedef io::SequencingLibrary SequencingLib; + +void ConstructBarcodeIndex(barcode_index::FrameBarcodeIndex &barcode_index, + paired_info::SequencingLib &lib, + const debruijn_graph::Graph &graph, + const std::filesystem::path &workdir, + unsigned nthreads, + size_t frame_size, + unsigned mapping_k, + bool bin_load, + bool bin_save); + +void DownsampleBarcodeIndex(const debruijn_graph::Graph &graph, + unsigned nthreads, + barcode_index::FrameBarcodeIndex &barcode_index, + barcode_index::FrameBarcodeIndex &downsampled_index, + double sampling_factor); +} diff --git a/src/projects/splitter/graph_resolver.cpp b/src/projects/splitter/graph_resolver.cpp new file mode 100644 index 0000000000..4e5e6c7bcb --- /dev/null +++ b/src/projects/splitter/graph_resolver.cpp @@ -0,0 +1,127 @@ +//*************************************************************************** +//* Copyright (c) 2021-2023 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. 
+//***************************************************************************
+
+#include "graph_resolver.hpp"
+
+#include "assembly_graph/core/construction_helper.hpp"
+#include "assembly_graph/core/debruijn_data.hpp"
+
+namespace cont_index {
+
+// Splits every vertex whose result state is Completely or Partially.
+// For each supported (in_edge, out_edge) pair the two edges are detached from
+// the vertex, a new vertex holding only the link selected by GetLinkMap is
+// created, and both edges are attached to it. A completely resolved vertex is
+// then deleted. For a partially resolved vertex the links that touch neither a
+// resolved in-edge nor a resolved out-edge are moved to one more new vertex,
+// which takes over the remaining incoming and outgoing edges, and the original
+// vertex is deleted.
+// Returns a map from each per-pair new vertex to the vertex it was split from.
+// The "remainder" vertex of a partially resolved vertex is not recorded there.
+GraphResolver::GraphResolverInfo::VertexMap GraphResolver::SplitVertices(debruijn_graph::Graph &graph,
+                                                                         const VertexResults &vertex_results) const {
+    GraphResolver::GraphResolverInfo::VertexMap transformed_vertex_to_original;
+    auto helper = graph.GetConstructionHelper();
+    for (const auto &vertex_entry: vertex_results.vertex_to_result) {
+        const VertexId &vertex = vertex_entry.first;
+        DEBUG("Conjugate: " << graph.conjugate(vertex).int_id());
+        const auto &vertex_result = vertex_entry.second;
+
+        if (vertex_result.state == VertexState::Completely or vertex_result.state == VertexState::Partially) {
+            auto in_to_correct_link = GetLinkMap(graph, vertex, vertex_result);
+            // Every supported pair must have exactly one matching link at this vertex.
+            VERIFY_DEV(in_to_correct_link.size() == vertex_entry.second.supported_pairs.size());
+            std::unordered_set resolved_in_edges;
+            std::unordered_set resolved_out_edges;
+            for (const auto &entry: vertex_result.supported_pairs) {
+                EdgeId in_edge = entry.first;
+                EdgeId out_edge = entry.second;
+                LinkId link = in_to_correct_link.at(in_edge);
+                DEBUG("In edge: " << in_edge.int_id() << ", out edge: " << out_edge.int_id() << ", vertex: " << vertex.int_id());
+                // Detach the outgoing edge directly and the incoming edge through the conjugate vertex.
+                helper.DeleteLink(vertex, out_edge);
+                helper.DeleteLink(graph.conjugate(vertex), graph.conjugate(in_edge));
+                std::vector links {link};
+                VertexId new_vertex = helper.CreateVertex(debruijn_graph::DeBruijnVertexData(links));
+                transformed_vertex_to_original[new_vertex] = vertex;
+                helper.LinkIncomingEdge(new_vertex, in_edge);
+                helper.LinkOutgoingEdge(new_vertex, out_edge);
+                resolved_in_edges.insert(in_edge);
+                resolved_out_edges.insert(out_edge);
+            }
+            if (vertex_result.state == VertexState::Completely) {
+                graph.DeleteVertex(vertex);
+            } else {
+                std::vector links = graph.move_links(vertex);
+                std::vector new_links;
+                // Keep only links whose both ends are still unresolved.
+                for (const auto &link_id: links) {
+                    auto link = graph.link(link_id);
+                    if (resolved_in_edges.find(link.link.first) == resolved_in_edges.end() and resolved_out_edges.find(link.link.second) == resolved_out_edges.end()) {
+                        new_links.push_back(link_id);
+                    }
+                }
+                VertexId new_vertex = helper.CreateVertex(debruijn_graph::DeBruijnVertexData(new_links));
+                // NOTE(review): the edge ranges of `vertex` are iterated while DeleteLink
+                // modifies that vertex; confirm the ranges stay valid during the loops.
+                for (const auto &in_edge: graph.IncomingEdges(vertex)) {
+                    helper.DeleteLink(graph.conjugate(vertex), graph.conjugate(in_edge));
+                    helper.LinkIncomingEdge(new_vertex, in_edge);
+                }
+                for (const auto &out_edge: graph.OutgoingEdges(vertex)) {
+                    helper.DeleteLink(vertex, out_edge);
+                    helper.LinkOutgoingEdge(new_vertex, out_edge);
+                }
+                graph.DeleteVertex(vertex);
+            }
+        }
+    }
+    return transformed_vertex_to_original;
+}
+// Merges every multi-edge path into one edge via graph.MergePath and returns a
+// map from each original edge to the edge that now represents it. A path of
+// size 1 is left alone and its edge maps to itself. Only `path.first` of each
+// container entry is used.
+GraphResolver::GraphResolverInfo::EdgeMap GraphResolver::MergePaths(debruijn_graph::Graph &graph,
+                                                                    const path_extend::PathContainer &paths) const {
+    GraphResolver::GraphResolverInfo::EdgeMap original_edge_to_transformed;
+    for (const auto &path: paths) {
+        if (path.first->Size() == 1) {
+            original_edge_to_transformed[path.first->Back()] = path.first->Back();
+            continue;
+        }
+        std::vector simple_path;
+        std::vector overlaps;
+        const auto &first_path = *(path.first);
+        for (size_t i = 0; i < first_path.Size(); ++i) {
+            // An overlap is recorded only for junctions at complex vertices, so
+            // `overlaps` can be shorter than the number of junctions in the path.
+            if (i > 0 and graph.is_complex(graph.EdgeStart(first_path[i]))) {
+                size_t overlap = graph.link_length(graph.EdgeStart(first_path[i]), first_path[i - 1], first_path[i]);
+                overlaps.push_back(static_cast(overlap));
+            }
+            simple_path.push_back(first_path[i]);
+        }
+
+        EdgeId resulting_edge = graph.MergePath(simple_path, true, overlaps);
+        for (const auto &edge: simple_path) {
+            original_edge_to_transformed[edge] = resulting_edge;
+        }
+    }
+    return original_edge_to_transformed;
+}
+// Entry point: splits the resolved vertices first, then merges the paths, and
+// returns both resulting maps bundled in a GraphResolverInfo.
+GraphResolver::GraphResolverInfo GraphResolver::TransformGraph(debruijn_graph::Graph &graph,
+                                                               const path_extend::PathContainer &paths,
+                                                               const VertexResults &vertex_results) const {
+    INFO("Transforming assembly graph");
+    auto vertex_map = SplitVertices(graph, vertex_results);
+    auto edge_map = MergePaths(graph, paths);
+    GraphResolverInfo result(vertex_map, edge_map);
+    return result;
+}
+// For each supported (in, out) pair of `vertex_result`, finds the link stored at
+// `vertex` that joins exactly that pair and returns a map in-edge -> link id.
+// Pairs with no such link are simply absent from the result.
+GraphResolver::LinkMap GraphResolver::GetLinkMap(const debruijn_graph::Graph &graph,
+                                                 const GraphResolver::VertexId &vertex,
+                                                 const VertexResult &vertex_result) const {
+    std::unordered_map in_to_out;
+    std::unordered_map in_to_correct_link;
+    for (const auto &entry: vertex_result.supported_pairs) {
+        EdgeId in_edge = entry.first;
+        EdgeId out_edge = entry.second;
+        // Each in-edge may appear in at most one supported pair.
+        VERIFY_DEV(in_to_out.find(in_edge) == in_to_out.end());
+        in_to_out[in_edge] = out_edge;
+    }
+    for (const LinkId &link_id: graph.links(vertex)) {
+        const auto &link = graph.link(link_id);
+        auto in_result = in_to_out.find(link.link.first);
+        if (in_result == in_to_out.end()) {
+            continue;
+        }
+        if (in_result->second == link.link.second) {
+            // insert() keeps the first matching link if several links join the same pair.
+            in_to_correct_link.insert({link.link.first, link_id});
+        }
+    }
+    return in_to_correct_link;
+}
+}
diff --git a/src/projects/splitter/graph_resolver.hpp b/src/projects/splitter/graph_resolver.hpp
new file mode 100644
index 0000000000..4e6184672b
--- /dev/null
+++ b/src/projects/splitter/graph_resolver.hpp
@@ -0,0 +1,45 @@
+//***************************************************************************
+//* Copyright (c) 2021-2023 Saint Petersburg State University
+//* All Rights Reserved
+//* See file LICENSE for details.
+//***************************************************************************
+
+#pragma once
+
+#include "vertex_resolver.hpp"
+
+#include "assembly_graph/core/observable_graph.hpp"
+#include "assembly_graph/paths/bidirectional_path_container.hpp"
+
+namespace cont_index {
+// Rewrites an assembly graph according to vertex resolution results: resolved
+// vertices are split and the extracted paths are merged into single edges.
+class GraphResolver {
+  public:
+    using EdgeId = debruijn_graph::EdgeId;
+    using VertexId = debruijn_graph::VertexId;
+    using LinkId = debruijn_graph::LinkId;
+    // In graph_resolver.cpp this map is filled with in-edge -> link id entries.
+    using LinkMap = std::unordered_map;
+
+    // Bookkeeping returned by TransformGraph so that callers can relate the
+    // transformed graph to the original one.
+    struct GraphResolverInfo {
+      public:
+        using VertexMap = std::unordered_map;
+        using EdgeMap = std::unordered_map;
+
+        // Both maps are copied into the object.
+        GraphResolverInfo(const VertexMap &transformed_vertex_to_original, const EdgeMap &original_edge_to_transformed)
+            : transformed_vertex_to_original(transformed_vertex_to_original),
+              original_edge_to_transformed(original_edge_to_transformed) {}
+        // Vertex created by splitting -> vertex it was split from.
+        VertexMap transformed_vertex_to_original;
+        // Edge of the input graph -> edge representing it after path merging.
+        EdgeMap original_edge_to_transformed;
+    };
+
+    // Modifies `graph` in place: splits vertices first, then merges `paths`.
+    GraphResolverInfo TransformGraph(debruijn_graph::Graph &graph,
+                                     const path_extend::PathContainer &paths,
+                                     const VertexResults &vertex_results) const;
+  private:
+    GraphResolverInfo::VertexMap SplitVertices(debruijn_graph::Graph &graph,
+                                               const VertexResults &vertex_results) const;
+    GraphResolverInfo::EdgeMap MergePaths(debruijn_graph::Graph &graph, const path_extend::PathContainer &paths) const;
+    LinkMap GetLinkMap(const debruijn_graph::Graph &graph,
+                       const VertexId &vertex,
+                       const VertexResult &vertex_result) const;
+};
+}
diff --git a/src/projects/splitter/graph_resolver_io.cpp b/src/projects/splitter/graph_resolver_io.cpp
new file mode 100644
index 0000000000..917792e966
--- /dev/null
+++ b/src/projects/splitter/graph_resolver_io.cpp
@@ -0,0 +1,42 @@
+//***************************************************************************
+//* Copyright (c) 2021-2023 Saint Petersburg State University
+//* All Rights Reserved
+//* See file LICENSE for details.
+//***************************************************************************
+
+#include "graph_resolver_io.hpp"
+
+#include "assembly_graph/paths/bidirectional_path_io/bidirectional_path_output.hpp"
+#include "io/graph/gfa_writer.hpp"
+
+namespace cont_index {
+
+// Writes three files into `output_base`:
+//   resolved_graph.gfa  - segments and links of `graph`
+//   resolved_edges.fasta - one single-edge path per canonical edge of `graph`
+//   edge_transform.tsv  - original edge name (looked up through id_mapper_)
+//                         and the id of the edge representing it in `graph`
+// Failures to open the output files are not reported by this function.
+void TransformedGraphIO::PrintGraph(const debruijn_graph::Graph &graph,
+                                    const GraphResolver::GraphResolverInfo &resolver_info,
+                                    const std::filesystem::path &output_base) const {
+    auto resolved_graph_out = std::ofstream(output_base / ("resolved_graph.gfa"));
+    path_extend::GFAPathWriter resolved_graph_writer(graph, resolved_graph_out);
+    resolved_graph_writer.WriteSegmentsAndLinks();
+
+    auto new_name_generator = std::make_shared();
+    path_extend::ContigWriter resolved_writer(graph, new_name_generator);
+    path_extend::PathContainer resolved_edges;
+    for (const auto &edge: graph.canonical_edges()) {
+        resolved_edges.Create(graph, edge);
+    }
+    std::vector edge_writers;
+    edge_writers.push_back([&](const path_extend::ScaffoldStorage &scaffold_storage) {
+        auto fn = output_base / ("resolved_edges.fasta");
+        INFO("Outputting edges to " << fn);
+        path_extend::ContigWriter::WriteScaffolds(scaffold_storage, fn);
+    });
+    resolved_writer.OutputPaths(resolved_edges, edge_writers);
+
+    auto edge_out = output_base / "edge_transform.tsv";
+    auto edge_out_stream = std::ofstream(edge_out);
+    edge_out_stream << "Original edge id\tResolved graph edge id\n";
+    for (const auto &entry: resolver_info.original_edge_to_transformed) {
+        // '\n' instead of std::endl: no flush per row; the stream is flushed
+        // when it is destroyed at the end of the function.
+        edge_out_stream << (*id_mapper_)[entry.first.int_id()] << "\t" << entry.second.int_id() << '\n';
+    }
+}
+}
diff --git a/src/projects/splitter/graph_resolver_io.hpp b/src/projects/splitter/graph_resolver_io.hpp
new file mode 100644
index 0000000000..98800cee70
--- /dev/null
+++ b/src/projects/splitter/graph_resolver_io.hpp
@@ -0,0 +1,22 @@
+//***************************************************************************
+//* Copyright (c) 2021-2023 Saint Petersburg State University
+//* All
Rights Reserved
+//* See file LICENSE for details.
+//***************************************************************************
+
+#pragma once
+
+#include "graph_resolver.hpp"
+
+namespace cont_index {
+// Writes a transformed graph and the original-to-transformed edge table.
+// Holds a non-owning pointer to the id mapper used to print original edge names.
+class TransformedGraphIO {
+  public:
+    explicit TransformedGraphIO(io::IdMapper *id_mapper) : id_mapper_(id_mapper) {}
+    void PrintGraph(const debruijn_graph::Graph &graph,
+                    const GraphResolver::GraphResolverInfo &resolver_info,
+                    const std::filesystem::path &output_base) const;
+
+  private:
+    io::IdMapper *id_mapper_;
+};
+}
diff --git a/src/projects/splitter/main.cpp b/src/projects/splitter/main.cpp
new file mode 100644
index 0000000000..ed49380107
--- /dev/null
+++ b/src/projects/splitter/main.cpp
@@ -0,0 +1,402 @@
+//***************************************************************************
+//* Copyright (c) 2021-2023 Saint Petersburg State University
+//* All Rights Reserved
+//* See file LICENSE for details.
+//***************************************************************************
+
+#include "barcode_index_construction.hpp"
+#include "graph_resolver.hpp"
+#include "graph_resolver_io.hpp"
+#include "path_extractor.hpp"
+#include "scaffold_graph_helper.hpp"
+#include "vertex_resolver.hpp"
+
+#include "auxiliary_graphs/contracted_graph/contracted_graph_builder.hpp"
+#include "io/binary/read_cloud.hpp"
+#include "io/graph/gfa_writer.hpp"
+#include "toolchain/utils.hpp"
+#include "utils/filesystem/path_helper.hpp"
+#include "utils/parallel/openmp_wrapper.h"
+#include "utils/segfault_handler.hpp"
+#include "utils/verify.hpp"
+
+#include 
+
+using namespace debruijn_graph;
+using namespace cont_index;
+using namespace path_extend::read_cloud;
+
+// Kind of input assembly graph, selected with -Gmdbg / -Gblunt.
+enum class GraphType {
+    Blunted,
+    Multiplexed
+};
+
+// Repeat resolution mode, selected with -Mdiploid / -Mmeta.
+enum class ResolutionMode {
+    Diploid,
+    Meta
+};
+
+// Run configuration. Members carry the defaults; process_cmdline overrides
+// them from the command line.
+struct gcfg {
+    unsigned k = 55;
+    unsigned mapping_k = 31;
+    std::filesystem::path graph;
+    std::filesystem::path output_dir;
+    std::filesystem::path refpath;
+    std::filesystem::path assembly_info;
+    unsigned nthreads = (omp_get_max_threads() / 2 + 1);
+    std::filesystem::path file = "";
+    std::filesystem::path tmpdir = "saves";
+    unsigned libindex = 0;
+    GraphType graph_type = GraphType::Multiplexed;
+    ResolutionMode mode = ResolutionMode::Diploid;
+    bool bin_load = false;
+    bool debug = false;
+
+    //barcode_index_construction
+    size_t frame_size = 40000;
+    size_t read_linkage_distance = 40000;
+    double sampling_factor = 1.0;
+
+    //graph construction
+    double graph_score_threshold = 2.0;
+    size_t tail_threshold = 200000;
+    size_t count_threshold = 1;
+
+    //vertex resolution
+    double rel_threshold = 2.0;
+    bool scaffold_links = false;
+
+    //meta mode
+    size_t length_threshold = 2000;
+};
+
+// Parses the command line into `cfg`. On a parse error prints the man page and
+// exits with status 1. Path-like options are collected into local strings
+// first and copied into `cfg` afterwards.
+static void process_cmdline(int argc, char** argv, gcfg& cfg) {
+    using namespace clipp;
+
+    std::string graph;
+    std::string output_dir;
+    std::string refpath;
+    std::string file;
+    std::string tmpdir;
+    std::string assembly_info;
+
+    auto cli = (
+        graph << value("graph (in binary or GFA)"),
+        file << value("SLR library description (in YAML)"),
+        output_dir << value("path to output directory"),
+        (option("--dataset") & value("yaml", file)) % "dataset description (in YAML)",
+        (option("-l") & integer("value", cfg.libindex)) % "library index (0-based, default: 0)",
+        (option("--assembly-info") & value("assembly-info", assembly_info))
+            % "Path to metaflye assembly_info.txt file (meta mode, metaFlye graphs only)",
+        (option("-t") & integer("value", cfg.nthreads)) % "# of threads to use",
+        (option("--mapping-k") & integer("value", cfg.mapping_k)) % "k for read mapping",
+        (option("--tmp-dir") & value("tmp", tmpdir)) % "scratch directory to use",
+        (option("--ref") & value("reference", refpath)) % "Reference path for repeat resolution evaluation (developer option)",
+        (option("--bin-load").set(cfg.bin_load)) % "load binary-converted reads from tmpdir (developer option)",
+        (option("--debug").set(cfg.debug)) % "produce lots of debug data (developer option)",
+        (option("--sampling-factor") & value("sampling-factor", cfg.sampling_factor)) % "Sampling factor for read downsampling",
+        (with_prefix("-G",
+                     option("mdbg").set(cfg.graph_type, GraphType::Multiplexed) |
+                     option("blunt").set(cfg.graph_type, GraphType::Blunted)) % "assembly graph type (mDBG or blunted)"),
+        (with_prefix("-M",
+                     option("diploid").set(cfg.mode, ResolutionMode::Diploid) |
+                     option("meta").set(cfg.mode, ResolutionMode::Meta)) % "repeat resolution mode (diploid or meta)"),
+        (option("--frame-size") & value("frame-size", cfg.frame_size)) % "Resolution of barcode index",
+        (option("--linkage-distance") & value("read-linkage-distance", cfg.read_linkage_distance)) %
+            "Reads are assigned to the same fragment based on linkage distance",
+        (option("--score") & value("score", cfg.graph_score_threshold)) % "Score threshold for link index",
+        (option("--rel-threshold") & value("rel-threshold", cfg.rel_threshold)) % "Relative score threshold for vertex resolution",
+        (option("--tail-threshold") & value("tail-threshold", cfg.tail_threshold)) %
+            "Barcodes are assigned to the first and last nucleotides of the edge",
+        (option("--count-threshold") & value("count-threshold", cfg.count_threshold))
+            % "Minimum number of reads for barcode index",
+        (option("--scaffold-links").set(cfg.scaffold_links)) % "Use scaffold links in addition to graph links for repeat resolution",
+        (option("--length-threshold") & value("length-threshold", cfg.length_threshold))
+            % "Minimum scaffold graph edge length (meta mode option)"
+    );
+
+    auto result = parse(argc, argv, cli);
+    if (!result) {
+        std::cout << make_man_page(cli, argv[0]);
+        exit(1);
+    }
+    cfg.graph = graph;
+    cfg.output_dir = output_dir;
+    cfg.refpath = refpath;
+    cfg.file = file;
+    // Keep the built-in default ("saves") when --tmp-dir was not given; an
+    // unconditional assignment would replace it with an empty path, which
+    // main() then tries to create as a directory.
+    if (!tmpdir.empty())
+        cfg.tmpdir = tmpdir;
+    cfg.assembly_info = assembly_info;
+}
+
+// Starts the LLVM time-trace profiler on construction; on destruction writes
+// the trace to "<prefix>time_trace_<suffix>.json" and releases the profiler.
+struct TimeTracerRAII {
+    TimeTracerRAII(llvm::StringRef program_name,
+                   unsigned granularity = 500,
+                   const std::string &prefix = "", const std::string &suffix = "") {
+        time_trace_file_ = prefix + "time_trace_" + suffix + ".json";
+        llvm::timeTraceProfilerInitialize(granularity, program_name);
+    }
+    ~TimeTracerRAII() {
+        if (auto E = llvm::timeTraceProfilerWrite(time_trace_file_, "cont-index")) {
+            handleAllErrors(std::move(E),
+                            [&](const llvm::StringError &SE) {
+                                ERROR("" << SE.getMessage() << "\n");
+                            });
+        } else {
+            INFO("Time trace is written to: " << time_trace_file_);
+        }
+        // Release the profiler on both paths; a failed write used to return
+        // before reaching this call.
+        llvm::timeTraceProfilerCleanup();
+    }
+
+    std::string time_trace_file_;
+};
+
+// Reads the GFA file named by cfg.graph, converts it into `graph` (filling
+// `id_mapper`), logs segment/link/path counts and returns the reader.
+gfa::GFAReader ReadGraph(const gcfg &cfg,
+                         debruijn_graph::Graph &graph,
+                         io::IdMapper *id_mapper) {
+    gfa::GFAReader gfa(cfg.graph);
+    gfa.to_graph(graph, id_mapper);
+    INFO("GFA segments: " << gfa.num_edges() << ", links: " << gfa.num_links() << ", paths: "
+                          << gfa.num_paths());
+    return gfa;
+}
+
+// Collects the edges listed in rows of the assembly-info table whose fifth
+// column is "Y". The table is read as whitespace-separated tokens: 8 header
+// tokens are skipped, then each row is 8 tokens with the contig id first, the
+// repeat flag fifth and the comma-separated graph path last. Every path element
+// that is followed by a comma and is not "*" is looked up as "edge_<element>"
+// in `id_mapper`; the edge and its conjugate are added to the result.
+// Aborts with a fatal error if the file cannot be opened.
+std::unordered_set ParseRepetitiveEdges(const debruijn_graph::Graph &graph,
+                                        const std::string &path_to_info,
+                                        io::IdMapper *id_mapper) {
+    std::unordered_set result;
+    size_t total_repetitive_length = 0;
+    std::ifstream info_stream(path_to_info);
+    // A stream that failed to open never reaches EOF, so the former
+    // `while (!eof())` loop would spin forever on a bad path.
+    CHECK_FATAL_ERROR(info_stream.is_open(), "failed to open assembly info file");
+    std::string ctg_id;
+    std::string is_repeat;
+    std::string blank;
+    std::string graph_path;
+    for (size_t i = 0; i < 8; ++i) {
+        info_stream >> blank;
+    }
+    // Reading inside the loop condition stops at the first incomplete row, so
+    // the last row is not processed a second time after a trailing newline.
+    while (info_stream >> ctg_id >> blank >> blank >> blank >> is_repeat >> blank >> blank >> graph_path) {
+        if (is_repeat == "Y") {
+            // NOTE(review): the element after the last comma (or a path without
+            // any comma) is never looked up; kept as in the original -- confirm
+            // that paths always end with a "*" terminator.
+            auto current_pos = graph_path.find(',');
+            while (current_pos != std::string::npos) {
+                std::string edge_num = graph_path.substr(0, current_pos);
+                if (edge_num != "*") {
+                    EdgeId edge = (*id_mapper)["edge_" + edge_num];
+                    result.insert(edge);
+                    result.insert(graph.conjugate(edge));
+                    total_repetitive_length += graph.length(edge);
+                }
+                graph_path.erase(0, current_pos + 1);
+                current_pos = graph_path.find(',');
+            }
+        }
+    }
+    INFO(result.size() << " repetitive edges, total length: " << total_repetitive_length);
+    return result;
+}
+
+// Length threshold handed to the vertex resolver: 0 in diploid mode,
+// cfg.length_threshold in meta mode.
+size_t GetLengthThreshold(const gcfg &cfg) {
+    switch (cfg.mode) {
+        default:
+            FATAL_ERROR("Unknown repeat resolution mode");
+        case ResolutionMode::Diploid: {
+            return 0;
+        }
+        case ResolutionMode::Meta: {
+            return cfg.length_threshold;
+        }
+    }
+}
+
+// Builds the links implied by the GFA paths after dropping repetitive edges:
+// for every two consecutive non-repetitive edges a -> b of a path, records
+// a -> b and conjugate(b) -> conjugate(a).
+cont_index::VertexResolver::LinkMap GetTrustedContigLinks(const std::unordered_set &repetitive_edges,
+                                                          debruijn_graph::Graph &graph,
+                                                          const gfa::GFAReader &gfa) {
+    cont_index::VertexResolver::LinkMap trusted_link_map;
+    for (const auto &path: gfa.paths()) {
+        std::vector non_repetitive_path;
+        for (const auto &edge: path.edges) {
+            if (repetitive_edges.find(edge) == repetitive_edges.end()) {
+                non_repetitive_path.push_back(edge);
+            }
+        }
+        // Fewer than two edges left: nothing to link.
+        if (non_repetitive_path.size() < 2) {
+            continue;
+        }
+        for (auto it1 = non_repetitive_path.begin(), it2 = std::next(it1);
+             it2 != non_repetitive_path.end(); ++it1, ++it2) {
+            EdgeId current = *it1;
+            EdgeId next = *it2;
+            trusted_link_map[current].insert(next);
+            trusted_link_map[graph.conjugate(next)].insert(graph.conjugate(current));
+        }
+    }
+    return trusted_link_map;
+}
+
+// Runs the vertex resolver for the configured mode, writes its per-vertex
+// report to <output_dir>/vertex_stats.tsv and returns the results.
+// Diploid: resolves on the assembly graph itself with no trusted links.
+// Meta: resolves on a contracted graph built with a predicate that is true for
+// edges absent from the repetitive set, using links taken from the GFA paths.
+cont_index::VertexResults GetRepeatResolutionResults(const gcfg &cfg,
+                                                     debruijn_graph::Graph &graph,
+                                                     const gfa::GFAReader &gfa,
+                                                     std::shared_ptr barcode_extractor_ptr,
+                                                     io::IdMapper *id_mapper) {
+    size_t length_threshold = GetLengthThreshold(cfg);
+    std::filesystem::path vertex_output_path = cfg.output_dir / "vertex_stats.tsv";
+
+    switch (cfg.mode) {
+        default:
+            FATAL_ERROR("Unknown repeat resolution mode");
+        case ResolutionMode::Diploid: {
+            cont_index::VertexResolver::LinkMap empty_map;
+            cont_index::VertexResolver vertex_resolver
+                (graph, graph, empty_map, barcode_extractor_ptr, cfg.count_threshold, cfg.tail_threshold,
+                 length_threshold, cfg.nthreads, cfg.graph_score_threshold, cfg.rel_threshold);
+            auto results = vertex_resolver.ResolveVertices();
+            vertex_resolver.PrintVertexResults(results, vertex_output_path, id_mapper);
+            return results;
+        }
+        case ResolutionMode::Meta: {
+            auto repetitive_edges = ParseRepetitiveEdges(graph, cfg.assembly_info, id_mapper);
+            auto repeat_predicate = [&repetitive_edges](const debruijn_graph::EdgeId &edge) {
+                return repetitive_edges.find(edge) == repetitive_edges.end();
+            };
+            contracted_graph::DBGContractedGraphFactory factory(graph, repeat_predicate);
+            factory.Construct();
+            INFO("Constructed graph");
+            auto contracted_graph = factory.GetGraph();
+            INFO("Total contracted graph vertices: " << contracted_graph->size());
+            auto trusted_link_map = GetTrustedContigLinks(repetitive_edges, graph, gfa);
+
+            cont_index::VertexResolver vertex_resolver
+                (*contracted_graph, contracted_graph->GetAssemblyGraph(), trusted_link_map, barcode_extractor_ptr,
+                 cfg.count_threshold, cfg.tail_threshold, length_threshold, cfg.nthreads,
+                 cfg.graph_score_threshold, cfg.rel_threshold);
+            auto results = vertex_resolver.ResolveVertices();
+            vertex_resolver.PrintVertexResults(results, vertex_output_path, id_mapper);
+            return results;
+        }
+    }
+}
+
+// Resolves vertices, extracts paths from the results, writes them to
+// <output_dir>/contigs.fasta and to `gfa_writer`, and in diploid mode also
+// transforms `graph` and prints the transformed graph.
+void ResolveComplexVertices(const gcfg &cfg,
+                            debruijn_graph::Graph &graph,
+                            std::shared_ptr barcode_extractor_ptr,
+                            io::IdMapper *id_mapper,
+                            const gfa::GFAReader &gfa,
+                            path_extend::GFAPathWriter gfa_writer) {
+    auto vertex_results = GetRepeatResolutionResults(cfg, graph, gfa, barcode_extractor_ptr, id_mapper);
+
+    cont_index::PathExtractor path_extractor(graph);
+    path_extend::PathContainer paths;
+    path_extractor.ExtractPaths(paths, vertex_results);
+    INFO("Extracted paths")
+
+    auto name_generator = std::make_shared();
+    path_extend::ContigWriter writer(graph, name_generator);
+    std::vector path_writers;
+    INFO("Creating writers")
+    path_writers.push_back([&](const path_extend::ScaffoldStorage &scaffold_storage) {
+        auto fn = cfg.output_dir / ("contigs.fasta");
+        INFO("Outputting contigs to " << fn);
+        path_extend::ContigWriter::WriteScaffolds(scaffold_storage, fn);
+    });
+    path_writers.push_back([&](const path_extend::ScaffoldStorage &storage) {
+        INFO("Populating GFA with scaffold paths");
+        gfa_writer.WritePaths(storage);
+    });
+    writer.OutputPaths(paths, path_writers);
+
+    if (cfg.mode == ResolutionMode::Diploid) {
+        cont_index::GraphResolver graph_resolver;
+        auto graph_resolver_info = graph_resolver.TransformGraph(graph, paths, vertex_results);
+        TransformedGraphIO graph_output(id_mapper);
+        graph_output.PrintGraph(graph, graph_resolver_info, cfg.output_dir);
+    }
+}
+
+// Tool entry point: parses options, loads the GFA graph, writes it back as
+// assembly_graph.gfa, builds (and optionally downsamples) the barcode index
+// and runs repeat resolution.
+int main(int argc, char** argv) {
+    utils::segfault_handler sh;
+    gcfg cfg;
+
+    srand(42);
+    srandom(42);
+
+    process_cmdline(argc, argv, cfg);
+
+    toolchain::create_console_logger();
+    START_BANNER("SpLitteR");
+
+    cfg.nthreads = spades_set_omp_threads(cfg.nthreads);
+    INFO("Maximum # of threads to use (adjusted due to OMP capabilities): " << cfg.nthreads);
+
+    // create_directories also creates missing parents, so nested output and
+    // scratch paths work; an existing directory is not an error.
+    std::filesystem::create_directories(cfg.output_dir);
+    std::filesystem::create_directories(cfg.tmpdir);
+
+    INFO("Loading graph");
+    std::unique_ptr> id_mapper(new io::IdMapper());
+
+    debruijn_graph::Graph graph(cfg.k);
+    auto gfa = ReadGraph(cfg, graph, id_mapper.get());
+    INFO("Graph loaded. Total vertices: " << graph.size() << ", total edges: " << graph.e_size());
+
+    std::ofstream graph_out(cfg.output_dir / "assembly_graph.gfa");
+    path_extend::GFAPathWriter gfa_writer(graph, graph_out,
+                                          io::MapNamingF(*id_mapper));
+    gfa_writer.WriteSegmentsAndLinks();
+
+    INFO("Building barcode index");
+    if (cfg.libindex != -1u) {
+        INFO("Processing paired-end reads");
+        DataSet dataset;
+        if (cfg.file != "") {
+            dataset.load(cfg.file);
+            if (cfg.libindex == -1u)
+                cfg.libindex = 0;
+            CHECK_FATAL_ERROR(cfg.libindex < dataset.lib_count(), "invalid library index");
+        }
+
+        debruijn_graph::config::init_libs(dataset, cfg.nthreads, cfg.tmpdir);
+        barcode_index::FrameBarcodeIndex barcode_index(graph, cfg.frame_size);
+        using BarcodeExtractor = barcode_index::FrameBarcodeIndexInfoExtractor;
+        auto barcode_extractor_ptr = std::make_shared(barcode_index, graph);
+
+        std::unique_ptr traceraii;
+        traceraii.reset(new TimeTracerRAII(argv[0], 500));
+        INFO("Time tracing is enabled");
+
+        TIME_TRACE_SCOPE("Containment index");
+
+        auto &lib = dataset[cfg.libindex];
+        if (lib.type() == io::LibraryType::Clouds10x or lib.type() == io::LibraryType::TellSeqReads) {
+            cont_index::ConstructBarcodeIndex(barcode_index, lib, graph, cfg.tmpdir, cfg.nthreads, cfg.frame_size,
+                                              cfg.mapping_k, cfg.bin_load, cfg.debug);
+        } else {
+            // NOTE(review): execution continues with an unfilled barcode index
+            // after this message; confirm that this is intended.
+            ERROR("Only read cloud libraries with barcode tags are supported for links");
+        }
+
+        barcode_index::FrameBarcodeIndex downsampled_index(graph, cfg.frame_size);
+        if (not math::eq(cfg.sampling_factor, 1.0)) {
+            INFO("Downsampling the barcode index with factor " << cfg.sampling_factor);
+            cont_index::DownsampleBarcodeIndex(graph, cfg.nthreads, barcode_index, downsampled_index,
+                                               cfg.sampling_factor);
+            barcode_extractor_ptr = std::make_shared(downsampled_index, graph);
+        }
+
+        ResolveComplexVertices(cfg, graph, barcode_extractor_ptr, id_mapper.get(), gfa, gfa_writer);
+    }
+}
diff --git a/src/projects/splitter/path_extractor.cpp
b/src/projects/splitter/path_extractor.cpp
new file mode 100644
index 0000000000..eeb232a51f
--- /dev/null
+++ b/src/projects/splitter/path_extractor.cpp
@@ -0,0 +1,165 @@
+//***************************************************************************
+//* Copyright (c) 2021-2023 Saint Petersburg State University
+//* All Rights Reserved
+//* See file LICENSE for details.
+//***************************************************************************
+
+#include "path_extractor.hpp"
+
+namespace cont_index {
+
+// Appends paths to `paths` by chaining edges along the supported in->out links
+// of `vertex_results`. A chain starts at an edge that has an outgoing link but
+// no incoming one and follows in_to_out until an edge without an outgoing link
+// or an already visited edge is reached. Joins backed by a graph link are
+// appended without a gap, all other joins with a gap of DEFAULT_GAP.
+// Afterwards every canonical edge not visited so far becomes a single-edge
+// path. Verifies that no edge has more than one incoming or outgoing link.
+void PathExtractor::ExtractPaths(path_extend::PathContainer &paths,
+                                 const VertexResults &vertex_results) const {
+    //fixme replace with graph distance
+    int DEFAULT_GAP = 500;
+
+    auto scaffold_links = GetScaffoldLinks(vertex_results);
+    const auto &in_degrees = scaffold_links.in_degrees;
+    const auto &out_degrees = scaffold_links.out_degrees;
+    const auto &in_to_out = scaffold_links.in_to_out;
+    const auto &vertex_link_storage = scaffold_links.vertex_link_storage;
+    for (const auto &entry: in_degrees) {
+        VERIFY_MSG(entry.second < 2, "In degree " << entry.second << ", " << entry.first);
+    }
+    for (const auto &entry: out_degrees) {
+        VERIFY_MSG(entry.second < 2, "Out degree " << entry.second << ", " << entry.first);
+    }
+    size_t visited_edges = 0;
+    size_t total_path_overlap = 0;
+    std::unordered_set visited;
+    // Not used below.
+    std::unordered_map end_to_path_idx;
+    for (const auto &entry: out_degrees) {
+        // Start only from edges nothing links into, i.e. chain heads.
+        if (in_degrees.find(entry.first) == in_degrees.end()) {
+            if (visited.find(entry.first) == visited.end()) {
+                debruijn_graph::EdgeId current_edge = entry.first;
+                auto &path = paths.Create(graph_, current_edge);
+                // An edge and its conjugate are always marked together.
+                visited.insert(current_edge);
+                visited.insert(graph_.conjugate(current_edge));
+                while (out_degrees.find(current_edge) != out_degrees.end()) {
+                    const auto &next_edge = in_to_out.at(current_edge);
+                    if (visited.find(next_edge) != visited.end()) {
+                        TRACE("Edge is visited!");
+                        visited_edges++;
+                        break;
+                    }
+                    if (IsGraphLink(current_edge, next_edge, vertex_link_storage)) {
+                        total_path_overlap += graph_.data(graph_.EdgeStart(next_edge)).overlap();
+                        path.PushBack(next_edge);
+                    } else {
+                        path.PushBack(next_edge, path_extend::Gap(DEFAULT_GAP));
+                    }
+                    visited.insert(next_edge);
+                    visited.insert(graph_.conjugate(next_edge));
+                    current_edge = next_edge;
+                }
+            }
+        }
+    }
+
+    // Everything not reached above (including edges on link cycles, which have
+    // no chain head) is emitted as a single-edge path.
+    for (const debruijn_graph::EdgeId &edge: graph_.canonical_edges()) {
+        if (visited.find(edge) == visited.end()) {
+            paths.Create(graph_, edge);
+            visited.insert(edge);
+            visited.insert(graph_.conjugate(edge));
+        }
+    }
+    size_t total_path_size = 0;
+    size_t total_path_length = 0;
+    for (const auto &path: paths) {
+        total_path_size += path.first->Size();
+        total_path_length += path.first->Length();
+    }
+    INFO("Total path size: " << total_path_size);
+    INFO("Total path length: " << total_path_length);
+    INFO("Total path overlap: " << total_path_overlap);
+    INFO("Edges visited by several paths: " << visited_edges);
+}
+// True when `second` is `first` reversed with every edge replaced by its
+// conjugate. Paths of different sizes never match.
+bool PathExtractor::IsConjugatePair(const PathExtractor::SimplePath &first,
+                                    const PathExtractor::SimplePath &second) const {
+    if (first.size() != second.size()) {
+        return false;
+    }
+    // it1 walks `first` forwards while it2 walks `second` backwards.
+    for (auto it1 = first.begin(), it2 = second.end(); it1 != first.end(); ++it1) {
+        --it2;
+        if (*it1 != graph_.conjugate(*it2)) {
+            return false;
+        }
+    }
+    return true;
+}
+// True when `vertex_storage` lists `second` among the edges reachable from
+// `first`.
+bool PathExtractor::IsGraphLink(const debruijn_graph::EdgeId &first,
+                                const debruijn_graph::EdgeId &second,
+                                const PathExtractor::VertexLinkStorage &vertex_storage) const {
+    auto out_graph_links = vertex_storage.find(first);
+    if (out_graph_links == vertex_storage.end()) {
+        return false;
+    }
+    auto out_link_result = out_graph_links->second.find(second);
+    if (out_link_result == out_graph_links->second.end()) {
+        return false;
+    }
+    return true;
+}
+// Collects, over all vertices in `vertex_results`:
+//   vertex_link_storage - graph-level adjacency: the stored links of a complex
+//                         vertex, or every incoming x outgoing pair otherwise
+//   in_to_out           - the supported pair chosen for each in-edge (the first
+//                         one seen wins), for vertices resolved completely or
+//                         partially
+//   in_degrees / out_degrees - how often an edge appears as the target / source
+//                         of an accepted pair
+// Also logs graph size and how many accepted pairs are backed by a graph link.
+PathExtractor::ScaffoldLinks PathExtractor::GetScaffoldLinks(const VertexResults &vertex_results) const {
+    INFO("Extracting paths");
+    std::unordered_map in_degrees;
+    std::unordered_map out_degrees;
+    std::unordered_map in_to_out;
+    std::unordered_map> vertex_link_storage;
+    size_t total_length = 0;
+    size_t total_edges = 0;
+    size_t total_resolved_overlap = 0;
+    size_t not_graph_supported_links = 0;
+    size_t graph_supported_links = 0;
+
+    for (const debruijn_graph::EdgeId &edge: graph_.canonical_edges()) {
+        total_length += graph_.length(edge);
+        ++total_edges;
+    }
+
+    for (const auto &vertex_entry: vertex_results.vertex_to_result) {
+        const auto &vertex_result = vertex_entry.second;
+        auto vertex = vertex_entry.first;
+        DEBUG("Updating link storage");
+        if (graph_.is_complex(vertex)) {
+            for (const debruijn_graph::LinkId &link_id: graph_.links(vertex)) {
+                auto &link = graph_.link(link_id);
+                TRACE(link.link.first.int_id() << "," << link.link.second.int_id() << "," << link_id);
+                vertex_link_storage[link.link.first].insert(link.link.second);
+            }
+        } else {
+            for (const auto &in_edge: graph_.IncomingEdges(vertex)) {
+                for (const auto &out_edge: graph_.OutgoingEdges(vertex)) {
+                    vertex_link_storage[in_edge].insert(out_edge);
+                }
+            }
+        }
+        DEBUG("Constructing path map");
+        for (const auto &entry: vertex_result.supported_pairs) {
+            if (vertex_result.state == VertexState::Completely or vertex_result.state == VertexState::Partially) {
+                // Later pairs for an in-edge that already has a successor are ignored.
+                if (in_to_out.find(entry.first) == in_to_out.end()) {
+                    TRACE(entry.first.int_id() << "," << entry.second.int_id());
+                    if (IsGraphLink(entry.first, entry.second, vertex_link_storage)) {
+                        total_resolved_overlap += graph_.data(vertex_entry.first).overlap();
+                        ++graph_supported_links;
+                    } else {
+                        ++not_graph_supported_links;
+                    }
+                    in_to_out[entry.first] = entry.second;
+                    in_degrees[entry.second]++;
+                    out_degrees[entry.first]++;
+                }
+            }
+        }
+    }
+
+    INFO("Total graph size: " << total_edges);
+    INFO("Total graph length: " << total_length);
+    INFO("Links not supported by graph: " << not_graph_supported_links);
+    INFO("Links supported by graph: " << graph_supported_links);
+    INFO("Total resolved overlap: " << total_resolved_overlap);
+    ScaffoldLinks result(in_degrees, out_degrees, in_to_out, vertex_link_storage);
+    return result;
+}
+
+}
diff --git a/src/projects/splitter/path_extractor.hpp b/src/projects/splitter/path_extractor.hpp
new file mode 100644
index 0000000000..bedaef70e7
--- /dev/null
+++ b/src/projects/splitter/path_extractor.hpp
@@ -0,0 +1,56 @@
+//***************************************************************************
+//* Copyright (c) 2021-2023 Saint Petersburg State University
+//* All Rights Reserved
+//* See file LICENSE for details.
+//***************************************************************************
+
+#pragma once
+
+#include "assembly_graph/paths/bidirectional_path_container.hpp"
+#include "assembly_graph/paths/bidirectional_path_io/bidirectional_path_output.hpp"
+#include "barcode_index/barcode_info_extractor.hpp"
+#include "io/graph/gfa_reader.hpp"
+
+#include "vertex_resolver.hpp"
+
+namespace cont_index {
+
+// Turns vertex resolution results into edge paths over a graph. Keeps a
+// reference to the graph, which must outlive the extractor.
+class PathExtractor {
+    typedef std::vector SimplePath;
+    typedef std::unordered_map> VertexLinkStorage;
+  public:
+    explicit PathExtractor(const debruijn_graph::Graph &graph) : graph_(graph) {}
+
+    // Appends the extracted paths to `paths`.
+    void ExtractPaths(path_extend::PathContainer &paths, const VertexResults &vertex_results) const;
+  private:
+    // Link bookkeeping produced by GetScaffoldLinks; all four containers are
+    // copied in by the constructor.
+    struct ScaffoldLinks {
+        typedef std::unordered_map DegreeMap;
+        DegreeMap in_degrees;
+        DegreeMap out_degrees;
+        std::unordered_map in_to_out;
+        std::unordered_map> vertex_link_storage;
+
+        ScaffoldLinks(const DegreeMap &in_degrees,
+                      const DegreeMap &out_degrees,
+                      const std::unordered_map &in_to_out,
+                      const std::unordered_map> &vertex_link_storage)
+            : in_degrees(in_degrees),
+              out_degrees(out_degrees),
+              in_to_out(in_to_out),
+              vertex_link_storage(vertex_link_storage) {}
+    };
+
+    bool IsConjugatePair(const SimplePath &first, const SimplePath &second) const;
+    bool IsGraphLink(const debruijn_graph::EdgeId &first,
+                     const debruijn_graph::EdgeId &second,
+                     const VertexLinkStorage &vertex_storage) const;
+
+    ScaffoldLinks GetScaffoldLinks(const VertexResults &vertex_results) const;
+
+    const debruijn_graph::Graph &graph_;
+
+    DECL_LOGGER("PathExtractor");
+};
+
+}
\ No newline at end of file
diff --git a/src/projects/splitter/scaffold_graph_helper.cpp b/src/projects/splitter/scaffold_graph_helper.cpp
new file mode 100644
index 0000000000..e53018a0b3
--- /dev/null
+++ b/src/projects/splitter/scaffold_graph_helper.cpp
@@ -0,0 +1,253 @@
+//***************************************************************************
+//* Copyright (c) 2021-2023 Saint Petersburg State University
+//* All Rights Reserved
+//* See file LICENSE for details.
+//***************************************************************************
+
+#include "barcode_index/scaffold_vertex_index_builder.hpp"
+//#include "modules/path_extend/read_cloud_path_extend/scaffold_graph_construction/read_cloud_connection_conditions.hpp"
+#include "modules/path_extend/scaffolder2015/scaffold_graph_constructor.hpp"
+#include "scaffold_graph_helper.hpp"
+
+namespace cont_index {
+
+// Builds a scaffold graph whose vertices are all canonical edges of g_ and
+// their conjugates. One chunk per vertex pairs it with the whole vertex set;
+// the chunks, the score function and graph_score_threshold_ are handed to a
+// score-based constructor whose result is returned by value.
+scaffold_graph::ScaffoldGraph LinkIndexGraphConstructor::ConstructGraph() const {
+    scaffold_graph::ScaffoldGraph result(g_);
+    std::unordered_set scaffold_vertices;
+    for (const debruijn_graph::EdgeId &edge: g_.canonical_edges()) {
+        scaffold_vertices.insert(edge);
+        scaffold_vertices.insert(g_.conjugate(edge));
+    }
+    auto score_function = ConstructScoreFunction();
+    // NOTE(review): the index and extractor built below are referenced only by
+    // the commented-out code further down, and ConstructScoreFunction builds
+    // its own index; confirm whether this construction is still needed.
+    barcode_index::SimpleScaffoldVertexIndexBuilderHelper helper;
+    auto tail_threshold_getter = std::make_shared(tail_threshold_);
+    auto scaffold_vertex_index = helper.ConstructScaffoldVertexIndex(g_,
+                                                                     *barcode_extractor_,
+                                                                     tail_threshold_getter,
+                                                                     count_threshold_,
+                                                                     length_threshold_,
+                                                                     max_threads_,
+                                                                     scaffold_vertices);
+
+    auto scaffold_index_extractor =
+        std::make_shared(scaffold_vertex_index);
+    INFO("Setting score index threshold to " << graph_score_threshold_);
+
+    // NOTE(review): `result` receives the vertices here but is not what the
+    // function returns.
+    for (const auto &vertex: scaffold_vertices) {
+        result.AddVertex(vertex);
+    }
+
+//    ReverseBarcodeIndexConstructor reverse_index_constructor(g_, barcode_extractor_, length_threshold_, tail_threshold_,
+//                                                             count_threshold_, max_threads_);
+//    auto reverse_index = reverse_index_constructor.ConstructReverseIndex(scaffold_vertices);
+//
+//    size_t total_head_size = 0;
+//    size_t total_tail_size = 0;
+//    for (const auto &vertex: scaffold_vertices) {
+//        total_head_size += scaffold_index_extractor->GetHeadSize(vertex);
+//        total_tail_size += scaffold_index_extractor->GetTailSize(vertex);
+//    }
+//    INFO("Total head size: " << total_head_size);
+//    INFO("Total tail size: " << total_tail_size);
+//    size_t total_pairs = 0;
+//    for (const auto &entry: reverse_index) {
+//        total_pairs += entry.second.size() * entry.second.size();
+//    }
+
+    std::vector chunks;
+    for (const auto &first: scaffold_vertices) {
+        chunks.emplace_back(first, scaffold_vertices.begin(), scaffold_vertices.end());
+    }
+//    for (const auto &entry: reverse_index) {
+//        for (const auto &first: entry.second) {
+//            chunks.emplace_back(first, entry.second.begin(), entry.second.end());
+//        }
+//    }
+    INFO(chunks.size() << " chunks");
+    auto score_filter = std::make_shared(g_, chunks,
+                                         score_function,
+                                         graph_score_threshold_,
+                                         max_threads_);
+    return *(score_filter->Construct());
+}
+// Stores the graph reference, the barcode extractor and the thresholds as given.
+LinkIndexGraphConstructor::LinkIndexGraphConstructor(const debruijn_graph::Graph &g,
+                                                     LinkIndexGraphConstructor::BarcodeExtractorPtr barcode_extractor,
+                                                     const double graph_score_threshold,
+                                                     const size_t tail_threshold,
+                                                     const size_t length_threshold,
+                                                     const size_t count_threshold,
+                                                     size_t max_threads) : g_(g),
+                                                                           barcode_extractor_(barcode_extractor),
+                                                                           graph_score_threshold_(
+                                                                               graph_score_threshold),
+                                                                           tail_threshold_(tail_threshold),
+                                                                           length_threshold_(length_threshold),
+                                                                           count_threshold_(count_threshold),
+                                                                           max_threads_(max_threads) {}
+// Builds a scaffold vertex index over all canonical edges of g_ and their
+// conjugates and wraps it in the score function used by ConstructGraph.
+LinkIndexGraphConstructor::BarcodeScoreFunctionPtr LinkIndexGraphConstructor::ConstructScoreFunction() const {
+    std::set scaffold_vertices;
+    for (const debruijn_graph::EdgeId &edge: g_.canonical_edges()) {
+        scaffold_vertices.insert(edge);
+        scaffold_vertices.insert(g_.conjugate(edge));
+    }
+
+    barcode_index::SimpleScaffoldVertexIndexBuilderHelper helper;
+    auto tail_threshold_getter = std::make_shared(tail_threshold_);
+    auto scaffold_vertex_index = helper.ConstructScaffoldVertexIndex(g_,
+                                                                     *barcode_extractor_,
+                                                                     tail_threshold_getter,
+                                                                     count_threshold_,
+                                                                     length_threshold_,
+                                                                     max_threads_,
+                                                                     scaffold_vertices);
+
+    auto scaffold_index_extractor =
+        std::make_shared(scaffold_vertex_index);
+    auto score_function =
+        std::make_shared(g_, scaffold_index_extractor,
+                         count_threshold_, tail_threshold_);
+    return score_function;
+}
+// Stores the graph, the GFA reader and the id mapper pointer as given.
+GFAGraphConstructor::GFAGraphConstructor(const debruijn_graph::Graph &g,
+                                         const gfa::GFAReader &gfa,
+                                         io::IdMapper *id_mapper) :
+    g_(g), gfa_(gfa), id_mapper_(id_mapper) {}
+// Builds a scaffold graph that mirrors the adjacency of g_: canonical edges are
+// added as vertices, and for every graph vertex each incoming/outgoing edge
+// pair is added as a scaffold edge with the constant arguments 0, 1.0, 0.
+// NOTE(review): only canonical edges are added through AddVertex, while AddEdge
+// is called for every incoming/outgoing pair; confirm that AddEdge copes with
+// endpoints that were not added explicitly.
+scaffold_graph::ScaffoldGraph GFAGraphConstructor::ConstructGraphFromDBG() const {
+    scaffold_graph::ScaffoldGraph scaffold_graph(g_);
+    for (const EdgeId &edge: g_.canonical_edges()) {
+        scaffold_graph.AddVertex(edge);
+    }
+    for (const auto &vertex: g_.vertices()) {
+        for (const auto &outgoing: g_.OutgoingEdges(vertex)) {
+            for (const auto &incoming: g_.IncomingEdges(vertex)) {
+                scaffold_graph.AddEdge(incoming, outgoing, 0, 1.0, 0);
+            }
+        }
+    }
+    return scaffold_graph;
+}
+
+ReverseBarcodeIndex ReverseBarcodeIndexConstructor::ConstructReverseIndex(const std::set &scaffold_vertices) const {
+    barcode_index::SimpleScaffoldVertexIndexBuilderHelper helper;
+    auto tail_threshold_getter = std::make_shared(tail_threshold_);
+    auto scaffold_vertex_index = helper.ConstructScaffoldVertexIndex(g_, *barcode_extractor_, tail_threshold_getter,
+                                                                     count_threshold_, length_threshold_,
+                                                                     max_threads_, scaffold_vertices);
+    auto scaffold_index_extractor =
+        std::make_shared(scaffold_vertex_index);
+    ReverseBarcodeIndex result;
+    for (const auto &vertex: scaffold_vertices) {
+        for (const auto &barcode: scaffold_index_extractor->GetHeadEntry(vertex)) {
+            result[barcode].insert(vertex);
+        }
+        for (const auto &barcode: 
scaffold_index_extractor->GetTailEntry(vertex)) { + result[barcode].insert(vertex); + } + } + double mean_barcode_size = .0; + double barcode_size_m2 = .0; + for (const auto &entry: result) { + auto entry_size = static_cast(entry.second.size()); + mean_barcode_size += entry_size; + barcode_size_m2 += entry_size * entry_size; + } + mean_barcode_size /= static_cast(result.size()); + barcode_size_m2 /= static_cast(result.size()); + INFO("Number of barcodes: " << result.size()); + INFO("Mean edges in barcode: " << mean_barcode_size); + INFO("Raw second moment of barcode edges: " << barcode_size_m2); + return result; +} +ReverseBarcodeIndexConstructor::ReverseBarcodeIndexConstructor(const debruijn_graph::Graph &g, + BarcodeExtractorPtr barcode_extractor, + const size_t length_threshold, + const size_t tail_threshold, + const size_t count_threshold, + size_t max_threads) : + g_(g), + barcode_extractor_(barcode_extractor), + length_threshold_(length_threshold), + tail_threshold_(tail_threshold), + count_threshold_(count_threshold), + max_threads_(max_threads) {} +scaffold_graph::ScaffoldGraph ScaffoldGraphSerializer::ReadGraph(const string &path_to_graph) { + scaffold_graph::ScaffoldGraph result(g_); + std::unordered_map id_to_vertex; + for (const debruijn_graph::EdgeId &edge: g_.canonical_edges()) { + auto str_id = (*id_mapper_)[edge.int_id()]; + scaffold_graph::ScaffoldVertex vertex(edge); + scaffold_graph::ScaffoldVertex conj_vertex(g_.conjugate(edge)); + id_to_vertex.emplace(str_id, vertex); + id_to_vertex.emplace(str_id + "\'", conj_vertex); + result.AddVertex(vertex); + result.AddVertex(conj_vertex); + } + + size_t number_of_edges; + std::ifstream graph_reader(path_to_graph); + graph_reader >> number_of_edges; + size_t i = 0; + std::string first_id, second_id; + double weight; + //fixme optimize link deduplication in scaffold graph itself + std::set> unique_links; + while (i < number_of_edges) { + graph_reader >> first_id >> second_id >> weight; + auto first_vertex 
= id_to_vertex.at(first_id); + auto second_vertex = id_to_vertex.at(second_id); + auto emplace_result = unique_links.emplace(first_vertex, second_vertex); + if (emplace_result.second) { + scaffold_graph::ScaffoldGraph::ScaffoldEdge sc_edge(first_vertex, second_vertex, 0, weight, 0); + result.AddEdgeSimple(sc_edge); + } + ++i; + } + return result; +} +void ScaffoldGraphSerializer::WriteGraph(const scaffold_graph::ScaffoldGraph &scaffold_graph, const std::string &path_to_graph) const { + std::ofstream os(path_to_graph); + os << scaffold_graph.EdgeCount() << "\n"; +// os << "FirstId\tSecondId\tWeight\n"; + for (const scaffold_graph::ScaffoldGraph::ScaffoldEdge &edge: scaffold_graph.edges()) { + os << (*id_mapper_)[edge.getStart().int_id()] << "\t" << (*id_mapper_)[edge.getEnd().int_id()] << "\t" << edge.getWeight() << "\n"; + } +} +ScaffoldGraphSerializer::ScaffoldGraphSerializer(const debruijn_graph::Graph &g, io::IdMapper *id_mapper) : + g_(g), id_mapper_(id_mapper) {} +scaffold_graph::ScaffoldGraph GetTellSeqScaffoldGraph(const debruijn_graph::Graph &g, + BarcodeExtractorPtr barcode_extractor, + double score_threshold, + size_t length_threshold, + size_t tail_threshold, + size_t count_threshold, + size_t max_threads, + bool bin_load, + bool debug, + const std::filesystem::path &output_dir, + io::IdMapper *id_mapper) { + auto path_to_scaffold_graph = output_dir / "tellseq_links.scg"; + scaffold_graph::ScaffoldGraph scaffold_graph(g); + if (!bin_load or !std::filesystem::exists(path_to_scaffold_graph)) { + LinkIndexGraphConstructor link_index_constructor(g, + barcode_extractor, + score_threshold, + tail_threshold, + length_threshold, + count_threshold, + max_threads); + INFO("Constructing scaffold graph"); + scaffold_graph = link_index_constructor.ConstructGraph(); + } else { + INFO("Reading scaffold graph from " << path_to_scaffold_graph); + ScaffoldGraphSerializer graph_serializer(g, id_mapper); + scaffold_graph = graph_serializer.ReadGraph(path_to_scaffold_graph); 
+ } + INFO(scaffold_graph.VertexCount() << " vertices and " << scaffold_graph.EdgeCount() + << " edges in scaffold graph"); + if (debug) { + ScaffoldGraphSerializer graph_serializer(g, id_mapper); + graph_serializer.WriteGraph(scaffold_graph, path_to_scaffold_graph); + } + return scaffold_graph; +} +} diff --git a/src/projects/splitter/scaffold_graph_helper.hpp b/src/projects/splitter/scaffold_graph_helper.hpp new file mode 100644 index 0000000000..dd6a121ae7 --- /dev/null +++ b/src/projects/splitter/scaffold_graph_helper.hpp @@ -0,0 +1,114 @@ +//*************************************************************************** +//* Copyright (c) 2021-2023 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. +//*************************************************************************** + +#pragma once + +#include "assembly_graph/core/graph.hpp" +#include "auxiliary_graphs/scaffold_graph/scaffold_graph.hpp" +#include "barcode_index/barcode_index_builder.hpp" +#include "barcode_index/barcode_info_extractor.hpp" +#include "io/graph/gfa_reader.hpp" +#include "library/library.hpp" +#include "library/library_data.hpp" +#include "modules/path_extend/read_cloud_path_extend/scaffold_graph_construction/read_cloud_connection_conditions.hpp" +#include "modules/path_extend/scaffolder2015/scaffold_graph_constructor.hpp" + +namespace cont_index { + +typedef std::unordered_map> ReverseBarcodeIndex; + +class LinkIndexGraphConstructor { + public: + using Graph = debruijn_graph::Graph; + using BarcodeExtractorPtr = std::shared_ptr; + using BarcodeScoreFunctionPtr = std::shared_ptr; + + LinkIndexGraphConstructor(const Graph &g, + BarcodeExtractorPtr barcode_extractor, + const double graph_score_threshold, + const size_t tail_threshold, + const size_t length_threshold, + const size_t count_threshold, + size_t max_threads); + + scaffold_graph::ScaffoldGraph ConstructGraph() const; + + BarcodeScoreFunctionPtr ConstructScoreFunction() const; + + private: 
+ const debruijn_graph::Graph &g_; + BarcodeExtractorPtr barcode_extractor_; + const double graph_score_threshold_; + const size_t tail_threshold_; + const size_t length_threshold_; + const size_t count_threshold_; + size_t max_threads_; +}; + +class ReverseBarcodeIndexConstructor { + public: + using ScaffoldVertex = scaffold_graph::ScaffoldVertex; + using BarcodeExtractorPtr = std::shared_ptr; + ReverseBarcodeIndexConstructor(const debruijn_graph::Graph &g, + BarcodeExtractorPtr barcode_extractor, + const size_t length_threshold, + const size_t tail_threshold, + const size_t count_threshold, + size_t max_threads); + + ReverseBarcodeIndex ConstructReverseIndex(const std::set &scaffold_vertices) const; + private: + const debruijn_graph::Graph &g_; + BarcodeExtractorPtr barcode_extractor_; + const size_t length_threshold_; + const size_t tail_threshold_; + const size_t count_threshold_; + size_t max_threads_; +}; + +class ScaffoldGraphSerializer { + public: + ScaffoldGraphSerializer(const debruijn_graph::Graph &g, io::IdMapper *id_mapper); + + scaffold_graph::ScaffoldGraph ReadGraph(const std::string &path_to_graph); + void WriteGraph(const scaffold_graph::ScaffoldGraph &scaffold_graph, const std::string &path_to_graph) const; + + private: + const debruijn_graph::Graph &g_; + io::IdMapper *id_mapper_; +}; + +class GFAGraphConstructor { + public: + using Graph = debruijn_graph::Graph; + using EdgeId = debruijn_graph::EdgeId; + using BarcodeExtractorPtr = std::shared_ptr; + + GFAGraphConstructor(const Graph &g, + const gfa::GFAReader &gfa, + io::IdMapper *id_mapper); + + scaffold_graph::ScaffoldGraph ConstructGraphFromDBG() const; + + private: + const debruijn_graph::Graph &g_; + const gfa::GFAReader &gfa_; + io::IdMapper *id_mapper_; +}; + +using BarcodeExtractorPtr = std::shared_ptr; +scaffold_graph::ScaffoldGraph GetTellSeqScaffoldGraph(const debruijn_graph::Graph &g, + BarcodeExtractorPtr barcode_extractor, + double score_threshold, + size_t length_threshold, + 
size_t tail_threshold, + size_t count_threshold, + size_t max_threads, + bool bin_load, + bool debug, + const std::filesystem::path &output_dir, + io::IdMapper *id_mapper); +} diff --git a/src/projects/splitter/vertex_resolver.hpp b/src/projects/splitter/vertex_resolver.hpp new file mode 100644 index 0000000000..b24df3d38e --- /dev/null +++ b/src/projects/splitter/vertex_resolver.hpp @@ -0,0 +1,291 @@ +//*************************************************************************** +//* Copyright (c) 2021-2023 Saint Petersburg State University +//* All Rights Reserved +//* See file LICENSE for details. +//*************************************************************************** + +#pragma once + +#include "barcode_index/barcode_info_extractor.hpp" +#include "common/sequence/rtseq.hpp" +#include "io/graph/gfa_reader.hpp" + +#include "scaffold_graph_helper.hpp" + +namespace cont_index { + +enum class VertexState { + Completely, + Partially, + Ambiguous, + Uncovered +}; + +struct VertexResult { + VertexResult(VertexState state, + const size_t &total_links, + const size_t &supporting_links, + const std::unordered_map &supported_pairs) : + state(state), + total_links(total_links), + supporting_links(supporting_links), + supported_pairs(supported_pairs) {} + + VertexState state; + size_t total_links; + size_t supporting_links; + std::unordered_map supported_pairs; +}; + +struct VertexResults { + VertexResults(const std::unordered_map &vertex_to_result) : + vertex_to_result(vertex_to_result) {} + + std::unordered_map vertex_to_result; +}; + +template +class VertexResolver { + public: + typedef std::unordered_map ResolutionResults; + typedef typename Graph::EdgeId EdgeId; + typedef std::unordered_map> LinkMap; + + VertexResolver(Graph &graph, + const debruijn_graph::Graph &assembly_graph, + const LinkMap &links, + const std::shared_ptr &barcode_extractor_ptr, + size_t count_threshold, + size_t tail_threshold, + size_t length_threshold, + size_t threads, + double 
score_threshold, + double rel_threshold) : + graph_(graph), + assembly_graph_(assembly_graph), + links_(links), + barcode_extractor_ptr_(barcode_extractor_ptr), + count_threshold_(count_threshold), + tail_threshold_(tail_threshold), + length_threshold_(length_threshold), + threads_(threads), + score_threshold_(score_threshold), + rel_threshold_(rel_threshold) {} + + VertexResults ResolveVertices() { + std::unordered_set interesting_vertices; + size_t total_in_edges = 0; + size_t total_out_edges = 0; + for (const auto &vertex: graph_.vertices()) { + if (vertex.int_id() > graph_.conjugate(vertex).int_id()) { + continue; + } + //todo use predicate iterator + if (graph_.OutgoingEdgeCount(vertex) >= 2 and graph_.IncomingEdgeCount(vertex) >= 2) { + interesting_vertices.insert(vertex); + total_in_edges += graph_.IncomingEdgeCount(vertex); + total_out_edges += graph_.OutgoingEdgeCount(vertex); + } + } + INFO(interesting_vertices.size() << " complex vertices"); + INFO("Total indegree: " << total_in_edges << ", total outdegree: " << total_out_edges); + LinkIndexGraphConstructor link_index_constructor(assembly_graph_, barcode_extractor_ptr_, score_threshold_, + tail_threshold_, length_threshold_, count_threshold_, threads_); + auto score_function = link_index_constructor.ConstructScoreFunction(); + INFO("Constructed score function"); + + std::unordered_map vertex_to_result; + for (const auto &vertex: interesting_vertices) { + auto vertex_result = ResolveVertex(vertex, score_function); + vertex_to_result.insert({vertex, vertex_result}); + } + return vertex_to_result; + } + + VertexResult ResolveVertex(const debruijn_graph::VertexId &vertex, + LinkIndexGraphConstructor::BarcodeScoreFunctionPtr score_function) const { + size_t total_links = 0; + size_t answer_links = 0; + std::unordered_map in_to_out; + bool is_ambiguous = false; + std::unordered_set covered_vertices; + double LINK_BONUS = 1000000; + + for (const EdgeId &sc_in_edge: graph_.IncomingEdges(vertex)) { + //convert to 
dbg EdgeId + scaffold_graph::ScaffoldVertex sc_in_vertex(sc_in_edge); + scaffold_graph::EdgeGetter edge_getter; + debruijn_graph::EdgeId in_edge = edge_getter.GetEdgeFromScaffoldVertex(sc_in_vertex); + + std::pair max_pair(0, 0); + std::pair second_pair(0, 0); + size_t max_links = 0; + size_t second_links = 0; + for (const EdgeId &sc_out_edge: graph_.OutgoingEdges(vertex)) { + scaffold_graph::ScaffoldVertex sc_out_vertex(sc_out_edge); + debruijn_graph::EdgeId out_edge = edge_getter.GetEdgeFromScaffoldVertex(sc_out_vertex); + if (in_edge == out_edge or in_edge == assembly_graph_.conjugate(out_edge)) { + continue; + } + + scaffold_graph::ScaffoldGraph::ScaffoldEdge sc_edge(in_edge, out_edge); + auto score = score_function->GetScore(sc_edge); + auto link_result = links_.find(in_edge); + if (link_result != links_.end() and link_result->second.find(out_edge) != link_result->second.end()) { + score += LINK_BONUS; + } + total_links += static_cast(score); + if (math::ge(score, score_threshold_)) { + covered_vertices.insert(vertex); + if (score > static_cast(max_links)) { + second_pair = max_pair; + second_links = max_links; + max_links = static_cast(score); + max_pair = std::make_pair(in_edge, out_edge); + } + } + } + if (static_cast(max_links) < static_cast(second_links) * rel_threshold_) { + is_ambiguous = true; + } else if (static_cast(max_links) >= score_threshold_) { + in_to_out[max_pair.first] = max_pair.second; + answer_links += max_links; + } + } + bool is_covered = covered_vertices.find(vertex) != covered_vertices.end(); + VertexState state = GetState(in_to_out, vertex, is_ambiguous, is_covered); + VertexResult result(state, total_links, answer_links, in_to_out); + return result; + } + + void PrintVertexResults(const VertexResults &results, + const std::filesystem::path &output_path, + io::IdMapper *id_mapper) const { + std::ofstream ver_stream(output_path); + ver_stream << + "Vertex Id\tInDegree\tInEdges\tOutDegree\tOutEdges\tCovered edges\tVertex 
result\tSupported paths\tTotal links\tAnswer links\tAnswer\n"; + size_t uncovered = 0; + size_t ambiguous = 0; + size_t partially = 0; + size_t completely = 0; + for (const auto &entry: results.vertex_to_result) { + const auto &vertex_results = entry.second; + switch (vertex_results.state) { + case VertexState::Uncovered: + ++uncovered; + break; + case VertexState::Ambiguous: + ++ambiguous; + break; + case VertexState::Partially: + ++partially; + break; + case VertexState::Completely: + ++completely; + break; + } + ver_stream << VertexResultString(entry.first, vertex_results, id_mapper) << std::endl; + } + INFO(uncovered << " uncovered vertices"); + INFO(ambiguous << " ambiguous vertices"); + INFO(partially << " partially resolved vertices"); + INFO(completely << " completely resolved vertices"); + } + + std::string VertexResultString(const debruijn_graph::VertexId &vertex, + const VertexResult &vertex_result, + io::IdMapper *id_mapper) const { + std::string result_string; + switch (vertex_result.state) { + case VertexState::Uncovered: + result_string = "Uncovered"; + break; + case VertexState::Ambiguous: + result_string = "Ambiguous"; + break; + case VertexState::Partially: + result_string = "Partially"; + break; + case VertexState::Completely: + result_string = "Completely"; + break; + } + std::string answer_string; + for (const auto &entry: vertex_result.supported_pairs) { + answer_string += (*id_mapper)[entry.first.int_id()] + "#" + (*id_mapper)[entry.second.int_id()] + ","; + } + std::string in_edge_string, out_edge_string; + for (const EdgeId &edge: graph_.IncomingEdges(vertex)) { + in_edge_string += (*id_mapper)[edge.int_id()] + ","; + } + for (const EdgeId &edge: graph_.OutgoingEdges(vertex)) { + out_edge_string += (*id_mapper)[edge.int_id()] + ","; + } + in_edge_string = in_edge_string.substr(0, in_edge_string.size() - 1); + out_edge_string = out_edge_string.substr(0, out_edge_string.size() - 1); + answer_string = answer_string.substr(0, 
answer_string.size() - 1); + std::string vertex_string; + vertex_string += + std::to_string(vertex.int_id()) + "\t" + std::to_string(graph_.IncomingEdgeCount(vertex)) + "\t" + + in_edge_string + "\t"; + vertex_string += + std::to_string(graph_.OutgoingEdgeCount(vertex)) + "\t" + out_edge_string + "\t" + result_string + "\t"; + vertex_string += + std::to_string(vertex_result.supported_pairs.size()) + "\t" + std::to_string(vertex_result.total_links); + vertex_string += "\t" + std::to_string(vertex_result.supporting_links) + "\t" + answer_string; + return vertex_string; + } + private: + VertexState GetState(std::unordered_map &in_to_out, + const debruijn_graph::VertexId &vertex, + bool is_ambiguous, + bool is_covered) const { + std::unordered_set in_edges; + std::unordered_set out_edges; + for (const auto &entry: in_to_out) { + in_edges.insert(entry.first); + out_edges.insert(entry.second); + } + if (is_ambiguous or in_edges.size() > out_edges.size()) { + std::unordered_map outedge_to_indegree; + for (const auto &entry: in_to_out) { + outedge_to_indegree[entry.second]++; + } + std::unordered_map new_in_to_out; + for (const auto &entry: in_to_out) { + auto outedge = entry.second; + if (outedge_to_indegree.at(outedge) == 1) { + new_in_to_out[entry.first] = entry.second; + } + } + if (not new_in_to_out.empty()) { + in_to_out = std::move(new_in_to_out); + return VertexState::Partially; + } else { + return VertexState::Ambiguous; + } + } + if (not is_covered) { + return VertexState::Uncovered; + } else { + if (in_edges.size() == graph_.IncomingEdgeCount(vertex)) { + return VertexState::Completely; + } else { + return VertexState::Partially; + } + } + } + + Graph &graph_; + const debruijn_graph::Graph &assembly_graph_; + const LinkMap links_; + std::shared_ptr barcode_extractor_ptr_; + size_t count_threshold_; + size_t tail_threshold_; + size_t length_threshold_; + size_t threads_; + double score_threshold_; + double rel_threshold_; +}; + +} \ No newline at end of file diff 
--git a/src/test/debruijn/barcode_index.hpp b/src/test/debruijn/barcode_index.hpp new file mode 100644 index 0000000000..765c178245 --- /dev/null +++ b/src/test/debruijn/barcode_index.hpp @@ -0,0 +1,16 @@ +#pragma once + +#include +#include "test_utils.hpp" +#include "common/barcode_index/barcode_index.hpp" + +namespace debruijn_graph { + BOOST_AUTO_TEST_SUITE(barcode_index_tests) + + BOOST_AUTO_TEST_CASE(TestingTest) { + BOOST_CHECK_EQUAL(1, 1); + } + + + BOOST_AUTO_TEST_SUITE_END() +} \ No newline at end of file diff --git a/src/test/debruijn/barcode_index_test.hpp b/src/test/debruijn/barcode_index_test.hpp new file mode 100644 index 0000000000..c7a9bf3efe --- /dev/null +++ b/src/test/debruijn/barcode_index_test.hpp @@ -0,0 +1,16 @@ +#pragma once + +#include +#include "test_utils.hpp" +#include "common/barcode_index/barcode_mapper.hpp" + +namespace debruijn_graph { + BOOST_AUTO_TEST_SUITE(barcode_index_tests) + + BOOST_AUTO_TEST_CASE(TestingTest) { + BOOST_CHECK_EQUAL(1, 1); + } + + + BOOST_AUTO_TEST_SUITE_END() +} \ No newline at end of file diff --git a/src/test/debruijn/contracted_graph_test.hpp b/src/test/debruijn/contracted_graph_test.hpp new file mode 100644 index 0000000000..89458d20a2 --- /dev/null +++ b/src/test/debruijn/contracted_graph_test.hpp @@ -0,0 +1,143 @@ +#pragma once + +#include "test_utils.hpp" +#include "auxiliary_graphs/contracted_graph/contracted_graph_helper.hpp" + +#include + +namespace debruijn_graph { + +std::shared_ptr CreateContractedGraph(const Graph& g, size_t length_threshold) { + auto length_predicate = [length_threshold, &g](EdgeId edge) { + return g.length(edge) >= length_threshold; + }; + + contracted_graph::DBGContractedGraphFactory graph_factory(g, length_predicate); + graph_factory.Construct(); + auto contracted_graph = graph_factory.GetGraph(); + + return contracted_graph; +} + +BOOST_FIXTURE_TEST_SUITE(contracted_graph_tests, fs::TmpFolderFixture); + +BOOST_AUTO_TEST_CASE( ContractedGraphBuilder ) { + Graph g(55); + 
graphio::ScanBasicGraph("./src/test/debruijn/graph_fragments/contracted_graph/simple_bulge", g); + BOOST_CHECK_EQUAL(g.size(), 8); + BOOST_CHECK_EQUAL(g.k(), 55); + + size_t contraction_threshold = 80; + auto contracted_graph = CreateContractedGraph(g, contraction_threshold); + + BOOST_CHECK_EQUAL(contracted_graph->size(), g.size()); + BOOST_CHECK_EQUAL(contracted_graph->CountEdges(), 10); + omnigraph::IterationHelper vertex_it_helper(g); + for (const auto& vertex: vertex_it_helper) { + BOOST_CHECK(contracted_graph->ContainsVertex(vertex)); + } + + const size_t first_id = 132238743; + const size_t second_id = 102357497; + const size_t third_id = 243148631; + const size_t fourth_id = 170924699; + + std::map simple_vertex_map; + for (const auto& vertex: vertex_it_helper) { + simple_vertex_map.insert({vertex.int_id(), vertex}); + } + BOOST_CHECK_EQUAL(contracted_graph->GetOutDegree(simple_vertex_map.at(first_id)), 2); + BOOST_CHECK_EQUAL(contracted_graph->GetInDegree(simple_vertex_map.at(first_id)), 0); + BOOST_CHECK_EQUAL(contracted_graph->GetOutDegree(simple_vertex_map.at(second_id)), 1); + BOOST_CHECK_EQUAL(contracted_graph->GetInDegree(simple_vertex_map.at(second_id)), 2); + BOOST_CHECK_EQUAL(contracted_graph->GetOutDegree(simple_vertex_map.at(third_id)), 2); + BOOST_CHECK_EQUAL(contracted_graph->GetInDegree(simple_vertex_map.at(third_id)), 1); + BOOST_CHECK_EQUAL(contracted_graph->GetOutDegree(simple_vertex_map.at(fourth_id)), 0); + BOOST_CHECK_EQUAL(contracted_graph->GetInDegree(simple_vertex_map.at(fourth_id)), 2); + + contraction_threshold = 200; + contracted_graph = CreateContractedGraph(g, contraction_threshold); + BOOST_CHECK_EQUAL(contracted_graph->size(), 6); + BOOST_CHECK_EQUAL(contracted_graph->CountEdges(), 8); + BOOST_CHECK_EQUAL(contracted_graph->GetOutDegree(simple_vertex_map.at(first_id)), 2); + BOOST_CHECK_EQUAL(contracted_graph->GetInDegree(simple_vertex_map.at(first_id)), 0); + 
BOOST_CHECK_EQUAL(contracted_graph->GetOutDegree(simple_vertex_map.at(fourth_id)), 0);
+    BOOST_CHECK_EQUAL(contracted_graph->GetInDegree(simple_vertex_map.at(fourth_id)), 2);
+    std::vector out_edges;
+    for (const auto &edge: contracted_graph->OutcomingEdges(simple_vertex_map.at(first_id))) {
+        out_edges.emplace_back(edge);
+    }
+    BOOST_CHECK_EQUAL(out_edges.size(), 2);
+    out_edges.clear();
+    for (const auto &edge: contracted_graph->OutcomingEdges(simple_vertex_map.at(fourth_id))) {
+        out_edges.emplace_back(edge);
+    }
+    BOOST_CHECK_EQUAL(out_edges.size(), 0);
+
+    std::vector in_edges;
+    for (const auto &edge: contracted_graph->IncomingEdges(simple_vertex_map.at(first_id))) {
+        in_edges.emplace_back(edge);
+    }
+    BOOST_CHECK_EQUAL(in_edges.size(), 0);
+    in_edges.clear();
+    for (const auto &edge: contracted_graph->IncomingEdges(simple_vertex_map.at(fourth_id))) {
+        in_edges.emplace_back(edge);
+    }
+    BOOST_CHECK_EQUAL(in_edges.size(), 2);
+
+    contraction_threshold = 5000;
+    contracted_graph = CreateContractedGraph(g, contraction_threshold);
+    BOOST_CHECK_EQUAL(contracted_graph->size(), 4);
+    BOOST_CHECK_EQUAL(contracted_graph->CountEdges(), 4);
+    BOOST_CHECK_EQUAL(contracted_graph->GetOutDegree(simple_vertex_map.at(first_id)), 2);
+    BOOST_CHECK_EQUAL(contracted_graph->GetInDegree(simple_vertex_map.at(first_id)), 0);
+
+    contraction_threshold = 25000;
+    contracted_graph = std::move(CreateContractedGraph(g, contraction_threshold));
+    BOOST_CHECK_EQUAL(contracted_graph->size(), 2);
+    BOOST_CHECK_EQUAL(contracted_graph->CountEdges(), 0);
+}
+
+BOOST_AUTO_TEST_CASE( ContractedSubgraph ) {  // Calls ExtractContractedSubgraph for three vertex subsets of the simple_bulge fixture and checks size()/CountEdges() of each result.
+    typedef contracted_graph::ContractedGraphFactoryHelper ContractedGraphFactoryHelper;  // local alias so the helper type can be named without its namespace
+    Graph g(55);  // NOTE(review): 55 is presumably the k-mer size the fixture was built with - confirm
+    graphio::ScanBasicGraph("./src/test/debruijn/graph_fragments/contracted_graph/simple_bulge", g);  // reads the simple_bulge.* fixture into g; the path is relative, so the test depends on the working directory
+
+    size_t contraction_threshold = 80;  // NOTE(review): every edge in simple_bulge.grp has l >= 104, so presumably nothing is contracted at this threshold - confirm
+    auto contracted_graph = CreateContractedGraph(g, contraction_threshold);  // CreateContractedGraph is defined outside this test case
+
+    const size_t first_id = 132238743;  // the four ids below are vertex ids declared in simple_bulge.grp
+    const size_t second_id = 102357497;
+    const size_t third_id = 243148631;
+    const size_t fourth_id = 170924699;
+
+    omnigraph::IterationHelper vertex_it_helper(g);  // iterated below to visit the vertices of g
+
+    std::map vertex_map;  // int_id() -> vertex, so the subsets below can be written with numeric ids
+    for (const auto& vertex: vertex_it_helper) {
+        vertex_map.insert({vertex.int_id(), vertex});
+    }
+    ContractedGraphFactoryHelper helper(g);
+
+    std::unordered_set no_fourth_subset({vertex_map.at(first_id),  // subset 1: first, second, third
+                                         vertex_map.at(second_id),
+                                         vertex_map.at(third_id)});
+    auto first_subgraph = helper.ExtractContractedSubgraph(*contracted_graph, no_fourth_subset);
+    BOOST_CHECK_EQUAL(first_subgraph->size(), 3);
+    BOOST_CHECK_EQUAL(first_subgraph->CountEdges(), 3);  // simple_bulge.grp lists two first->second edges and one second->third edge
+
+    std::unordered_set first_third_subset({vertex_map.at(first_id), vertex_map.at(third_id)});  // subset 2: first, third
+    auto second_subgraph = helper.ExtractContractedSubgraph(*contracted_graph, first_third_subset);
+    BOOST_CHECK_EQUAL(second_subgraph->size(), 2);
+    BOOST_CHECK_EQUAL(second_subgraph->CountEdges(), 0);  // simple_bulge.grp has no edge directly between first and third
+
+    std::unordered_set no_third_subset({vertex_map.at(first_id),  // subset 3: first, second, fourth
+                                        vertex_map.at(second_id),
+                                        vertex_map.at(fourth_id)});
+    auto third_subgraph = helper.ExtractContractedSubgraph(*contracted_graph, no_third_subset);
+    BOOST_CHECK_EQUAL(third_subgraph->size(), 3);
+    BOOST_CHECK_EQUAL(third_subgraph->CountEdges(), 2);  // in simple_bulge.grp only the two first->second edges join vertices of this subset
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+}
diff --git a/src/test/debruijn/graph_fragments/contracted_graph/simple_bulge.cvr b/src/test/debruijn/graph_fragments/contracted_graph/simple_bulge.cvr
new file mode 100644
index 0000000000..167bc99502
--- /dev/null
+++ b/src/test/debruijn/graph_fragments/contracted_graph/simple_bulge.cvr
@@ -0,0 +1,11 @@
+10
+419360740 176.768502 .
+419360741 176.768502 .
+419388514 464.834644 .
+419388515 464.834644 .
+419433555 528.524995 .
+419433556 528.524995 .
+419434025 727.076923 .
+419434026 727.076923 .
+419442075 179.959408 .
+419442076 179.959408 .
diff --git a/src/test/debruijn/graph_fragments/contracted_graph/simple_bulge.dot b/src/test/debruijn/graph_fragments/contracted_graph/simple_bulge.dot new file mode 100644 index 0000000000..c2b87822aa --- /dev/null +++ b/src/test/debruijn/graph_fragments/contracted_graph/simple_bulge.dot @@ -0,0 +1,24 @@ +digraph graph_picture { +node[fontname= ,penwidth=<1.8> ,shape= ] +vertex_102357496_102357497[label=<<TABLE BORDER="1" CELLSPACING="0" > +<TR><TD BORDER="0" PORT = "port_102357497_in" color="white" bgcolor="white" ></TD><TD BORDER="0" color="white" bgcolor="white" >102357497</TD><TD BORDER="0" PORT = "port_102357497_out" color="white" bgcolor="white" ></TD></TR> +<TR><TD BORDER="0" PORT = "port_102357496_out" color="white" bgcolor="white" ></TD><TD BORDER="0" color="white" bgcolor="white" >102357496</TD><TD BORDER="0" PORT = "port_102357496_in" color="white" bgcolor="white" ></TD></TR> +</TABLE>> ,color=<black> ,URL=</vertex/102357497.svg> ] +vertex_132238742_132238743[label=<<TABLE BORDER="1" CELLSPACING="0" > +<TR><TD BORDER="0" PORT = "port_132238743_in" color="yellow" bgcolor="yellow" ></TD><TD BORDER="0" color="yellow" bgcolor="yellow" >132238743</TD><TD BORDER="0" PORT = "port_132238743_out" color="yellow" bgcolor="yellow" ></TD></TR> +<TR><TD BORDER="0" PORT = "port_132238742_out" color="white" bgcolor="white" ></TD><TD BORDER="0" color="white" bgcolor="white" >132238742</TD><TD BORDER="0" PORT = "port_132238742_in" color="white" bgcolor="white" ></TD></TR> +</TABLE>> ,color=<black> ,URL=</vertex/132238743.svg> ] +vertex_170924698_170924699[label=<<TABLE BORDER="1" CELLSPACING="0" > +<TR><TD BORDER="0" PORT = "port_170924699_in" color="yellow" bgcolor="yellow" ></TD><TD BORDER="0" color="yellow" bgcolor="yellow" >170924699</TD><TD BORDER="0" PORT = "port_170924699_out" color="yellow" bgcolor="yellow" ></TD></TR> +<TR><TD BORDER="0" PORT = "port_170924698_out" color="white" bgcolor="white" ></TD><TD BORDER="0" color="white" bgcolor="white" >170924698</TD><TD 
BORDER="0" PORT = "port_170924698_in" color="white" bgcolor="white" ></TD></TR> +</TABLE>> ,color=<black> ,URL=</vertex/170924699.svg> ] +vertex_243148630_243148631[label=<<TABLE BORDER="1" CELLSPACING="0" > +<TR><TD BORDER="0" PORT = "port_243148631_in" color="white" bgcolor="white" ></TD><TD BORDER="0" color="white" bgcolor="white" >243148631</TD><TD BORDER="0" PORT = "port_243148631_out" color="white" bgcolor="white" ></TD></TR> +<TR><TD BORDER="0" PORT = "port_243148630_out" color="white" bgcolor="white" ></TD><TD BORDER="0" color="white" bgcolor="white" >243148630</TD><TD BORDER="0" PORT = "port_243148630_in" color="white" bgcolor="white" ></TD></TR> +</TABLE>> ,color=<black> ,URL=</vertex/243148631.svg> ] +vertex_243148630_243148631:port_243148631_out->vertex_170924698_170924699:port_170924699_in[label="Id 419360740 (3905)\nPositions:\nLen(cov): 3905(176.768502)" ,color=<black> ] +vertex_243148630_243148631:port_243148631_out->vertex_170924698_170924699:port_170924699_in[label="Id 419388515 (3943)\nPositions:\nLen(cov): 3943(464.834644)" ,color=<black> ] +vertex_132238742_132238743:port_132238743_out->vertex_102357496_102357497:port_102357497_in[label="Id 419433555 (20284)\nPositions:\nLen(cov): 20284(528.524995)" ,color=<black> ] +vertex_102357496_102357497:port_102357497_out->vertex_243148630_243148631:port_243148631_in[label="Id 419434025 (104)\nPositions:\nLen(cov): 104(727.076923)" ,color=<black> ] +vertex_132238742_132238743:port_132238743_out->vertex_102357496_102357497:port_102357497_in[label="Id 419442076 (19388)\nPositions:\nLen(cov): 19388(179.959408)" ,color=<black> ] +} diff --git a/src/test/debruijn/graph_fragments/contracted_graph/simple_bulge.flcvr b/src/test/debruijn/graph_fragments/contracted_graph/simple_bulge.flcvr new file mode 100644 index 0000000000..87d5e34fd8 --- /dev/null +++ b/src/test/debruijn/graph_fragments/contracted_graph/simple_bulge.flcvr @@ -0,0 +1,11 @@ +10 +419360740 6909 . +419360741 5834 . +419388514 15538 . 
+419388515 13733 . +419433555 16557 . +419433556 13501 . +419434025 20217 . +419434026 22879 . +419442075 5129 . +419442076 6935 . diff --git a/src/test/debruijn/graph_fragments/contracted_graph/simple_bulge.gid b/src/test/debruijn/graph_fragments/contracted_graph/simple_bulge.gid new file mode 100644 index 0000000000..0c085a2b9f --- /dev/null +++ b/src/test/debruijn/graph_fragments/contracted_graph/simple_bulge.gid @@ -0,0 +1 @@ +419462720 diff --git a/src/test/debruijn/graph_fragments/contracted_graph/simple_bulge.grp b/src/test/debruijn/graph_fragments/contracted_graph/simple_bulge.grp new file mode 100644 index 0000000000..f735ae15f5 --- /dev/null +++ b/src/test/debruijn/graph_fragments/contracted_graph/simple_bulge.grp @@ -0,0 +1,20 @@ +8 10 +Vertex 102357496 ~ 102357497 . +Vertex 102357497 ~ 102357496 . +Vertex 132238742 ~ 132238743 . +Vertex 132238743 ~ 132238742 . +Vertex 170924698 ~ 170924699 . +Vertex 170924699 ~ 170924698 . +Vertex 243148630 ~ 243148631 . +Vertex 243148631 ~ 243148630 . + +Edge 419360740 : 243148631 -> 170924699, l = 3905 ~ 419360741 . +Edge 419360741 : 170924698 -> 243148630, l = 3905 ~ 419360740 . +Edge 419388514 : 170924698 -> 243148630, l = 3943 ~ 419388515 . +Edge 419388515 : 243148631 -> 170924699, l = 3943 ~ 419388514 . +Edge 419433555 : 132238743 -> 102357497, l = 20284 ~ 419433556 . +Edge 419433556 : 102357496 -> 132238742, l = 20284 ~ 419433555 . +Edge 419434025 : 102357497 -> 243148631, l = 104 ~ 419434026 . +Edge 419434026 : 243148630 -> 102357496, l = 104 ~ 419434025 . +Edge 419442075 : 102357496 -> 132238742, l = 19388 ~ 419442076 . +Edge 419442076 : 132238743 -> 102357497, l = 19388 ~ 419442075 . 
diff --git a/src/test/debruijn/graph_fragments/contracted_graph/simple_bulge.pos b/src/test/debruijn/graph_fragments/contracted_graph/simple_bulge.pos new file mode 100644 index 0000000000..91072269d7 --- /dev/null +++ b/src/test/debruijn/graph_fragments/contracted_graph/simple_bulge.pos @@ -0,0 +1,11 @@ +10 +419360740 0 +419360741 0 +419388514 0 +419388515 0 +419433555 0 +419433556 0 +419434025 0 +419434026 0 +419442075 0 +419442076 0 diff --git a/src/test/debruijn/graph_fragments/contracted_graph/simple_bulge.sqn b/src/test/debruijn/graph_fragments/contracted_graph/simple_bulge.sqn new file mode 100644 index 0000000000..d57ed587ca --- /dev/null +++ b/src/test/debruijn/graph_fragments/contracted_graph/simple_bulge.sqn @@ -0,0 +1,20 @@ +>419360740 +TTTACCTTCGTCACCCCATTGGGTGCCCAGTACGACGACGTTGTTACCCATTTTTTTCAAAATCACCGTTTGCTTAAAAATGGATTCTACCATCGCTTTTTCAGATATACAGCACTTTTTGACCCCAAAATATGCCAAAACCGATCACTTTTTGATCAGCCAATCGTTTTCCTCAACATGTAGTAGATAACAATGCCAGCAACCACAAGACCGCCGCCGAATCGGCGCAAAATATTATCCGGCAACTGGCTCAGGGTGGCGATCATGCGACGCCAGGCGCGCGGATAGAGCATCGGGCCCAGGCCTTCCAGGACTAAAACCAGCGCCAGTGCCAGCCAGATCGTTGAATTCATTTTTAATCCTTATAAAAGAAAACCACCGCTCCGTTAAGAGCGGTGGTTTGAATACTCAAACGAGACTGAGTTTATCGCGTTGCGTTAGTCGGCGTCTTCATATAACGGAAGAAATCGCTGTCCGGGCTGAGCACCATCACATCCTGGTTGCTCTGGAAGCTATTCTCGTAAGCACGCAGGCTACGGATAAAGGCGTAGAAGTCTGGATCCTGGCTAAACGCATCGGCGAACAGTTTCGCGGCTTCCGCATCACCTTCACCACGCAGGATACGGCCCTGACGCTCAGATTCCGCCAGCGTCTTGGTCACTTCGTAGTCCGCGGCCGCACGAAGTTTTTCCGCTTCTTCCTGACCCTGTGAACGGTGACGACGGGCTACCGCTTCACGCTCGGCGCGCATACGGTTGTAAATCGCCTCAGACACTTCCGCCGGCAGGTTGATCTGCTTAATACGCACATCCACCACTTCGATACCCAGCGCAGCCATACTGTTCGGGTTGATTACAGGCACTTTACCATTGGTTTCAGCCTGAACACGTTCCGCCGCTTTAGCAATTGCATCGTCCGCTGCTGGCGTTTCAACTTCGTCTTCCGTACCCGCGGTGCCGGAGTTCAGCGCATCGCGCACTTCCAGCGTCAGACGACCACGAGAGTCGGTCACGATGTCTTTCACATCCAGACGACCAATTTCAGAACGCAGACGGTCAGAGAATTTACGTTTCAACAGCACTTCCGCCTGAGAAACATCACCGCCGCCCGTTGCCAGGAAGTAACGGCTGAAATCACTGATACGCCACTTGATATAAGAGTCAACGATCAGGTCTTTCTTCTCTTTAGTCACGAAACGGTCGGCCTGGTTATCCATGGTCTGGATACGCGCATCAAGCGTC
TTCACTGACTGGATAAAAGGAACCTTGAAGTGCAGACCCGGCTCATAAATCACCGGACGCTTGTCACTGTCACGGACGACGCTGCTGAACTGGAACTTAATCCCGCGCTCGCCCTCTTTCACCACAAAGATAGAGGTATAAAGTACGACCAGTACGATGACGATGATCGCAATAACTGACTTACGCATCCTTATTCCCCCTGACGCTGGTAGTCGTTACGCTGCGCGTTAGCACGGCGTTGGTCCATAATGTCACCATCGTTCGAAGAAGGCGTGGTTGTTGCGCTGGAGCTACCGGACGATGCAGGTGGCAGACGCAGCAGGTTATTCGCACCGCTGTTGTCTTTTGCTGCCGGTGCAGAACCGCCTTTCAGCATCTGATCCAGCGGCAGTACCATCAGGTTTCCACCTTTGCTGTCGTTAACCAGCACTTTACGTGTATGGCTCAGCACTTTTTCCATGGTTTCGATATAGAGACGCTCGCGGGTAATTTCCGGAGCCGCTTTATATTCCGGCAGGATTTTCGCGAAACGAGCCACCTCACCCTGCGCTTCCAGGATGGTCTGGGTCTTATACGCGCGTGCTTCTTCAAGAATACGCTGCGCCTGACCGTTAGCACGTGGCTGAACTTCGTTGGCGTACGCTTCCGCTTCACGGATGTACTGCTGTTCGTTCTCACGCGCGGCAATCGCGTCATCAAACGCGGCTTTCACCTCTTCCGGCGGACGAGCAGCCTGGAAGTTGACGTCCAGCAGGGTGATACCCATGTTGTACGGACGAATGGTCTCTTCCAGCTCACGCTGTGTATCGCTACGAATAACGGTACGACCTTCAGTCAGAATGCGGTCCATGGTGTATTTACCGATCACACCACGCAGGGCGCTGTCGGTCGCCTGACGCAGACTGTCATCGGCACTGGTTACGCTAAACAGATAACGTTCTGGATCCGTAACACGATATTGCACGTTCATCTCAACGCGCACTACGTTCTCATCAGAGGTCAGCATCACGCCGGAAGCAGCCAGCTCACGCACCGACTCCACGTTCACTGCAGTGACGTCGTCAATGAAGGTCGGTTTCCAGTTCAGGCCCGGCTCAACCAGATGGCTGAACTTACCAAAACGGGTGACCACGCCGCGCTCGGCTTCTTTAATGGTGTAGAACCCGCTGGCAGCCCAGATGATGACCACAGCAGCGGCCACAATGCCCACGATGCGACCACCCATTTGCGGGCGCGGGCCCTGGGTAGAATTGCCACCCGAGCCAGACCCTTTTCCTCCACCAAGCCCACCAAGCTTCTTGCTTAGCTTGCGGAAGATATCATCCAGATCCGGCGGCCCCTGATCGCGACCACCTTTGTTGCCATTTCCCCCAGAGTTGCCGCCTTGATTATTGCTGCTTCCCCACGGGTCGCGGTCCTGTCCGTTATTACCGGGCTGATTCCACGCCATGTATATGCTCCATATTTGTTATGCAAGGGCGAATTATTCAGGCATCCCCTTTCCGATCAGACGATATAGTCGACCAGCGCAGGTTCTTGTTTACAGAGGCGACGCCAGTCAACGATCGGCATACGCACCTGCATCCCCACGCTGCCGTCATCCTCCATCCACTCTTTTTCTATCGCCTGAAGCTGATAAAACCGGCTGCGCAGCCTGCCTTCCTGTGGTGGCAGTCGCAGCGTGTGCTGGGCTACCTCACCGGAAAGACGTTCTGTCAAAGCCTGGAAAAGCAGTGGCACGCCAATACCGGTCTGGGCGGAAAGCCAGACCCGAATTGGTTTGTTTTCTTCATCACGATCGATACGCGGCTCAAAATCATCCAGCATGTCGATCTTGTTCATCACCAACAGCGTTGGGATCTCGTGGGCCTCGATCTCTTCGAGCACCACATTCACCGCGTCGATGTTTTCCTGCACGCGAACATCTGCAGCATCAATCACGTGCAGCAGCAGGGTCGCCTGACGCGTCTCCTGCAGGGTGGCTTTAAACGCGGCCACCA
GATCATGCGGCAGGTGGCGGATAAACCCTACGGTATCCGCCAGCACGGTTTCACCCACATCCGCCACGTCGATACGACGCAACGTCGGGTCCAGGGTCGCAAACAGCTGGTCTGCGGCATAGACCTGGGCCTCGGTAATCTGGTTAAACAGGGTGGATTTACCGGCGTTGGTATACCCCACCAGCGACACCGTCGGGATGTCGGCTTTCGTACGCGCCCGACGTCCCTGCTCACGCTGTTTCTCGACTCTTTCCAGACGAGAGAGGATCTGAGTAATACGGCCGCGCAGCAAACGGCGGTCGGTTTCGAGCTGGGTTTCACCCGGACCGCGCAAACCAATCCCGCCTTTTTGTCGTTCAAGGTGGGTCCAGCCACGTACAAGACGCGTGGCCAGATGGCGCAACTGCGCCAGCTCAACCTGCAGCTTACCCTCGTGGGTACGTGCACGCTGAGCAAAAATATCTAAAATCAACCCCGTGCGATCGATAACACGGCATTCGCAAAGCGCTTCCAGGTTACGCTCCTGAGCCGGAGACAGCGCATGATCAAACAACACGACTGAAGCACCGGTTGCTTTTACGGCATCCGCAATTTCTACTGCTTTACCTTCACCAACAAAATACTTTGGGTGCGGCGCTTTACGGCTACCGGTAATCACCTGCATTGCTTCGACACCGGCGGAAGAGACCAGAGATTCAAACTCCTGGAGGTCTTCCATA +>419360741 +TATGGAAGACCTCCAGGAGTTTGAATCTCTGGTCTCTTCCGCCGGTGTCGAAGCAATGCAGGTGATTACCGGTAGCCGTAAAGCGCCGCACCCAAAGTATTTTGTTGGTGAAGGTAAAGCAGTAGAAATTGCGGATGCCGTAAAAGCAACCGGTGCTTCAGTCGTGTTGTTTGATCATGCGCTGTCTCCGGCTCAGGAGCGTAACCTGGAAGCGCTTTGCGAATGCCGTGTTATCGATCGCACGGGGTTGATTTTAGATATTTTTGCTCAGCGTGCACGTACCCACGAGGGTAAGCTGCAGGTTGAGCTGGCGCAGTTGCGCCATCTGGCCACGCGTCTTGTACGTGGCTGGACCCACCTTGAACGACAAAAAGGCGGGATTGGTTTGCGCGGTCCGGGTGAAACCCAGCTCGAAACCGACCGCCGTTTGCTGCGCGGCCGTATTACTCAGATCCTCTCTCGTCTGGAAAGAGTCGAGAAACAGCGTGAGCAGGGACGTCGGGCGCGTACGAAAGCCGACATCCCGACGGTGTCGCTGGTGGGGTATACCAACGCCGGTAAATCCACCCTGTTTAACCAGATTACCGAGGCCCAGGTCTATGCCGCAGACCAGCTGTTTGCGACCCTGGACCCGACGTTGCGTCGTATCGACGTGGCGGATGTGGGTGAAACCGTGCTGGCGGATACCGTAGGGTTTATCCGCCACCTGCCGCATGATCTGGTGGCCGCGTTTAAAGCCACCCTGCAGGAGACGCGTCAGGCGACCCTGCTGCTGCACGTGATTGATGCTGCAGATGTTCGCGTGCAGGAAAACATCGACGCGGTGAATGTGGTGCTCGAAGAGATCGAGGCCCACGAGATCCCAACGCTGTTGGTGATGAACAAGATCGACATGCTGGATGATTTTGAGCCGCGTATCGATCGTGATGAAGAAAACAAACCAATTCGGGTCTGGCTTTCCGCCCAGACCGGTATTGGCGTGCCACTGCTTTTCCAGGCTTTGACAGAACGTCTTTCCGGTGAGGTAGCCCAGCACACGCTGCGACTGCCACCACAGGAAGGCAGGCTGCGCAGCCGGTTTTATCAGCTTCAGGCGATAGAAAAAGAGTGGATGGAGGATGACGGCAGCGTGGGGATGCAGGTGCGTATGCCGATCGTTGACTGGCGTCGCCTCTGTAAACAAGAACCTGCGCTGGTCGACTATATCGTCTGATCGGAAAGGGGATGCCTGAATAATTCGCCCTTGCATAACAAATATGGAGCATAT
ACATGGCGTGGAATCAGCCCGGTAATAACGGACAGGACCGCGACCCGTGGGGAAGCAGCAATAATCAAGGCGGCAACTCTGGGGGAAATGGCAACAAAGGTGGTCGCGATCAGGGGCCGCCGGATCTGGATGATATCTTCCGCAAGCTAAGCAAGAAGCTTGGTGGGCTTGGTGGAGGAAAAGGGTCTGGCTCGGGTGGCAATTCTACCCAGGGCCCGCGCCCGCAAATGGGTGGTCGCATCGTGGGCATTGTGGCCGCTGCTGTGGTCATCATCTGGGCTGCCAGCGGGTTCTACACCATTAAAGAAGCCGAGCGCGGCGTGGTCACCCGTTTTGGTAAGTTCAGCCATCTGGTTGAGCCGGGCCTGAACTGGAAACCGACCTTCATTGACGACGTCACTGCAGTGAACGTGGAGTCGGTGCGTGAGCTGGCTGCTTCCGGCGTGATGCTGACCTCTGATGAGAACGTAGTGCGCGTTGAGATGAACGTGCAATATCGTGTTACGGATCCAGAACGTTATCTGTTTAGCGTAACCAGTGCCGATGACAGTCTGCGTCAGGCGACCGACAGCGCCCTGCGTGGTGTGATCGGTAAATACACCATGGACCGCATTCTGACTGAAGGTCGTACCGTTATTCGTAGCGATACACAGCGTGAGCTGGAAGAGACCATTCGTCCGTACAACATGGGTATCACCCTGCTGGACGTCAACTTCCAGGCTGCTCGTCCGCCGGAAGAGGTGAAAGCCGCGTTTGATGACGCGATTGCCGCGCGTGAGAACGAACAGCAGTACATCCGTGAAGCGGAAGCGTACGCCAACGAAGTTCAGCCACGTGCTAACGGTCAGGCGCAGCGTATTCTTGAAGAAGCACGCGCGTATAAGACCCAGACCATCCTGGAAGCGCAGGGTGAGGTGGCTCGTTTCGCGAAAATCCTGCCGGAATATAAAGCGGCTCCGGAAATTACCCGCGAGCGTCTCTATATCGAAACCATGGAAAAAGTGCTGAGCCATACACGTAAAGTGCTGGTTAACGACAGCAAAGGTGGAAACCTGATGGTACTGCCGCTGGATCAGATGCTGAAAGGCGGTTCTGCACCGGCAGCAAAAGACAACAGCGGTGCGAATAACCTGCTGCGTCTGCCACCTGCATCGTCCGGTAGCTCCAGCGCAACAACCACGCCTTCTTCGAACGATGGTGACATTATGGACCAACGCCGTGCTAACGCGCAGCGTAACGACTACCAGCGTCAGGGGGAATAAGGATGCGTAAGTCAGTTATTGCGATCATCGTCATCGTACTGGTCGTACTTTATACCTCTATCTTTGTGGTGAAAGAGGGCGAGCGCGGGATTAAGTTCCAGTTCAGCAGCGTCGTCCGTGACAGTGACAAGCGTCCGGTGATTTATGAGCCGGGTCTGCACTTCAAGGTTCCTTTTATCCAGTCAGTGAAGACGCTTGATGCGCGTATCCAGACCATGGATAACCAGGCCGACCGTTTCGTGACTAAAGAGAAGAAAGACCTGATCGTTGACTCTTATATCAAGTGGCGTATCAGTGATTTCAGCCGTTACTTCCTGGCAACGGGCGGCGGTGATGTTTCTCAGGCGGAAGTGCTGTTGAAACGTAAATTCTCTGACCGTCTGCGTTCTGAAATTGGTCGTCTGGATGTGAAAGACATCGTGACCGACTCTCGTGGTCGTCTGACGCTGGAAGTGCGCGATGCGCTGAACTCCGGCACCGCGGGTACGGAAGACGAAGTTGAAACGCCAGCAGCGGACGATGCAATTGCTAAAGCGGCGGAACGTGTTCAGGCTGAAACCAATGGTAAAGTGCCTGTAATCAACCCGAACAGTATGGCTGCGCTGGGTATCGAAGTGGTGGATGTGCGTATTAAGCAGATCAACCTGCCGGCGGAAGTGTCTGAGGCGATTTACAACCGTATGCGCGCCGAGCGTGAAGCGGTAGCCCGTCGTCACCGTTCACAGGGTCAGGAAGAAG
CGGAAAAACTTCGTGCGGCCGCGGACTACGAAGTGACCAAGACGCTGGCGGAATCTGAGCGTCAGGGCCGTATCCTGCGTGGTGAAGGTGATGCGGAAGCCGCGAAACTGTTCGCCGATGCGTTTAGCCAGGATCCAGACTTCTACGCCTTTATCCGTAGCCTGCGTGCTTACGAGAATAGCTTCCAGAGCAACCAGGATGTGATGGTGCTCAGCCCGGACAGCGATTTCTTCCGTTATATGAAGACGCCGACTAACGCAACGCGATAAACTCAGTCTCGTTTGAGTATTCAAACCACCGCTCTTAACGGAGCGGTGGTTTTCTTTTATAAGGATTAAAAATGAATTCAACGATCTGGCTGGCACTGGCGCTGGTTTTAGTCCTGGAAGGCCTGGGCCCGATGCTCTATCCGCGCGCCTGGCGTCGCATGATCGCCACCCTGAGCCAGTTGCCGGATAATATTTTGCGCCGATTCGGCGGCGGTCTTGTGGTTGCTGGCATTGTTATCTACTACATGTTGAGGAAAACGATTGGCTGATCAAAAAGTGATCGGTTTTGGCATATTTTGGGGTCAAAAAGTGCTGTATATCTGAAAAAGCGATGGTAGAATCCATTTTTAAGCAAACGGTGATTTTGAAAAAAATGGGTAACAACGTCGTCGTACTGGGCACCCAATGGGGTGACGAAGGTAAA +>419388514 +TATGGAAGACCTCCAGGAGTTTGAATCTCTGGTCTCTTCCGCCGGTGTCGAAGCATTGCAGGTGATTACCGGTAGCCGTAAAGCGCCGCACCCAAAGTATTTTGTAGGTGAAGGTAAAGCAGTTGAAATTGCGGAAGCTGTCAAAGCGACGGGTGCTTCGGTCGTTCTTTTTGACCATGCCCTGAGCCCGGCGCAAGAGCGTAACCTGGAGCGTTTGTGCGAGTGTCGTGTTATCGACCGCACCGGCCTTATTTTAGATATTTTCGCCCAACGTGCGCGTACCCATGAGGGTAAGTTGCAGGTTGAGCTGGCGCAGCTGCGCCATCTGGCTACGCGCCTGGTGCGTGGCTGGACCCACCTTGAAAGACAGAAAGGCGGGATAGGTTTGCGTGGTCCGGGTGAAACCCAGCTCGAAACCGACCGTCGTTTGTTGCGTAATCGCATCGTGCAGATACAGTCGCGCCTGGAAAGAGTTGAAAAGCAGCGTGAGCAGGGGCGGCAATCGCGTATCAAAGCCGACGTTCCTACTGTTTCGCTGGTGGGATATACCAACGCCGGTAAATCTACCCTTTTCAATCGCATCACCGAAGCGCGGGTCTACGCGGCAGACCAGTTGTTTGCCACCCTCGACCCGACGTTGCGGCGTATTGACGTTGCAGATGTCGGTGAAACCGTACTTGCAGATACCGTAGGGTTTATTCGCCACCTGCCGCACGATCTGGTGGCGGCATTTAAAGCCACGTTACAAGAGACGCGGCAAGCCACATTACTGCTGCACGTCATTGATGCGGCGGATGTGCGTGTACAAGAAAACATCGAAGCGGTGAATACGGTTCTTGAAGAGATCGACGCTCACGAGATCCCAACCCTGCTGGTGATGAACAAGATCGATATGCTGGAAGATTTCGAACCGCGTATTGATCGGGACGAAGAGAACAAACCGAACCGTGTCTGGCTTTCCGCACAGACCGGAGCGGGGATACCACAGCTTTTTCAGGCTTTGACGGAGCGGCTTTCCGGCGAGGTGGCGCAGCATACATTGCGTCTGCCACCGCAGGAAGGGCGTCTGAGAAGTCGTTTTTATCAGCTTCAGGCAATAGAAAAAGAGTGGATGGAGGAGGACGGCAGCGTAAGTCTGCAAGTTCGTATGCCGATCGTTGACTGGCGTCGCCTCTGTAAACAAGAACCGGCGTTGATCGATTACCTGATCTAACGGCGTAGCGTCTGAAGCGTGGAGTCATATCCTCTGGCGTCGAAAGACAACAGGGATCACCGCATAACAAATATGGAGCA
CAAACATGGCGTGGAATCAGCCCGGTAATAACGGACAAGACCGCGACCCGTGGGGAAGCAGCAAACCTGGCGGCAACTCTGAGGGAAATGGAAACAAAGGCGGTCGCGATCAAGGGCCACCTGATTTAGATGATATCTTCCGCAAACTGAGCAAAAAGCTCGGTGGTCTGGGCGGCGGTAAAGGCACCGGATCTGGCGGTGGCAGTTCATCGCAAGGCCCGCGCCCGCAGCTTGGCGGTCGTGTCGTTACCATCGCAGCGGCAGCGATTGTCATTATCTGGGCGGCCAGTGGTTTCTATACCATTAAAGAAGCCGAACGCGGCGTGGTAACACGCTTTGGTAAATTCAGCCATCTGGTTGAGCCGGGTCTGAACTGGAAACCGACGTTTATCGACGAAGTCAAACCGGTGAACGTGGAAGCCGTGCGTGAACTGGCCGCTTCTGGTGTGATGCTGACGTCGGACGAGAACGTAGTGCGCGTTGAGATGAACGTGCAGTACCGCGTCACCAATCCGGAAAAATATCTGTATAGCGTGACCAGCCCGGATGACAGCCTGCGTCAGGCTACCGACAGCGCCCTGCGTGGAGTTATCGGTAAATACACCATGGACCGCATTCTGACGGAAGGTCGTACCGTGATTCGTAGCGATACTCAGCGCGAACTGGAAGAGACGATTCGTCCGTATGACATGGGTATCACGCTGCTGGACGTCAACTTCCAGGCTGCTCGTCCGCCGGAAGAAGTAAAAGCGGCGTTTGACGATGCGATTGCCGCGCGTGAAAACGAACAGCAATACATTCGTGAAGCAGAAGCGTATACCAACGAAGTTCAGCCGCGTGCGAACGGTCAGGCGCAACGTATCCTCGAAGAGGCGCGTGCGTACAAGGCCCAGACCATCCTGGAAGCTCAGGGTGAAGTGGCGCGCTTTGCTAAACTTCTGCCGGAATATAAAGCCGCGCCGGAAATTACTCGCGAGCGTCTGTATATCGAGACGATGGAAAAAGTGTTGGGTAACACCCGCAAAGTGCTGGTTAACGATAAAGGTGGCAACCTGATGGTTCTGCCGTTAGACCAGATGCTGAAAGGTGGTAACGCCCCTGCGGCGAAGAGCGATAACGGTGCCAGCAATCTGCTGCGTCTGCCGCCAGCCTCTTCCTCCACAACCAGTGGAGCAAGCAACACGTCGTCCACCAGTCAGGGCGATATTATGGACCAACGCCGCGCCAACGCGCAGCGTAACGACTACCAGCGTCAGGGGGAATAACGATGCGTAAGTCAGTTATCGCGATTATCATCATCGTGCTGGTAGTGCTTTACATGTCTGTCTTTGTCGTCAAAGAAGGTGAGCGCGGTATTACGCTGCGTTTTGGTAAGGTACTGCGTGACGATGACAACAAACCTCTGGTTTATGAGCCGGGTCTGCATTTCAAGATACCGTTCATTGAAACGGTGAAAATGCTCGACGCACGTATTCAGACCATGGACAACCAGGCCGACCGCTTTGTGACCAAAGAGAAGAAAGACCTGATCGTCGACTCTTACATCAAATGGCGCATCAGCGATTTCAGCCGTTACTACCTGGCAACGGGTGGTGGCGACATTTCGCAAGCGGAAGTGCTGTTGAAACGTAAGTTCTCTGACCGTCTGCGTTCTGAAATTGGTCGCCTGGACGTGAAAGATATCGTCACCGATTCCCGTGGTCGTCTGACCCTCGAAGTACGTGACGCGCTGAACTCCGGTTCTGCGGGTACAGAAGATGAAGTTACTACCCCGGCGGCAGATAACGCCATTGCCGAAGCGGCAGAGCGCGTAACGGCTGAGACGAAGGGCAAAGTTCCGGTCATCAACCCGAACAGTATGGCGGCGCTGGGTATTGAAGTTGTCGATGTGCGTATCAAGCAGATCAACCTGCCGACCGAAGTGTCTGAAGCGATCTACAACCGTATGCGCGCCGAGCGTGAAGCGGTAGCGCGTCGTCACCGTTCACAAGGTCAGGAAG
AAGCGGAAAAACTGCGCGCGACTGCCGACTATGAAGTGACCAGAACGCTGGCAGAAGCTGAGCGTCAGGGCCGCATCATGCGTGGTGAAGGCGATGCCGAAGCAGCCAAACTGTTTGCTGATGCATTCAGTAAAGATCCGGACTTCTACGCATTCATCCGTAGCCTGCGTGCTTATGAGAACAGCTTCTCTGGCAATCAGGACGTGATGGTCATGAGCCCGGATAGCGATTTCTTCCGCTACATGAAGACGCCGACTTCCGCAACGCGTTAATATAACGACTGCGGTACAGGTCAATAAAGCCACCGCATCCTCAGGGATGTCGGTGGTTTTCTTTTTCTATAAGGATAATGAATGAATTCGACAATCTGGCTGGCGCTTGCCCTGGTTTTGGTACTGGAAGGTTTAGGGCCGATGCTTTACCCGAAGGCATGGAAGAAGATGATCTCTGCGATGACCAATTTGCCCGATAATATTTTACGTCGTTTTGGCGGTGGACTTGTGGTTGCGGGCGTTGTGGTCTACTACATGTTGAGGAAAACGATTGGCTGAACAAAAAACAGACTGATCGAGGTCATTTTTGAGTGCAAAAAGTGCTGTAACTCTGAAAAAGCGATGGTAGAATCCATTTTTAAGCAAACGGTGATTTTGAAAAATGGGTAACAACGTCGTCGTACTGGGCACCCAATGGGGTGACGAAGGTAAA +>419388515 +TTTACCTTCGTCACCCCATTGGGTGCCCAGTACGACGACGTTGTTACCCATTTTTCAAAATCACCGTTTGCTTAAAAATGGATTCTACCATCGCTTTTTCAGAGTTACAGCACTTTTTGCACTCAAAAATGACCTCGATCAGTCTGTTTTTTGTTCAGCCAATCGTTTTCCTCAACATGTAGTAGACCACAACGCCCGCAACCACAAGTCCACCGCCAAAACGACGTAAAATATTATCGGGCAAATTGGTCATCGCAGAGATCATCTTCTTCCATGCCTTCGGGTAAAGCATCGGCCCTAAACCTTCCAGTACCAAAACCAGGGCAAGCGCCAGCCAGATTGTCGAATTCATTCATTATCCTTATAGAAAAAGAAAACCACCGACATCCCTGAGGATGCGGTGGCTTTATTGACCTGTACCGCAGTCGTTATATTAACGCGTTGCGGAAGTCGGCGTCTTCATGTAGCGGAAGAAATCGCTATCCGGGCTCATGACCATCACGTCCTGATTGCCAGAGAAGCTGTTCTCATAAGCACGCAGGCTACGGATGAATGCGTAGAAGTCCGGATCTTTACTGAATGCATCAGCAAACAGTTTGGCTGCTTCGGCATCGCCTTCACCACGCATGATGCGGCCCTGACGCTCAGCTTCTGCCAGCGTTCTGGTCACTTCATAGTCGGCAGTCGCGCGCAGTTTTTCCGCTTCTTCCTGACCTTGTGAACGGTGACGACGCGCTACCGCTTCACGCTCGGCGCGCATACGGTTGTAGATCGCTTCAGACACTTCGGTCGGCAGGTTGATCTGCTTGATACGCACATCGACAACTTCAATACCCAGCGCCGCCATACTGTTCGGGTTGATGACCGGAACTTTGCCCTTCGTCTCAGCCGTTACGCGCTCTGCCGCTTCGGCAATGGCGTTATCTGCCGCCGGGGTAGTAACTTCATCTTCTGTACCCGCAGAACCGGAGTTCAGCGCGTCACGTACTTCGAGGGTCAGACGACCACGGGAATCGGTGACGATATCTTTCACGTCCAGGCGACCAATTTCAGAACGCAGACGGTCAGAGAACTTACGTTTCAACAGCACTTCCGCTTGCGAAATGTCGCCACCACCCGTTGCCAGGTAGTAACGGCTGAAATCGCTGATGCGCCATTTGATGTAAGAGTCGACGATCAGGTCTTTCTTCTCTTTGGTCACAAAGCGGTCGGCCTGGTTGTCCATGGTCTGAATACGTGCGTCGAGCATTTTCACCGTTTCAATGAACGGTATCTTGAAAT
GCAGACCCGGCTCATAAACCAGAGGTTTGTTGTCATCGTCACGCAGTACCTTACCAAAACGCAGCGTAATACCGCGCTCACCTTCTTTGACGACAAAGACAGACATGTAAAGCACTACCAGCACGATGATGATAATCGCGATAACTGACTTACGCATCGTTATTCCCCCTGACGCTGGTAGTCGTTACGCTGCGCGTTGGCGCGGCGTTGGTCCATAATATCGCCCTGACTGGTGGACGACGTGTTGCTTGCTCCACTGGTTGTGGAGGAAGAGGCTGGCGGCAGACGCAGCAGATTGCTGGCACCGTTATCGCTCTTCGCCGCAGGGGCGTTACCACCTTTCAGCATCTGGTCTAACGGCAGAACCATCAGGTTGCCACCTTTATCGTTAACCAGCACTTTGCGGGTGTTACCCAACACTTTTTCCATCGTCTCGATATACAGACGCTCGCGAGTAATTTCCGGCGCGGCTTTATATTCCGGCAGAAGTTTAGCAAAGCGCGCCACTTCACCCTGAGCTTCCAGGATGGTCTGGGCCTTGTACGCACGCGCCTCTTCGAGGATACGTTGCGCCTGACCGTTCGCACGCGGCTGAACTTCGTTGGTATACGCTTCTGCTTCACGAATGTATTGCTGTTCGTTTTCACGCGCGGCAATCGCATCGTCAAACGCCGCTTTTACTTCTTCCGGCGGACGAGCAGCCTGGAAGTTGACGTCCAGCAGCGTGATACCCATGTCATACGGACGAATCGTCTCTTCCAGTTCGCGCTGAGTATCGCTACGAATCACGGTACGACCTTCCGTCAGAATGCGGTCCATGGTGTATTTACCGATAACTCCACGCAGGGCGCTGTCGGTAGCCTGACGCAGGCTGTCATCCGGGCTGGTCACGCTATACAGATATTTTTCCGGATTGGTGACGCGGTACTGCACGTTCATCTCAACGCGCACTACGTTCTCGTCCGACGTCAGCATCACACCAGAAGCGGCCAGTTCACGCACGGCTTCCACGTTCACCGGTTTGACTTCGTCGATAAACGTCGGTTTCCAGTTCAGACCCGGCTCAACCAGATGGCTGAATTTACCAAAGCGTGTTACCACGCCGCGTTCGGCTTCTTTAATGGTATAGAAACCACTGGCCGCCCAGATAATGACAATCGCTGCCGCTGCGATGGTAACGACACGACCGCCAAGCTGCGGGCGCGGGCCTTGCGATGAACTGCCACCGCCAGATCCGGTGCCTTTACCGCCGCCCAGACCACCGAGCTTTTTGCTCAGTTTGCGGAAGATATCATCTAAATCAGGTGGCCCTTGATCGCGACCGCCTTTGTTTCCATTTCCCTCAGAGTTGCCGCCAGGTTTGCTGCTTCCCCACGGGTCGCGGTCTTGTCCGTTATTACCGGGCTGATTCCACGCCATGTTTGTGCTCCATATTTGTTATGCGGTGATCCCTGTTGTCTTTCGACGCCAGAGGATATGACTCCACGCTTCAGACGCTACGCCGTTAGATCAGGTAATCGATCAACGCCGGTTCTTGTTTACAGAGGCGACGCCAGTCAACGATCGGCATACGAACTTGCAGACTTACGCTGCCGTCCTCCTCCATCCACTCTTTTTCTATTGCCTGAAGCTGATAAAAACGACTTCTCAGACGCCCTTCCTGCGGTGGCAGACGCAATGTATGCTGCGCCACCTCGCCGGAAAGCCGCTCCGTCAAAGCCTGAAAAAGCTGTGGTATCCCCGCTCCGGTCTGTGCGGAAAGCCAGACACGGTTCGGTTTGTTCTCTTCGTCCCGATCAATACGCGGTTCGAAATCTTCCAGCATATCGATCTTGTTCATCACCAGCAGGGTTGGGATCTCGTGAGCGTCGATCTCTTCAAGAACCGTATTCACCGCTTCGATGTTTTCTTGTACACGCACATCCGCCGCATCAATGACGTGCAGCAGTAATGTGGCTTGCCGCGTCTCTTGTAACGTGGCTTTAAATGCCGCCACCAGA
TCGTGCGGCAGGTGGCGAATAAACCCTACGGTATCTGCAAGTACGGTTTCACCGACATCTGCAACGTCAATACGCCGCAACGTCGGGTCGAGGGTGGCAAACAACTGGTCTGCCGCGTAGACCCGCGCTTCGGTGATGCGATTGAAAAGGGTAGATTTACCGGCGTTGGTATATCCCACCAGCGAAACAGTAGGAACGTCGGCTTTGATACGCGATTGCCGCCCCTGCTCACGCTGCTTTTCAACTCTTTCCAGGCGCGACTGTATCTGCACGATGCGATTACGCAACAAACGACGGTCGGTTTCGAGCTGGGTTTCACCCGGACCACGCAAACCTATCCCGCCTTTCTGTCTTTCAAGGTGGGTCCAGCCACGCACCAGGCGCGTAGCCAGATGGCGCAGCTGCGCCAGCTCAACCTGCAACTTACCCTCATGGGTACGCGCACGTTGGGCGAAAATATCTAAAATAAGGCCGGTGCGGTCGATAACACGACACTCGCACAAACGCTCCAGGTTACGCTCTTGCGCCGGGCTCAGGGCATGGTCAAAAAGAACGACCGAAGCACCCGTCGCTTTGACAGCTTCCGCAATTTCAACTGCTTTACCTTCACCTACAAAATACTTTGGGTGCGGCGCTTTACGGCTACCGGTAATCACCTGCAATGCTTCGACACCGGCGGAAGAGACCAGAGATTCAAACTCCTGGAGGTCTTCCATA +>419433555 +TGACGCATCGAATTGCTCCTTACGGATTATTCAGCCTCCTGTCTGGGTCAGCCGAATCCCGGGGAGGCAAGGAACGTGTTAAAGGTCGGCTGAAAAATGACGCGTTATAGTACTTGTATGCCTCGCTAAACTCAAGGGCAGAGCCGAAATAAATCTCCCCTTTCGCAATTAACCTCTAACACGCTGATTTGAAGTTCATCACACTTCATCATGGTGTTTTTTTGAACAACTGCATCAGAAATGGTCACCATCAACTTCTTTATCATTGATTAAAATTAAATCAGAAGATCATCACAGAGGAGAGCACACAAAGTCAGCCGTCGAGGAGGTATCTATGTTCAGTCGTGTTTTAGCCCTTCTGGCTGTGCTTTTGCTAAGTGCAAATACATGGGCAGCCATTGAAATTAATAACCACCAGGCCAGAAATATGGACGATGTGCAAAGCTTAGGCGTGATTTATATCAATCATAATTTCGCCACTGAAAGCGAAGCACGTCAGGCATTAAATGAAGAAACAGATGCGCAAGGCGCAACGTACTACCACGTAATTCTGATGCGGGAACCGGGGAGTAACGGCAATATGCACGCCAGCGCGGATATTTATCGCTAGCACCAGGTATAACCAACGAAACATTGCCATAGTTTGCTTTGCCCCCTTCGCAGGGGGCTTTTTTTTGAAAAACGGCTGCGGAATTAGACCAGTTATCTCCCGAGGAAGGAAATTTCCGCAGCGCGTGTTCTACTTCTGCCCGTAATAAGCGTTAGGACCGTGTTTACGCATGAAGTGTTTATTCATCAGGAAGCTGTCGATGTGATTGAGTTGTGGGTTAATGCCGCGGGCAATCCACGCCATTTTCGCCACTTCTTCCATCACCACCGCGTTATGCACCGCATCGTGAGCATCTTTCCCCCAGGCGAACGGCCCGTGCTGATACACCACAATTCCCGGCGTATGCAGCGGCTCGGCGTTGCCCAGCGTTTCGATAATCACTTTGCCGGTGTTCAGTTCATACTCGCCCTGCACCTCTTCTTCGCTTAACCCGCGCGTACACGGAATGTCGCCAAAGAAGTAGTCGGCGTGCGTGGTGCCTAACGCCGGGATCGCCAGCCCCGCCTGCGCCCATGCGGTGGCATGAGTGGAGTGGGTATGGACAATGCCACCAAGCGACGGGTAACGACGGTAGAGTTCGAGATGCGTCGCAGTGTCGGAAGATGGGCGATACTCCCCTTCCACCACCTTGCCGCTCATATCAACCACCACCATATCGG
CCGCTTTCATGGTTTCGTAGGCAACGCCGCTGGGCTTGATCACCACCAGCCCGCGTTCGCGGTCGATAGCGCTGACGTTGCCCCAGGTAAAGGTCACCAGCCCGTAGCGCGGCAGCTCCATGTTGGCTTCAAATACCTGCTGTTTTAGCTTTTGCATTATGCCGCCTCCACCATGCCCGCTTTCGCCATGCGCGCTTTCACCCAATCACGCGCTTTCGCCACTTCTGCCGCCGGGTCTTCCGCCGTTTCGCTCCACATCTCAATCAGGTACGGCCCGCAATAGCCACTCTGTTTGAGCGTTTCGAAACAACGTTCGAAATCCACTACACCTTCGCCAAACGGCACGTTTTTGAAGACGCCAGGTTTGGTGTCTTTCACATGTACCGCGACGATATGCCCGATTCCGGCCTGCAACTCCATCTGCACATCGTTGTCCCACGCCGACAGGTTGCCGATATCCGGGTAGAGCTGGAACCACGGATTGTTGAGATAGTGCGCGTATCCCAGCGCCTTGCTGATGGAGCTCATCAACGGATAATCCATGATCTCCATCGCCAGCGTCACCTGCGCGCGGCTTGCCATCTCAACGCTCTCTTTCAGGCCGTCACGGAAACGACGACGCGTTTCGTTATTGGCTTCCTGATAGTAAACGTCATAGCCCGCCAGCTGGATCACGCGAATACCGACATCCTGGGCGAACTGGATAGCTTTACGCATAATCTCCAGCCCCTGCGCCCGCACCGCGTCATCTTCACTGCCCAGCGGGAAACGACGATGAGCAGAAAGGCACATGGACGGCACGCGCACGCCGGTTTCAACAATCGCATTGACCAGCGCCAGACGCTGCTCGCGGCTCCAGTTGAGGCGCGACAGGCGATCGTCAGTTTCATCTACCGACATTTCGACAAAATCGAAGCCTAACGTTTTTGCCAGTTGCAGGCGTTCCAGCCAGCACTCCCCGGCGGGGAGCGCTTTTTCATAGATGCCAAGCGGGATTTGTTTGGACAACATATCCGCTCCTTAGCCCCACAGTTCAGCGATGGAACGTTTGAACTGACGTGCGGCTTCCACCGGAGAAGCGGCATCACGGATACTACGGCCCGCGATAAAGACGTGAATCGGAATACCCTTGAACAGCGGCAGATCTTCCAGCGCCAGGCCTCCGGTGACGGTGACTTTGAAGCCCATATCGGAAAGACGTTTGATCGCGGTGATGTCCGCTTCGCCCCACGCCACGCCTGCGGCCTGCGCGTCACGGCTGCGGTGATAAACCACCTGCCCAATGCCTGCATCGCGCCACTGTTGCGCCTGTTCCCAGGTCCAGTAACCGGTCAGTTCGATCTGCACGTCGCCGTTAAACTCTTTTGCCACGTCCAGCGCGCCTTTGGCGGTGTTGATATCCGCACAGCAAATTACCGTCACCCAGTCAGCGTTGGCTTCGAAGCACATACGCGAAAGGATTTTGCCTGCATCGGCAATTTTGGCGTCTGCCAGTACGATTTTGTGCGGGTAGAGCGCTTTCAGGTCACGAACCGCACGCACGCCTTCGCCCACGCACAGAATGGTGCCCACTTCGATAATGTCGACTTCTTCGGCAATCAGGCGAGTGGTTTCGTAGGCGCTATCCATAGTCTGGTTGTCCAGCGCGACTTGCAACATCGGTAATGACATGTTCAATTCCTTCTTAAGCTGCCGCGTTGGTGCGGTCAATGAGATCCAGTACTTCCTGCTCGGTACGGCAGGCGCGTAAACGGTCAAAATTCTCTTCATCTTCAAACAGGTTGACGATCTGCATGATGCCCACTTCCTGGTGAGTATTGGCATCGACCGCCGCCATGGTGATGAGGATATCCACCGGATCGTTGTCATCGTGGTTGAACTCCAGCGGCTTTTTCAGCGTTACCAGAGAGAAACCGGTTTTCTTAACGCCCTCTTCCGGACGCCCGTGCGGCATTGCCAGGCCCGGAGCGATAACGAAATAAGGACCGAACTGTTCAACG
CCATCCAGAATCGCCTGGTAGTAACGCGGCTCTACCACATCTGCCGCCACCAGCAGGTCAACGCCGATTTTCACCGCTTCCTGCCATGTCTCTGCTTCAGCCTGCAGGCGGATGGATTTATTTTCCGCCAGCGAATCACGTAATTTCATGGCGCGTCCTTACTTCACATCCTGCGGGAAATGCTCTTTGATCACTTCCAGCAGTTTCGGGCCAAAGTCGGCAGGAGAGAGCATGTTGCGCACGCCAACCACGTATTTGTTGCCGGTCACGGTGATTTCGCCCGCAATGTGCGTAGAAGCGATGATGATATCCGCGCCACTCAACTCGCTTTTGTACTCGCCAACCGCGCAGCTGTTTACCGTATGGTCAATGTTTGATTGGGTTAAAAACTGGTCCACTTTCATCTTCATGATCATGGAACTGCCTTGTCCGTTGCCACACACAGCCAGAATACGTACGGTCATAATCAAAACTCCTTATTAAGCAGACTGTTCTGCCAGTTGTTTTTCTGCATCTTCTTCTGCGCGCAGCGCGCGGCCAGCGAAGAACATATAAGCCAGTGCAATTACAATGATGACGGCCATAAAGGCGATACCGATGGAGAAGAAGCCTTGCATCATCGGCGGTGCCAGAATCGACCAGTCCGCCATGCCCATCCAGGCACTCATACCGGTGAGTTTCACCGCCCAGACGCAACCAAAGATTTCGATCATCCCCATCACCAGACAAATCTTCAGCGCCGCACGCCAGCCGCCGAAGTGGTTAGCGAACACGCCGATGGTGGCGTTAGAGAAGAACATCGGGATAAAGCCAGGAATAATCAGGATCGAGGAGCCGCAGGCGACCAGGATGCCAACCGCAATCAGCTGACCGATGGTGCCCCACATAAAGCCCCAGACCACGGCGTTCGGCGCGAAGCTATAGATAGCTGCACAGTCAATCGCCAGAACCGCACCTGGGATCAGGCGCTGGGAAATGCCGTTAAATGCTTCAGAGAGTTCCGCCACAAACATGCGCACACCCTGCGTGATGATGAAGATCGCCACCGCAAAGGAGAAACCAGTTTGCAGGATGTACACCGTCCAGTGCACTTTGCCTGCCATCGCCTGCACGGTGTCGATACCGAAGGAGAGCAGAATGGCACCAAAGAAGATGGTCATCACAATCGCCGTGGAGACGATGTTGTCGTGGAAAATGTTCAGCCAGCCCGGCAATTTGAGGTCTTCAACGCTCTCCTCTTTTTTGCCGAGGAACGGCGCGACTTTATAGGCAATCCATGATGCAAACTGCTGCTGGTGACCGATGGAGAAACCACAGCCATCCGTCACTTCCTGAGTCGGCTTGTACATCATGTTGGAAGTGATGCCCCAGTAGAGCGAAACCAGAATCGCGGTACAGATAATGGTGGTCCACATGGAGTAGCCGAAGATAAACAGCGTAACGGCAATCAACCCGGCCTGCTGGAACATGATGTGGCCGGTCAACATGATTGTGCGAATGCCGGTAATGCGACGCAACAGCACGTAACAGATGTTCAGCGCCAGCGCTAACAATACGGCGTAACCCACCCAGCTATAGGCATCGCCCATGCGGTCGATGGTTGCCATCATTGAAGCGTAGGTATCGGAAATTGCGCCGTTAATGCCGTAGACTTCGGACATTTTCGCCACCACCGGTTTGAAGGTGCTGGTGAGGATGCCGGACCCTGCCTGCAACAACATGAAACCAATTATGGTTTTAATCGTGCCTTTAATAATAACGCTGACACTTTTGCGCAGTAGGATGTAGCCCAGACAGGTCACAATACCCAGCAACAACGGGGCATTGGTCATGACCTGGTTAAAAAACACGGTAAAGATGTTGTAGAGGATCTCCATAACGATCTCCTGAAGAAGAGGTAACCGGGTATTCCACACACCCGGAATGTTGTGCACTCACTCTAATTTTCAAAAGTAATCACAACAAGATTATTTATGATTAAATGTGACGCGACCCGCAAA
TAATTCCACATGGATAGAGTGGGATTCACACAACGAAACAATTATCCATTTAAATTCATTTAGTTAATTGAATTACACATTCCATTAAATCTTTCCATACTCAAAATCAACAACAACTTCCCTGGCAAAAAGTCAAAAAATGGTCATTGCCTTCCTGAAAAAATAGTGTCAGGATTAATCAAAACCAATCACATATTGATTCGAATTGAACTATCATGAAGGTAAATGGCGATGAGTAAAGTGAAAAGTATCACCCGTGAATCCTGGATCCTGAGCACTTTCCCGGAGTGGGGTAGCTGGTTGAATGAAGAAATTGAACAAGAACAGGTCGCTCCTGGCACATTTGCGATGTGGTGGCTTGGCTGCACCGGGATCTGGTTGAAATCGGAAGGTGGCACCAACGTTTGCGTTGATTTCTGGTGCGGCACTGGCAAACAAAGTCACGGTAACCCGTTAATGAAACAGGGTCACCAGATGCAGCGCATGGCTGGCGTGAAAAAACTGCAGCCAAACCTGCGTACCACCCCGTTTGTTCTTGATCCGTTTGCGATTCGCCAGATCGACGCGGTACTGGCGACTCACGATCACAACGATCATATCGACGTTAACGTCGCTGCTGCCGTGATGCAGAATTGTGCAGATGACGTACCGTTTATCGGACCGAAAACCTGTGTGGATTTGTGGATTGGCTGGGGCGTACCGAAAGAGCGTTGCATCGTGGTCAAACCGGGCGATGTAGTAAAAGTGAAAGACATTGAAATTCATGCGCTTGATGCTTTCGACCGTACTGCACTGATCACCCTGCCTGCCGATCAAAAAGCGGCTGGCGTACTGCCAGATGGCATGGACGATCGCGCGGTGAACTACCTGTTCAAAACGCCTGGCGGCTCCCTGTATCACAGCGGCGACTCCCACTACTCTAACTATTATGCGAAGCACGGTAACGAACATCAGATCGACGTGGCGTTAGGATCGTACGGCGAAAACCCGCGCGGTATCACCGACAAAATGACCAGCGCCGATATGCTGCGTATGGGTGAAGCGCTGAATGCGAAAGTAGTGATCCCGTTCCACCACGATATCTGGTCAAACTTCCAGGCCGATCCGCAAGAGATCCGCGTGCTGTGGGAGATGAAAAAAGATCGCCTGAAGTATGGCTTCAAGCCGTTTATCTGGCAGGTGGGTGGCAAATTTACCTGGCCGCTGGATAAAGACAACTTCGAGTACCACTATCCGCGCGGTTTCGATGATTGCTTCACTATTGAACCGGATCTGCCGTTCAAGTCATTCCTGTAATCTGTCGTTTATACCGGGTAGCGTGGAGAAGTTCGCGCTATCCGGCGTAGTATTTCCTGGCTATTTCAAATATCATCTAAAAAAATCAAATTTTATCGGAATAGCTCATGACTGAAGCACAAAGACATCAAATCCTCCTGGAAATGCTCGCACAATTGGGCTTTGTGACCGTTGAGAAAGTCGTTGAGCGTCTGGGAATTTCGCCTGCCACTGCGCGACGCGATATCAATAAACTTGACGAAAGCGGCAAACTGAAAAAAGTGCGCAATGGCGCAGAAGCTATTACCCAACAGCGCCCGCGCTGGACGCCGATGAATCTGCATCAGGCGCAGAATCACGATGAAAAAGTACGTATCGCTAAAGCGGCCTCGCAGCTGGTTAATCCGGGCGAAAGCGTAGTCATCAACTGCGGCTCCACCGCGTTTCTGCTTGGGCGGGAAATGTGTGGCAAGCCAGTGCAAATCATCACTAATTATCTACCGCTGGCAAATTACCTGATCGATCAAGAACATGACAGCGTGATCATTATGGGCGGACAGTACAACAAAAGTCAGTCCATCACTTTAAGCCCGCAGGGCAGCGAAAACAGTCTCTATGCCGGGCACTGGATGTTTACCAGCGGAAAAGGGCTGACCGCAGAAGGGTTGTATAAAACCGATATGCTGACAGCAATGGCAGAGCAGAAGATGCTGAGCGTGGTAGGG
AAACTGGTGGTACTGGTTGATAGCAGTAAGATTGGCGAACGCGCGGGAATGCTTTTTAGCCGTGCCGATCAAATCGATATGCTTATCACCGGCAAAAATGCTAACCCGGAAATCCTGCAACAACTGGAAGCGCAAGGGGTCAGCATTCTGCGTGTTTAAAGATGCTGGCGGAAAAATGTCACGGCAGCATCTAACGCCTCAGGCGTAATGCGGTGACGCACGCCTGGCTGCCATGAACAGGTTAGCAGTTTATCCCGTCCCGTCTCGCTTAAGGCCTGCTGCAAACGTAGTGATTCGTCGGCAGGCACAACGTCATCGTCGAGGCCATGCCACAGCAGTAGAGGTCTGTCACTAAGTTGTTCCAGGTGGTTTGTCGCTTCCCACTCTGCCAGTGGCGCGACAATGTTATTGAATTCATTCTGCTGTGCTGCCGTTTCAGGTATCAGCGGTGGAAACAGTGAACGGGCGAGTGATGTAAAATAGCCCGATCCCATCATGCTGGCGGTACATCTCACCGTGGGGTGGCGAGCGGTAATCCCCAGTGCCGTCATCGCGCCCATCGACGCGCCACCGACTGCCAGACGGTCATCAAGCAGCCAGTTTTCTTCGGCTATTGCCGCACGTAAAGTAGTGAATTCCTGCATACTTTGTAGCAAGATTTGCCAGAATTGATTTAACCGCCGCGCTGCGTCACCACTAAAACGGCTACCGTGATCGGGCGCATCCGGCATGATCACCCGCAAACCAGCCTGCGCCAGCGCAACGGCAAAATAGCTATACACCAGACTGGATGAAGTAAAGCCGTGATAAAAAATTACGCACGGTAACGGGGTATCTTTTTGCCCGACAGGATAAGCATGAAGAACGGGAATATCTGCCAGCTCGCGTGATTCTATTTCAATCATCTGTCTCTCCTTTTTGTCTGGTATGACATGAAGAAATCCTTAGCACAAGTTTCTGCTGGCGGTATTAGTTCATAATGTTGAGATGTGGGTTACGCTTTCGTCACGTTTTCATTCGAAAATCGCGTACCAGGTAACAATTCGGGAACATTCCCCCAAAGTTAAATTAATAAGGCACTACACTATGGTTAGCAGGAAACGTAATAGCGTTATTTACCGGTTTGCCAGTTTATTATTGGTGTTGATGTTAAGTGCCTGTAGCGCACTGCAAGGTACGCCACAGCCAGCACCGCCAGTAACGGACCATCCGCAAGAGATTCGCCGCGACCAGACGCAAGGGTTACAGCGAATAGGTAGCGTAAGCACGATGGTTCGGGGTTCTCCGGATGACGCATTAGCAGAAATCAAAGCGAAAGCTGTCGCTGCAAAAGCTGATTATTACGTTGTCGTCATGGTGGACGAAACCATCGTGACAGGACAGTGGTATTCACAAGCCATTTTGTACCGTAAATAAATCAGACAACTTACATAGTCAGCTTTACATTGCTTTGCGTCCCTATGCGTTTACCTGTGCACAATAAATTACCGGCTGGAAGCCTGGGCGTTTATTACCGCGATGGAATGCCCTGCAACATGTGGGGAAACGAAAAATGGAGCTGACGATGAAACAATTACTTGCCTCACCCTCGCTGCAATTAGTGACTTATCCTGCGAGCGCCACGGCACAGTCTGCAGAATTCGCCAGTGCTGACTGCGTGACGGGCCTGAATGAAATAGGTCAGATCTCAGTTAGCAATATTTCAGGGGATCCGCAGGACGTGGAACGTATCGTGGCGTTAAAGGCCGATGAACAAGGTGCGTCATGGTACCGCATTATTACCATGTACGAAGACCAGCAGCCTGATAACTGGCGCGTACAGGCGATACTGTATGCATGAGTGTCATCTACAGCAAATGAACAATGCCGGATGCGGCGCGAGCGCCTTGTCCGGCCTACAATTTTCACATATTTCATTTAGTTAATCGAAACCAGCGTCGCATCAGTCGTATACGCTTACACACACACTCCCCCCGTCGCCCGCAGCAATAAATCATTCT
GGATCTGCTCTGACAACCGTACGCCGCCGCGCGTATCTAACATCACCTGACACCACGCCTGCGCCATTGGCGGAGAAGCATATTTCAACATTTGCGCACCGCAGCCCAGCAGGAATAGCTGATGAGTAATCTCTCGCCCCAGTTCTTCAGCTGGCTTACGCAGCTGCTGCTGTAAACGACGAACCGCGCGATCAAAATAGCGATCCTGCCCTTTCACTTCCACAAATGCTTCCGACAATAAGTCGTATACGCCCGCTTGCTTATTGAGAACGCGCAACACATCCAGGCACATAATATTGCCGGAACCTTCCCAAATACTGTTTACCGGCATCTCCCGGTAAAGCCGCGGCAGCTCACTCTCCTCGCAATAACCAATGCCGCCCAGCACCTCCATCGCTTCGGCCACAAACGGCATACCTCTTTTGCAGATCACAAATTTTGCCGCAGGCGTAAATAAACGCGCCCACAGGGCTTCTTTGGCATCGGCACGCCGGTCCCACGCTCGCGCAAGACGAAACAGCAACGCCGTTTGCCCTTCAAGCTGAAGTGCCATGCGACTTAAGACATGACGCATAAGGGGCTGTTGGATCAATGGATTACCAAAAACATGGCGTTGATGTGCATGATAAATCGCCAGCGAAAATGCACGGCGCATCATGGCATGGCTACCCAGGGCGCAATCAAAACGCGTCATCCCACCCATTTTCAGGATCAGACGAATTCCTTCCCCTTCCAGCCCCAACAACCAACCAATGGCATCCTGAAACTCCACTTCGCAACTGGCGTTAGAGCGATTACCCAGCTTATCTTTCAGCCGCTCGAGGCGAATCGCGTTGCGTTGCCCGTCAGGCAAAAAGCGCGGCACAAAAAAGCAGGACAGACCACCCGCGGTCTGCGCCAGCACCAGATGCGCATCGCTTTGCGGAACCGAGAAAAACCATTTATGCCCCACCAGCCGATAAGAGCCATCTTCCAGACGCTCTGCACGGGTGGTGTTGCTCATAACATCGGAACCGCCCTGCTTTTCCGTCATTCCCATGCCAATCAACAAACCGCGTTTTTGCCCACCTGGCAATAAGTGAGAATCGTAGCGATCGCTCAATAGCGGCGTGGTCCAGTCCTGAAACGGCGCGGGTAACATCTGTAACAACAATGGCGTGGCGGCAAAGGTCATGGTTATCGGACATAACGACCCTGCCTCAACCTGCGCATGTAACATAAAACGCGCCGCGCGCGCCACAAATGCGCCGGAGCGAGCGTCTTCTTCCCAGGCAAGATTGTGCACCCGATTGGTACATAGCGCCTGCATCAGCAGGTGCCAGGCGGGGTGAAAACGCACATCGTCCAGACGGCGTCCTTGCGCATCGTAGCGCAATAATTCAGGCGGATTCACATTCGCCAGCCGCCCCAGTTCAAGGGATTCAGCCGTTCCTAACTGCTGACCAATACTGGCTAGAAAATCGCTATCCCAGCCAGCACCTTCACGCGTTACCGCTTCGCAGAGCGCGCCATCAGACAGGTATAAGTTGCTGTTATTTAATGGTATAGGTTGATTAAAAACGGTGTGAGTTTGCCAGTGCACTGTGTCTCCCTCCATCAATGGCAGTCACCATTAGTATGGTCACTGCCATGGATTCATGACAGAAAGAGATTGCTAAAACATTCTTATCCAGCAATGTAATGCGGAACGAAACGTGAGGTGTCTTTGGTGATCAGTGTGTTATCTTCACGGATCCCCATTCCGCACGCTTCATCATCGACAATCCAGCTACCGATGAGTGTGTAGCTATCGCCAAACCGCGGCAGAGGTTGAAACGCCTGGTAGATCATCGGTTCATCGGCGTAATCACCATCAGCGTGGTCAACAACGTTATTCTTGCCGTCAAAAATGGTGACGTTGCCGCCTTCGCGCGAATAGATTGGTTTACGCACATAGCTTTCGCCAGCGGCAATCTGCGGTTTCTCGCCATCGAACCAGGACGCAAGAAGATTAGGATGAC
CAGGGAAGAAGCGCCAAAGCAGCGGCATTAGCCCTTTATTACTCAAGATACTTTTCCATAACGGCTCCACCCATTGCTCGCGACGCTTGCGCAGCAGCGGACCGTTATCGTCACGCATCATCCACTCCAGCGGATACAGCTTAAATGCACGCTGGATGACATTATCATCAAGATCGGTCAGTACGCCGCCGACGCCCAAACCGAGATCTTCAATGTAGATAAACCGCGACTCCTGCCCTGCCTGCTGGGCGCAGTCCTGCAAGTACAGCACGGTACTCCTGTCTTCATCGGTGTCCTGACAGCAGCAAAAATAAAACGGTTCCCGACTGTAAAGCTCGCTAAAGCGTGAAATCAGGCGTTCCTGAATAGCATTGTACTGATCGGCATCACGCGGAATAATGCCGCTGCGCCGGGCATCCTCCAGCCACAGCCACTGGAAATAAGCCGACTCGTACAATGAAGTTGGCGTATCGGCGTTGTACTCCAGCAGCTTCACCGGCGCATTGCCACACCAGGCAAAATCCATCCGTCCATACAGCGAAGGATCGCGGGCGCGCCAGCTTTCAGCGATCACATCCCAGTACAACGGCGGGATCGCCAGTTGCGTCAGGATCTCTTCATCTTTTACCGCGCGATCCACCACCTCAAGGCACATCTGATGCAGTTCCGCAGTCGGTTTTTCGATCTGCTCTTCAATCTGGCGCAGAGTAAAACGGTAAGCCCGACTCTCATCCCAATAGATTTCATTGTCGATGATATGAAAGTCGAAACCGTTGTCAGCGGCGATCTGGTCCAGATCCCGTCGCACAGGAACGTTGTGTCTCAGCATGATTAGCCTCCCCAATGCCCACGGGCGCTGGAAGAACGACCATAGCCGCCGCGAGAAACGGTAGACGCTTTTTTGGTGGTAAAGCCGCCCGAAGAGTAAGACTCTTTTTTGCCAGAACCGGAGCGCCAGGAGTAATCACCAGAAGTGCTGCGCCAGACTGGGCGCGAAGCAAAAGAGGAACCGCCGCTGTTATAAACAAACGGCTCATCGCGATCTTTACGAATAACCCGGCTTAACAAAAATCCAGAAACGACCGGGATCCAACTCTGTTCAACATTGTCGTAGTAGCAATTTTCGTACTTAGACTGACAGTTCTGCTGAGTCATATTCTTCGGAACATCGGCATAAAATGCCGTTTTGGCGTTGTTCCAGCCACGCGCGCAAATGTCGGCATTATTACCGTCATCAATACAATCCTGCACTGTCGCGTAAAACGTTCCGTCGCCGTCGTTATCGACATCGCTACTATCGCTACAACCTTTCAATACGAAAAAAGCGGCACCGCCCATTAGCGCCAACGTTAAGTATTTTTGTGCGTAGCGATTGCGGCACGGTTCAAAAGGATTATTCGGTCTACCAATGCGACTAATCGCTCCGTGACCAATTTTACTGTTATTTCTTGATTTGCGTTTTCTGGCCATGATCGTCCATTACCATGTCATACAAGCTGCGTTAAAAATACCGCCAGCCAGCGCGGCGGTTCCCATAAACATTCCTGCTGCGGTATTGTGATTAATAATTTTTTCGCTTAATGCGGGCATATAAAGTCTGACACCAGCAAAAACGAGTAACTGAATCACCAGTGCAATCCCGCCCCAGGCGAAATAGTCTGGAATACTCACCGCATTGATTGCCGCACTGGATAAGGGGATAACGTAACCCAACAATGTACCGCTGAACGCCAGTGACGCCGCGGTATTATTGTTTTTGATTAACTGCCATTCGTTGTGCGGTGTAATTTTAGAATAGATAAACAGAAAAATAATCACCATAGCCACGCCAATAAAAAAATAGGCGCTAAAGGCAAGAAGTGAATCCAGTATATGCATTGTAAACGTCCTTGTAATTAACCAATAATATGAAGTGATGTCAGCGGGATATCGACACCCAACGCGCGCGAAAATAACCACTCTGGCTCGCCGAGATCGTTAAAAGATTCCTCACCATTTAG
CAGCAAATATTCGTAAGTATCTTCGGTTACTTGCCGTTGGTAGCCCATGGTAAAATTATGGACCTCCCATTTGGCATGGTTTTGATTTTCTACTTTTTCCAGCATGTAGACGGGTTCGATATTTCCCGGTTCTTCGCTGTTAAAAAATCGCTGCCAGCGTTTTTCCTGCCAGTTTAAGGTCATTGCTCCCATCGCTTTGGCGTTGATCGCCTCGCGCCAGTGACTCTCTTTACTGATACCGTAGCTCTCTTCATAGACAAAGAGCTTGATATCATCAATGTCATCAATATCTTCGCCGCCGGTGGTATTGATTTGCAGAAACTCATCGCCTGAAGTGTAGTAGCGGAAAATCTGACTACCGCCGCCCAGATCGATGTGGCTGACGGCGGCTACCGTAAATTCCTCACCCGGCAGCGCAATCAGCAATTCATCTTCCAGCAAACGAAACGCTAACGTGTCGAGCGTAAAGCCACTATTGAGATGAAGTCCCAGCGGACCACGAGCGATAGCTGGCTTATTATCCTTGCCAAACAGACGCTGGAAAAAACCAGACATATTTTATCCTTAAAATAGTTGCCAGCCTTTTAGCGAGGCTGGCAAAAAAATTACTCGCCCTGTTGGCGTTGCAGTCTTGCTAATACATCCTGGGCGCTACTTTTATTGCTACCGCCAATTCCGGCTTCCGCCAGCTTTTCGTCAAGGTCGCGACCGTCTGCGACTTTCTCCAACTGTGCGGCAGCATCCAGGCGAGCCTGACGTTCGGCCTGACGCGTTTGCAGGCGTTTTAAGGATTCTGCCGCTGTCGAAACGCTGGAAGATGCGCCAACGGTAGAGGTTGTTACAGCCTGTTGTGCACGCTGCATGGCTTCAGTGGCTTTAACGACTTCCATTTGCTGCTCAAACTGAGCGATACGCTGCGCTGTCGCTGTAACTGCTTTTTCCACGCCATCACGGGAGACTTCCAGATTCGACAACACTTGCTCTTCAGCGGTAATGAGATTCTCAAGGCGCGCGATTTCTTCAGCAACTTCGTTAATCAACGACGGATTAACGTTCTTGCTTAACGCTTCCAGCGCACGGGCTTCCAGACTGGCTTTGCGCTCACGTAAATCTTTCAGCTTATCGTGACTTAATTTCACCCGCGCCAGCAGATCAACGCGAGATTTTCCGGCTTTATCGAGTTCAGCTTTAGCGTCACGAATATGCTGTTCCAGCATGCGCACGCCCTGGGTTTCTTCAATGGATTCTTCCGCCTGGGAGATAAACGATTTCCCCAGCGTAAATAAACTTTTTAAAATTCCCATAAATACCCGTCCTTGTAATTAGTGTGAATATTCTTCTGTAATTTCAGCCAGATCCAATGCGTTATCTACCAGCGAGGTTATCTCAAGCAGGATATCTTCAAGAGAAGATTTAAGCGATAACGCCCCGAAAACAATGTAATACTCTTCCTGTTGTACGCTGGAGATCCCTACCGATGACAACGGCATCATCTTCTGATTTCTTAATAAGAAGGTATTAAATTCATCTGGATTGCTGATACTACTCACCGGACAAATAAAGGTTTCGATAATCATTTGGCGGGAAGTAAAAAGAATATTGATTTGCAAATCGCCATAATCATTCATTTTAATAATTAATGCGTTCTCGCTATTTGTTACATCAATATTTTGTTCAGGTACAGTTTGCAGCGCCGTCGCTAGCGCCAACGGATTCCATGTCATATATTATTCCATATAGATTAAGTTTAAATATTAATAAAATGAATATTTGCAATACGTAATTATCTTACCAGCTATAGACAAAAAAAAACCATCCAAATCTGGATGGCTTTTCATAATTCTGAGAAATTAGCTGCGCTGGCGCACCGCTTCAAATAAGCAAATTCCGGTCGCAACCGAAACGTTCAGGGAAGAAACGCTTCCAGCCATCGGGATGCTGATCAACTCATCGCAATGTTCACGAGTCAGGCGACGCATACCTTCACCTTCCGCACCCAT
CACCAGCGCCAGGCGTCCGGTCATTTTGCTCTGATAGAGTGTATGATCCGCCTCGCCTGCCGTACCGACGATCCAGATATTCTCTTCCTGCAACATACGCATGGTGCGCGCAAGGTTAGTCACCCGAATCAGCGGAACGCTTTCTGCCGCGCCGCAGGCTACTTTTTTCGCCGTAGCGTTGAGCTGTGCGGAGCGATCTTTCGGCACAATCACCGCATGAACGCCTGCGGCGTCCGCACTACGCAGGCACGCGCCGAGGTTGTGCGGATCGGTTACGCCGTCGAGGATCAGCAGGAACGGTTGATCGAGCGAAGCGATCAGATCCGGCAGATCGTTTTCCTGATACTGACGTCCTGGCTTCACGCGGGCGATAATGCCCTGATGCACGGCACCGTCGCTTTTCTCGTCGAGATATTGGCGGTTTGCCAACTGGATAACCACGCCCTGAGACTCAAGGGCGTGAATCAGCGGTAACAGACGTTTATCTTCACGGCCTTTTAAAATAAAGACTTCCTGAAAACGTTCAGGGGCGCGCTCCAGCAGGGCCTGCACTGCGTGGATGCCGTAAATCATTTCGCTCATTAATGTACTCGTTGTTTACGTTTTGCCTGATGCACTACGTTTATCCGGCCTACATGATCTCTGCAATATATTGAATTTGCGTGATTATGTAGGCCGGATAAGGCGTTCACGCCGCATCCGGCATGAACAAAGCGCATTTTGTCAGCAATCTAACCCTCTTCTTTTAAAGAGGGTATTGATCACTCTGCCACTTTTTTCTTCGCCGCACGCTTCGCTTTGGTCGCTGCAGCTATTTTCTGCGTTTTCGCCGATGGCTTTTTCGCTTTTCTCGCGTCTTTCTTCGCCGCTTTCGGCTTCGTTTTTTTCTCACCGCGGAAGGCGCTGTCTGGCTCAAAGTTTACCTTTTTACCGACCTGACGACGCTTGCCGCCTTTTTTACCTGCATCGCCTTTTTTCGCTTTCTCGCGCGCCGTTTTACCGACGTTGCGCGGTGCGCGTTCGCTGGAGATCAGGCTAAAGTCGATTTTGCGCTCGTCCATATTAACCGCTTCGACGCGAACTTCCACGCGATCGCCCAGGCGATAAGTCTGGCCGCTGGATTCCCCCATCAGGCGTTGCCCTACCTGGTCAAAGCGATAGTAGTCATTGTCCAGCGAAGAGACATGGACCAGACCATCAATGAACAAGTCGTCCAGACGGACGAAGAAGCCAAAGCCAGTGACGCTGGAAATTACGCCTTTAAAGACGTTACCTACCTGGTCGAGCATGAAGTCACACTTCAGCCAGTCAGCCACATCGCGCGTTGCTTCGTCGGCACGACGTTCCGCCATCGAACAGTGCTGACCCAGTTGCAGCATCTCTTCCATCGAATAATGGTAGCCGCCGGTTTCAGTGGTGTTGCCCTGATGCCCCTGCTCTTTCGCCAGCAGATATTTAATGGCGCGGTGCAGCGTCAGGTCTGGATAACGACGAATCGGCGAAGTAAAGTGCGCATAGGACTGCAATGCCAGGCCAAAGTGACCACGGTTTTCTGGATCGTAAATCGCCTGTTTCATCGAGCGCAGCAGCATGGTTTGCAGCATTTCTGCATCAGGACGATCGGCAACCGACTCCAGCAGCTCCGCGTAGTCACGCGGTTCCGGCTTGTTACCGCCCGGCAGTTCCAGCCCCAGCTCCGCCAGCACTGAACGGAAAGAGGTAATCGCTTCGGTGCTCGGCTTGTCGTGAATACGGAACAGTGCCGGTTCTTTCGCTTTCTCAACGAAACGCGCCGCCGAGATATTCGCCAGAATCATGCACTCTTCAATTAATTTGTGCGCGTCGTTACGCTGGGTCTGTTCGATACGTTCAATACGGCGTTCAGCGTTGAAAATGAACTTCGCTTCTTCGCTCTCAAATGAGATCCCACCGCGTTCTTCACGGGCTTTATCCAGCACTTTATAGAGGTTATGCAACTCTTCGAGATGCTTAACCAGCGG
GGCGTACTGCTCGCGCAGATCCTGATCGCCCTGCAGAATATGCCAGACTTTGGTGTAGGTCAGACGCGCGTGAGAGCTCATCACCGCTTCGTAGAATTTGTAGCCCGTCAGGCGGCCTTTCGACGAAACCGTCATCTCGCACACCATACACAGGCGGTCTACCTGCGGGTTGAGCGAACACAGGCCGTTAGAGAGCACTTCCGGCAGCATCGGGATAACCTGCGAAGGGAAGTACACCGACGTGCCACGGTTACGCGCTTCTCTGTCCAGCGGCGTTGACGGACGCACATAGTAGCTGACGTCGGCAATCGCGACCCATAAACGCCAGCCGCCGCCGCGTTTTTTCTCGCAGTAAACTGCATCGTCAAAGTCACGGGCGTCTTCGCCATCAATGGTGACCAGCGGTAAATCGCGCAGATCAACACGGCCCGCTTTTGCTTCTTCCGGCACTTCTTCTTTCAGCCCGGCAACCTGTTGCTCAACAGCCTGCGGCCAGATGTACGGAATTTCATGGGTACGCAGAGCGATATCAACCGCCATGCCGGTGCCCATATTGTCGCCCAGCACTTCGACGATTTTACCCACCGCTTTGGTGCGGCGAGTCGGACGCTGAGTCAGTTCGACTACGACCACAAAGCCCATCCGCGCGCCCATGATCTGATCGGGCGGGATTAAGATATCGAAGCTCAGACGGCTGTCGTCAGGAACCACAAAGCCGACGCCCGCTTCGGTAAAGTAGCGACCAACAATCTGGCTGGTTTTTGGCACCAGTACGCGGACAATACGCGCTTCACGACGACCTTTACGGTCAGCACCCAGCGGCTGAGCCAGCACCTGATCGCCATGAATGCAGGTTTTCATCTGCTCGCTGGAGAGATACAAATCATCTTTACGCCCTTCAACCCGCAGAAAGCCGTAGCCATCACGGTGGCCAATAACGGTACCTTTCACCAGGTCGAGGCGTTCCGGCAGCGCATAGCACTGACGACGAGTGAAGACCAGTTGACCATCGCGCTCCATCGCGCGCAGGCGGCGACGCAGGCCTTCAAGCTGCTCTTCGCCTTCAATGTGCAGTTCTACCGCCAGCTCATCACGGCTGGCCGGTTTTTCACGTTTGGTTAAATGTTCGAGGATAAATTCCCGACTAGGGATGGGATTCGCGTATTTTTCAGCTTCGCGTTCCTGGAAAGGATCTTGTGACATCTCGGTTCCTCCGTTGTCATCTCTGATGAAGATTTTCGTCACTCCACCAGCAATAATTTATAAAGCGGTTGATTCTCTTCAACCAAATCGGCAAGCGTGTAGTTATCCAGTTCCGTAAGAAAACTTTGCACGGCCTTAGAAAGTGCCTGTTTCAACCTACAGGCAGGTGTAATGTGGCAAAACTCACTGCTGCAATTCACCAGCGATAAGGGCTCCAGCTCGCGCACCACATCACCAATACGTATCGCACTCGCCGGTTTACCCAGGCGAATGCCGCCATTTTTTCCACGAACAGCAGTCACGTAGCCGGCACGACTAAGTTGATTGATTATTTTGACCATATGATTACGGGAGACGCCGTAGACGTCAGTCACTTCAGAAATACTGGTCATCCGCCCTTCTGGCAATGACGCCATGTAGATCAGCGCACGTAATCCGTAATCAGTGAAACTCGTTAACTGCACATCAACCTCAAAAGGGAAATCGGGAAAAAATACATTTATATTGATGATAAACCAGCCACAAGCTGTGTCGCTAATTTATTTCAGTTCGGGGAAGGAAAAAAGCGAGGATTTAACACTGTGTCGGATAGCGTAATGGCTTACCCGACACAGCAATGTATCACACTGTTCGCCCGGCAGGCAAAATATCTGCCAGGCGTACCAGAATTACGCGTCGAACGGGTCGCGCAGAATCATGGTTTCAGTACGATCCGGACCGGTAGAGATGATATCGATCGGCACACCAGTCAGCTCTTCAATACGCTTGATATAGTTCAGCGCCGCCTGCGGCAGGCC
GCTACGATCTTTCACGCCGAAGGTGGATTCAGACCAGCCCGGCATGGTTTCGTAAATCGGCTCTACACCTTTCCAGTCGTCAGCTGCCAGCGGAGTGGTAGTCACTTCGCGACCATCCGGCATACGGTAAGCCACGCAGAGTTTAACCTCTTTCAGGCCATCCAGAACGTCCAGTTTAGTCAGGCAGAAGCCAGACAGGGAGTTCAGCTGTACCGCACGACGAACGGCAACGGTGTCCAGCCAGCCGGTACGACGACGACGCCCCGTAGTTGCGCCGAATTCGTTACCCTGCTTGCAGAGGAACTCGCCAGTTTCATCAAACAGTTCGGTCGGGAACGGACCTGCACCTACACGAGTGGAGTAAGCTTTGAGGATACCCAGAACGTAATCAACATAACGCGGGCCCAGGCCGGAACCGGTCGCCACGCCACCAGCAGTGGTGTTGGAAGAAGTTACGTACGGATAAGTACCGTGGTCGATATCCAGCAGCGTACCCTGCGCACCTTCAAACATGACGAAATCGCCACGCTGACGCGCCTGGTCGAGCAGGTCAGAAACGTCAACCACCATAGAAGTCAGGATGTCGGCAACAGCCATCGTATCATCCAGAACTTTCTGGTAATCAACCGCTTCAGCTTTGTAGTAGTTAACCAACTGGAAGTTGTGATATTCCATCACTTCTTTCAGTTTTTCAGCGAAGGTTTCTTTGTCGAAAAGGTCGCCAACACGCAGACCGCGACGTGCTACTTTATCTTCATAAGCAGGCCCGATACCACGACCGGTGGTGCCGATCGCTTTCGCGCCACGCGCTTTCTCACGCGCGTTATCCAGCGCAACGTGATAATCAAGGATCAGCGGACATGCTTCAGACAGCAGCAGACGCTCACGAACGGGGATGCCACGGTCTTCCAGTTCTTTCATCTCTTTCATCAGCGCGGCCGGAGACAGCACAACACCGTTACCGATGATGCTGGTTACATTCTCGCGGAGAATACCTGATGGAATAAGATGGAGAACGGTTTTTTCACCGTTGATTACGAGAGTATGGCCTGCGTTGTGACCGCCCTGGT +>419433556 
+ACCAGGGCGGTCACAACGCAGGCCATACTCTCGTAATCAACGGTGAAAAAACCGTTCTCCATCTTATTCCATCAGGTATTCTCCGCGAGAATGTAACCAGCATCATCGGTAACGGTGTTGTGCTGTCTCCGGCCGCGCTGATGAAAGAGATGAAAGAACTGGAAGACCGTGGCATCCCCGTTCGTGAGCGTCTGCTGCTGTCTGAAGCATGTCCGCTGATCCTTGATTATCACGTTGCGCTGGATAACGCGCGTGAGAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCGGGCCTGCTTATGAAGATAAAGTAGCACGTCGCGGTCTGCGTGTTGGCGACCTTTTCGACAAAGAAACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTTAACTACTACAAAGCTGAAGCGGTTGATTACCAGAAAGTTCTGGATGATACGATGGCTGTTGCCGACATCCTGACTTCTATGGTGGTTGACGTTTCTGACCTGCTCGACCAGGCGCGTCAGCGTGGCGATTTCGTCATGTTTGAAGGTGCGCAGGGTACGCTGCTGGATATCGACCACGGTACTTATCCGTACGTAACTTCTTCCAACACCACTGCTGGTGGCGTGGCGACCGGTTCCGGCCTGGGCCCGCGTTATGTTGATTACGTTCTGGGTATCCTCAAAGCTTACTCCACTCGTGTAGGTGCAGGTCCGTTCCCGACCGAACTGTTTGATGAAACTGGCGAGTTCCTCTGCAAGCAGGGTAACGAATTCGGCGCAACTACGGGGCGTCGTCGTCGTACCGGCTGGCTGGACACCGTTGCCGTTCGTCGTGCGGTACAGCTGAACTCCCTGTCTGGCTTCTGCCTGACTAAACTGGACGTTCTGGATGGCCTGAAAGAGGTTAAACTCTGCGTGGCTTACCGTATGCCGGATGGTCGCGAAGTGACTACCACTCCGCTGGCAGCTGACGACTGGAAAGGTGTAGAGCCGATTTACGAAACCATGCCGGGCTGGTCTGAATCCACCTTCGGCGTGAAAGATCGTAGCGGCCTGCCGCAGGCGGCGCTGAACTATATCAAGCGTATTGAAGAGCTGACTGGTGTGCCGATCGATATCATCTCTACCGGTCCGGATCGTACTGAAACCATGATTCTGCGCGACCCGTTCGACGCGTAATTCTGGTACGCCTGGCAGATATTTTGCCTGCCGGGCGAACAGTGTGATACATTGCTGTGTCGGGTAAGCCATTACGCTATCCGACACAGTGTTAAATCCTCGCTTTTTTCCTTCCCCGAACTGAAATAAATTAGCGACACAGCTTGTGGCTGGTTTATCATCAATATAAATGTATTTTTTCCCGATTTCCCTTTTGAGGTTGATGTGCAGTTAACGAGTTTCACTGATTACGGATTACGTGCGCTGATCTACATGGCGTCATTGCCAGAAGGGCGGATGACCAGTATTTCTGAAGTGACTGACGTCTACGGCGTCTCCCGTAATCATATGGTCAAAATAATCAATCAACTTAGTCGTGCCGGCTACGTGACTGCTGTTCGTGGAAAAAATGGCGGCATTCGCCTGGGTAAACCGGCGAGTGCGATACGTATTGGTGATGTGGTGCGCGAGCTGGAGCCCTTATCGCTGGTGAATTGCAGCAGTGAGTTTTGCCACATTACACCTGCCTGTAGGTTGAAACAGGCACTTTCTAAGGCCGTGCAAAGTTTTCTTACGGAACTGGATAACTACACGCTTGCCGATTTGGTTGAAGAGAATCAACCGCTTTATAAATTATTGCTGGTGGAGTGACGAAAATCTTCATCAGAGATGACAACGGAGGAACCGAGATGTCACAAGATCCTTTCCAGGAACGCGAAGCTGAAAAATACGCGAATCCCATCCCTAGTCGGGAATTTATCCTCGAACATTTAACCAAACGTGAAAAACCGGCCAGCCGTGATGAGCTGGCGGTAGAACTG
CACATTGAAGGCGAAGAGCAGCTTGAAGGCCTGCGTCGCCGCCTGCGCGCGATGGAGCGCGATGGTCAACTGGTCTTCACTCGTCGTCAGTGCTATGCGCTGCCGGAACGCCTCGACCTGGTGAAAGGTACCGTTATTGGCCACCGTGATGGCTACGGCTTTCTGCGGGTTGAAGGGCGTAAAGATGATTTGTATCTCTCCAGCGAGCAGATGAAAACCTGCATTCATGGCGATCAGGTGCTGGCTCAGCCGCTGGGTGCTGACCGTAAAGGTCGTCGTGAAGCGCGTATTGTCCGCGTACTGGTGCCAAAAACCAGCCAGATTGTTGGTCGCTACTTTACCGAAGCGGGCGTCGGCTTTGTGGTTCCTGACGACAGCCGTCTGAGCTTCGATATCTTAATCCCGCCCGATCAGATCATGGGCGCGCGGATGGGCTTTGTGGTCGTAGTCGAACTGACTCAGCGTCCGACTCGCCGCACCAAAGCGGTGGGTAAAATCGTCGAAGTGCTGGGCGACAATATGGGCACCGGCATGGCGGTTGATATCGCTCTGCGTACCCATGAAATTCCGTACATCTGGCCGCAGGCTGTTGAGCAACAGGTTGCCGGGCTGAAAGAAGAAGTGCCGGAAGAAGCAAAAGCGGGCCGTGTTGATCTGCGCGATTTACCGCTGGTCACCATTGATGGCGAAGACGCCCGTGACTTTGACGATGCAGTTTACTGCGAGAAAAAACGCGGCGGCGGCTGGCGTTTATGGGTCGCGATTGCCGACGTCAGCTACTATGTGCGTCCGTCAACGCCGCTGGACAGAGAAGCGCGTAACCGTGGCACGTCGGTGTACTTCCCTTCGCAGGTTATCCCGATGCTGCCGGAAGTGCTCTCTAACGGCCTGTGTTCGCTCAACCCGCAGGTAGACCGCCTGTGTATGGTGTGCGAGATGACGGTTTCGTCGAAAGGCCGCCTGACGGGCTACAAATTCTACGAAGCGGTGATGAGCTCTCACGCGCGTCTGACCTACACCAAAGTCTGGCATATTCTGCAGGGCGATCAGGATCTGCGCGAGCAGTACGCCCCGCTGGTTAAGCATCTCGAAGAGTTGCATAACCTCTATAAAGTGCTGGATAAAGCCCGTGAAGAACGCGGTGGGATCTCATTTGAGAGCGAAGAAGCGAAGTTCATTTTCAACGCTGAACGCCGTATTGAACGTATCGAACAGACCCAGCGTAACGACGCGCACAAATTAATTGAAGAGTGCATGATTCTGGCGAATATCTCGGCGGCGCGTTTCGTTGAGAAAGCGAAAGAACCGGCACTGTTCCGTATTCACGACAAGCCGAGCACCGAAGCGATTACCTCTTTCCGTTCAGTGCTGGCGGAGCTGGGGCTGGAACTGCCGGGCGGTAACAAGCCGGAACCGCGTGACTACGCGGAGCTGCTGGAGTCGGTTGCCGATCGTCCTGATGCAGAAATGCTGCAAACCATGCTGCTGCGCTCGATGAAACAGGCGATTTACGATCCAGAAAACCGTGGTCACTTTGGCCTGGCATTGCAGTCCTATGCGCACTTTACTTCGCCGATTCGTCGTTATCCAGACCTGACGCTGCACCGCGCCATTAAATATCTGCTGGCGAAAGAGCAGGGGCATCAGGGCAACACCACTGAAACCGGCGGCTACCATTATTCGATGGAAGAGATGCTGCAACTGGGTCAGCACTGTTCGATGGCGGAACGTCGTGCCGACGAAGCAACGCGCGATGTGGCTGACTGGCTGAAGTGTGACTTCATGCTCGACCAGGTAGGTAACGTCTTTAAAGGCGTAATTTCCAGCGTCACTGGCTTTGGCTTCTTCGTCCGTCTGGACGACTTGTTCATTGATGGTCTGGTCCATGTCTCTTCGCTGGACAATGACTACTATCGCTTTGACCAGGTAGGGCAACGCCTGATGGGGGAATCCAGCGGCCAGACTTATCGCCTGGGCGATCGCGTGGAAGTTCGCGTCGA
AGCGGTTAATATGGACGAGCGCAAAATCGACTTTAGCCTGATCTCCAGCGAACGCGCACCGCGCAACGTCGGTAAAACGGCGCGCGAGAAAGCGAAAAAAGGCGATGCAGGTAAAAAAGGCGGCAAGCGTCGTCAGGTCGGTAAAAAGGTAAACTTTGAGCCAGACAGCGCCTTCCGCGGTGAGAAAAAAACGAAGCCGAAAGCGGCGAAGAAAGACGCGAGAAAAGCGAAAAAGCCATCGGCGAAAACGCAGAAAATAGCTGCAGCGACCAAAGCGAAGCGTGCGGCGAAGAAAAAAGTGGCAGAGTGATCAATACCCTCTTTAAAAGAAGAGGGTTAGATTGCTGACAAAATGCGCTTTGTTCATGCCGGATGCGGCGTGAACGCCTTATCCGGCCTACATAATCACGCAAATTCAATATATTGCAGAGATCATGTAGGCCGGATAAACGTAGTGCATCAGGCAAAACGTAAACAACGAGTACATTAATGAGCGAAATGATTTACGGCATCCACGCAGTGCAGGCCCTGCTGGAGCGCGCCCCTGAACGTTTTCAGGAAGTCTTTATTTTAAAAGGCCGTGAAGATAAACGTCTGTTACCGCTGATTCACGCCCTTGAGTCTCAGGGCGTGGTTATCCAGTTGGCAAACCGCCAATATCTCGACGAGAAAAGCGACGGTGCCGTGCATCAGGGCATTATCGCCCGCGTGAAGCCAGGACGTCAGTATCAGGAAAACGATCTGCCGGATCTGATCGCTTCGCTCGATCAACCGTTCCTGCTGATCCTCGACGGCGTAACCGATCCGCACAACCTCGGCGCGTGCCTGCGTAGTGCGGACGCCGCAGGCGTTCATGCGGTGATTGTGCCGAAAGATCGCTCCGCACAGCTCAACGCTACGGCGAAAAAAGTAGCCTGCGGCGCGGCAGAAAGCGTTCCGCTGATTCGGGTGACTAACCTTGCGCGCACCATGCGTATGTTGCAGGAAGAGAATATCTGGATCGTCGGTACGGCAGGCGAGGCGGATCATACACTCTATCAGAGCAAAATGACCGGACGCCTGGCGCTGGTGATGGGTGCGGAAGGTGAAGGTATGCGTCGCCTGACTCGTGAACATTGCGATGAGTTGATCAGCATCCCGATGGCTGGAAGCGTTTCTTCCCTGAACGTTTCGGTTGCGACCGGAATTTGCTTATTTGAAGCGGTGCGCCAGCGCAGCTAATTTCTCAGAATTATGAAAAGCCATCCAGATTTGGATGGTTTTTTTTTGTCTATAGCTGGTAAGATAATTACGTATTGCAAATATTCATTTTATTAATATTTAAACTTAATCTATATGGAATAATATATGACATGGAATCCGTTGGCGCTAGCGACGGCGCTGCAAACTGTACCTGAACAAAATATTGATGTAACAAATAGCGAGAACGCATTAATTATTAAAATGAATGATTATGGCGATTTGCAAATCAATATTCTTTTTACTTCCCGCCAAATGATTATCGAAACCTTTATTTGTCCGGTGAGTAGTATCAGCAATCCAGATGAATTTAATACCTTCTTATTAAGAAATCAGAAGATGATGCCGTTGTCATCGGTAGGGATCTCCAGCGTACAACAGGAAGAGTATTACATTGTTTTCGGGGCGTTATCGCTTAAATCTTCTCTTGAAGATATCCTGCTTGAGATAACCTCGCTGGTAGATAACGCATTGGATCTGGCTGAAATTACAGAAGAATATTCACACTAATTACAAGGACGGGTATTTATGGGAATTTTAAAAAGTTTATTTACGCTGGGGAAATCGTTTATCTCCCAGGCGGAAGAATCCATTGAAGAAACCCAGGGCGTGCGCATGCTGGAACAGCATATTCGTGACGCTAAAGCTGAACTCGATAAAGCCGGAAAATCTCGCGTTGATCTGCTGGCGCGGGTGAAATTAAGTCACGATAAGCTGAAAGATTTACGTGAGCGCAAAGCCAGTCTGGAAGCCCGTGCGCT
GGAAGCGTTAAGCAAGAACGTTAATCCGTCGTTGATTAACGAAGTTGCTGAAGAAATCGCGCGCCTTGAGAATCTCATTACCGCTGAAGAGCAAGTGTTGTCGAATCTGGAAGTCTCCCGTGATGGCGTGGAAAAAGCAGTTACAGCGACAGCGCAGCGTATCGCTCAGTTTGAGCAGCAAATGGAAGTCGTTAAAGCCACTGAAGCCATGCAGCGTGCACAACAGGCTGTAACAACCTCTACCGTTGGCGCATCTTCCAGCGTTTCGACAGCGGCAGAATCCTTAAAACGCCTGCAAACGCGTCAGGCCGAACGTCAGGCTCGCCTGGATGCTGCCGCACAGTTGGAGAAAGTCGCAGACGGTCGCGACCTTGACGAAAAGCTGGCGGAAGCCGGAATTGGCGGTAGCAATAAAAGTAGCGCCCAGGATGTATTAGCAAGACTGCAACGCCAACAGGGCGAGTAATTTTTTTGCCAGCCTCGCTAAAAGGCTGGCAACTATTTTAAGGATAAAATATGTCTGGTTTTTTCCAGCGTCTGTTTGGCAAGGATAATAAGCCAGCTATCGCTCGTGGTCCGCTGGGACTTCATCTCAATAGTGGCTTTACGCTCGACACGTTAGCGTTTCGTTTGCTGGAAGATGAATTGCTGATTGCGCTGCCGGGTGAGGAATTTACGGTAGCCGCCGTCAGCCACATCGATCTGGGCGGCGGTAGTCAGATTTTCCGCTACTACACTTCAGGCGATGAGTTTCTGCAAATCAATACCACCGGCGGCGAAGATATTGATGACATTGATGATATCAAGCTCTTTGTCTATGAAGAGAGCTACGGTATCAGTAAAGAGAGTCACTGGCGCGAGGCGATCAACGCCAAAGCGATGGGAGCAATGACCTTAAACTGGCAGGAAAAACGCTGGCAGCGATTTTTTAACAGCGAAGAACCGGGAAATATCGAACCCGTCTACATGCTGGAAAAAGTAGAAAATCAAAACCATGCCAAATGGGAGGTCCATAATTTTACCATGGGCTACCAACGGCAAGTAACCGAAGATACTTACGAATATTTGCTGCTAAATGGTGAGGAATCTTTTAACGATCTCGGCGAGCCAGAGTGGTTATTTTCGCGCGCGTTGGGTGTCGATATCCCGCTGACATCACTTCATATTATTGGTTAATTACAAGGACGTTTACAATGCATATACTGGATTCACTTCTTGCCTTTAGCGCCTATTTTTTTATTGGCGTGGCTATGGTGATTATTTTTCTGTTTATCTATTCTAAAATTACACCGCACAACGAATGGCAGTTAATCAAAAACAATAATACCGCGGCGTCACTGGCGTTCAGCGGTACATTGTTGGGTTACGTTATCCCCTTATCCAGTGCGGCAATCAATGCGGTGAGTATTCCAGACTATTTCGCCTGGGGCGGGATTGCACTGGTGATTCAGTTACTCGTTTTTGCTGGTGTCAGACTTTATATGCCCGCATTAAGCGAAAAAATTATTAATCACAATACCGCAGCAGGAATGTTTATGGGAACCGCCGCGCTGGCTGGCGGTATTTTTAACGCAGCTTGTATGACATGGTAATGGACGATCATGGCCAGAAAACGCAAATCAAGAAATAACAGTAAAATTGGTCACGGAGCGATTAGTCGCATTGGTAGACCGAATAATCCTTTTGAACCGTGCCGCAATCGCTACGCACAAAAATACTTAACGTTGGCGCTAATGGGCGGTGCCGCTTTTTTCGTATTGAAAGGTTGTAGCGATAGTAGCGATGTCGATAACGACGGCGACGGAACGTTTTACGCGACAGTGCAGGATTGTATTGATGACGGTAATAATGCCGACATTTGCGCGCGTGGCTGGAACAACGCCAAAACGGCATTTTATGCCGATGTTCCGAAGAATATGACTCAGCAGAACTGTCAGTCTAAGTACGAAAATTGCTACTACGACAATGTTGAACAGAGTTGGATCCCGGTCGTTTCTGGA
TTTTTGTTAAGCCGGGTTATTCGTAAAGATCGCGATGAGCCGTTTGTTTATAACAGCGGCGGTTCCTCTTTTGCTTCGCGCCCAGTCTGGCGCAGCACTTCTGGTGATTACTCCTGGCGCTCCGGTTCTGGCAAAAAAGAGTCTTACTCTTCGGGCGGCTTTACCACCAAAAAAGCGTCTACCGTTTCTCGCGGCGGCTATGGTCGTTCTTCCAGCGCCCGTGGGCATTGGGGAGGCTAATCATGCTGAGACACAACGTTCCTGTGCGACGGGATCTGGACCAGATCGCCGCTGACAACGGTTTCGACTTTCATATCATCGACAATGAAATCTATTGGGATGAGAGTCGGGCTTACCGTTTTACTCTGCGCCAGATTGAAGAGCAGATCGAAAAACCGACTGCGGAACTGCATCAGATGTGCCTTGAGGTGGTGGATCGCGCGGTAAAAGATGAAGAGATCCTGACGCAACTGGCGATCCCGCCGTTGTACTGGGATGTGATCGCTGAAAGCTGGCGCGCCCGCGATCCTTCGCTGTATGGACGGATGGATTTTGCCTGGTGTGGCAATGCGCCGGTGAAGCTGCTGGAGTACAACGCCGATACGCCAACTTCATTGTACGAGTCGGCTTATTTCCAGTGGCTGTGGCTGGAGGATGCCCGGCGCAGCGGCATTATTCCGCGTGATGCCGATCAGTACAATGCTATTCAGGAACGCCTGATTTCACGCTTTAGCGAGCTTTACAGTCGGGAACCGTTTTATTTTTGCTGCTGTCAGGACACCGATGAAGACAGGAGTACCGTGCTGTACTTGCAGGACTGCGCCCAGCAGGCAGGGCAGGAGTCGCGGTTTATCTACATTGAAGATCTCGGTTTGGGCGTCGGCGGCGTACTGACCGATCTTGATGATAATGTCATCCAGCGTGCATTTAAGCTGTATCCGCTGGAGTGGATGATGCGTGACGATAACGGTCCGCTGCTGCGCAAGCGTCGCGAGCAATGGGTGGAGCCGTTATGGAAAAGTATCTTGAGTAATAAAGGGCTAATGCCGCTGCTTTGGCGCTTCTTCCCTGGTCATCCTAATCTTCTTGCGTCCTGGTTCGATGGCGAGAAACCGCAGATTGCCGCTGGCGAAAGCTATGTGCGTAAACCAATCTATTCGCGCGAAGGCGGCAACGTCACCATTTTTGACGGCAAGAATAACGTTGTTGACCACGCTGATGGTGATTACGCCGATGAACCGATGATCTACCAGGCGTTTCAACCTCTGCCGCGGTTTGGCGATAGCTACACACTCATCGGTAGCTGGATTGTCGATGATGAAGCGTGCGGAATGGGGATCCGTGAAGATAACACACTGATCACCAAAGACACCTCACGTTTCGTTCCGCATTACATTGCTGGATAAGAATGTTTTAGCAATCTCTTTCTGTCATGAATCCATGGCAGTGACCATACTAATGGTGACTGCCATTGATGGAGGGAGACACAGTGCACTGGCAAACTCACACCGTTTTTAATCAACCTATACCATTAAATAACAGCAACTTATACCTGTCTGATGGCGCGCTCTGCGAAGCGGTAACGCGTGAAGGTGCTGGCTGGGATAGCGATTTTCTAGCCAGTATTGGTCAGCAGTTAGGAACGGCTGAATCCCTTGAACTGGGGCGGCTGGCGAATGTGAATCCGCCTGAATTATTGCGCTACGATGCGCAAGGACGCCGTCTGGACGATGTGCGTTTTCACCCCGCCTGGCACCTGCTGATGCAGGCGCTATGTACCAATCGGGTGCACAATCTTGCCTGGGAAGAAGACGCTCGCTCCGGCGCATTTGTGGCGCGCGCGGCGCGTTTTATGTTACATGCGCAGGTTGAGGCAGGGTCGTTATGTCCGATAACCATGACCTTTGCCGCCACGCCATTGTTGTTACAGATGTTACCCGCGCCGTTTCAGGACTGGACCACGCCGCTATTGAGCGATCGCTACGATTCTCACTTATTGC
CAGGTGGGCAAAAACGCGGTTTGTTGATTGGCATGGGAATGACGGAAAAGCAGGGCGGTTCCGATGTTATGAGCAACACCACCCGTGCAGAGCGTCTGGAAGATGGCTCTTATCGGCTGGTGGGGCATAAATGGTTTTTCTCGGTTCCGCAAAGCGATGCGCATCTGGTGCTGGCGCAGACCGCGGGTGGTCTGTCCTGCTTTTTTGTGCCGCGCTTTTTGCCTGACGGGCAACGCAACGCGATTCGCCTCGAGCGGCTGAAAGATAAGCTGGGTAATCGCTCTAACGCCAGTTGCGAAGTGGAGTTTCAGGATGCCATTGGTTGGTTGTTGGGGCTGGAAGGGGAAGGAATTCGTCTGATCCTGAAAATGGGTGGGATGACGCGTTTTGATTGCGCCCTGGGTAGCCATGCCATGATGCGCCGTGCATTTTCGCTGGCGATTTATCATGCACATCAACGCCATGTTTTTGGTAATCCATTGATCCAACAGCCCCTTATGCGTCATGTCTTAAGTCGCATGGCACTTCAGCTTGAAGGGCAAACGGCGTTGCTGTTTCGTCTTGCGCGAGCGTGGGACCGGCGTGCCGATGCCAAAGAAGCCCTGTGGGCGCGTTTATTTACGCCTGCGGCAAAATTTGTGATCTGCAAAAGAGGTATGCCGTTTGTGGCCGAAGCGATGGAGGTGCTGGGCGGCATTGGTTATTGCGAGGAGAGTGAGCTGCCGCGGCTTTACCGGGAGATGCCGGTAAACAGTATTTGGGAAGGTTCCGGCAATATTATGTGCCTGGATGTGTTGCGCGTTCTCAATAAGCAAGCGGGCGTATACGACTTATTGTCGGAAGCATTTGTGGAAGTGAAAGGGCAGGATCGCTATTTTGATCGCGCGGTTCGTCGTTTACAGCAGCAGCTGCGTAAGCCAGCTGAAGAACTGGGGCGAGAGATTACTCATCAGCTATTCCTGCTGGGCTGCGGTGCGCAAATGTTGAAATATGCTTCTCCGCCAATGGCGCAGGCGTGGTGTCAGGTGATGTTAGATACGCGCGGCGGCGTACGGTTGTCAGAGCAGATCCAGAATGATTTATTGCTGCGGGCGACGGGGGGAGTGTGTGTGTAAGCGTATACGACTGATGCGACGCTGGTTTCGATTAACTAAATGAAATATGTGAAAATTGTAGGCCGGACAAGGCGCTCGCGCCGCATCCGGCATTGTTCATTTGCTGTAGATGACACTCATGCATACAGTATCGCCTGTACGCGCCAGTTATCAGGCTGCTGGTCTTCGTACATGGTAATAATGCGGTACCATGACGCACCTTGTTCATCGGCCTTTAACGCCACGATACGTTCCACGTCCTGCGGATCCCCTGAAATATTGCTAACTGAGATCTGACCTATTTCATTCAGGCCCGTCACGCAGTCAGCACTGGCGAATTCTGCAGACTGTGCCGTGGCGCTCGCAGGATAAGTCACTAATTGCAGCGAGGGTGAGGCAAGTAATTGTTTCATCGTCAGCTCCATTTTTCGTTTCCCCACATGTTGCAGGGCATTCCATCGCGGTAATAAACGCCCAGGCTTCCAGCCGGTAATTTATTGTGCACAGGTAAACGCATAGGGACGCAAAGCAATGTAAAGCTGACTATGTAAGTTGTCTGATTTATTTACGGTACAAAATGGCTTGTGAATACCACTGTCCTGTCACGATGGTTTCGTCCACCATGACGACAACGTAATAATCAGCTTTTGCAGCGACAGCTTTCGCTTTGATTTCTGCTAATGCGTCATCCGGAGAACCCCGAACCATCGTGCTTACGCTACCTATTCGCTGTAACCCTTGCGTCTGGTCGCGGCGAATCTCTTGCGGATGGTCCGTTACTGGCGGTGCTGGCTGTGGCGTACCTTGCAGTGCGCTACAGGCACTTAACATCAACACCAATAATAAACTGGCAAACCGGTAAATAACGCTATTACGTTTCCTGCTAACCATAGTGTAGTGCCTTAT
TAATTTAACTTTGGGGGAATGTTCCCGAATTGTTACCTGGTACGCGATTTTCGAATGAAAACGTGACGAAAGCGTAACCCACATCTCAACATTATGAACTAATACCGCCAGCAGAAACTTGTGCTAAGGATTTCTTCATGTCATACCAGACAAAAAGGAGAGACAGATGATTGAAATAGAATCACGCGAGCTGGCAGATATTCCCGTTCTTCATGCTTATCCTGTCGGGCAAAAAGATACCCCGTTACCGTGCGTAATTTTTTATCACGGCTTTACTTCATCCAGTCTGGTGTATAGCTATTTTGCCGTTGCGCTGGCGCAGGCTGGTTTGCGGGTGATCATGCCGGATGCGCCCGATCACGGTAGCCGTTTTAGTGGTGACGCAGCGCGGCGGTTAAATCAATTCTGGCAAATCTTGCTACAAAGTATGCAGGAATTCACTACTTTACGTGCGGCAATAGCCGAAGAAAACTGGCTGCTTGATGACCGTCTGGCAGTCGGTGGCGCGTCGATGGGCGCGATGACGGCACTGGGGATTACCGCTCGCCACCCCACGGTGAGATGTACCGCCAGCATGATGGGATCGGGCTATTTTACATCACTCGCCCGTTCACTGTTTCCACCGCTGATACCTGAAACGGCAGCACAGCAGAATGAATTCAATAACATTGTCGCGCCACTGGCAGAGTGGGAAGCGACAAACCACCTGGAACAACTTAGTGACAGACCTCTACTGCTGTGGCATGGCCTCGACGATGACGTTGTGCCTGCCGACGAATCACTACGTTTGCAGCAGGCCTTAAGCGAGACGGGACGGGATAAACTGCTAACCTGTTCATGGCAGCCAGGCGTGCGTCACCGCATTACGCCTGAGGCGTTAGATGCTGCCGTGACATTTTTCCGCCAGCATCTTTAAACACGCAGAATGCTGACCCCTTGCGCTTCCAGTTGTTGCAGGATTTCCGGGTTAGCATTTTTGCCGGTGATAAGCATATCGATTTGATCGGCACGGCTAAAAAGCATTCCCGCGCGTTCGCCAATCTTACTGCTATCAACCAGTACCACCAGTTTCCCTACCACGCTCAGCATCTTCTGCTCTGCCATTGCTGTCAGCATATCGGTTTTATACAACCCTTCTGCGGTCAGCCCTTTTCCGCTGGTAAACATCCAGTGCCCGGCATAGAGACTGTTTTCGCTGCCCTGCGGGCTTAAAGTGATGGACTGACTTTTGTTGTACTGTCCGCCCATAATGATCACGCTGTCATGTTCTTGATCGATCAGGTAATTTGCCAGCGGTAGATAATTAGTGATGATTTGCACTGGCTTGCCACACATTTCCCGCCCAAGCAGAAACGCGGTGGAGCCGCAGTTGATGACTACGCTTTCGCCCGGATTAACCAGCTGCGAGGCCGCTTTAGCGATACGTACTTTTTCATCGTGATTCTGCGCCTGATGCAGATTCATCGGCGTCCAGCGCGGGCGCTGTTGGGTAATAGCTTCTGCGCCATTGCGCACTTTTTTCAGTTTGCCGCTTTCGTCAAGTTTATTGATATCGCGTCGCGCAGTGGCAGGCGAAATTCCCAGACGCTCAACGACTTTCTCAACGGTCACAAAGCCCAATTGTGCGAGCATTTCCAGGAGGATTTGATGTCTTTGTGCTTCAGTCATGAGCTATTCCGATAAAATTTGATTTTTTTAGATGATATTTGAAATAGCCAGGAAATACTACGCCGGATAGCGCGAACTTCTCCACGCTACCCGGTATAAACGACAGATTACAGGAATGACTTGAACGGCAGATCCGGTTCAATAGTGAAGCAATCATCGAAACCGCGCGGATAGTGGTACTCGAAGTTGTCTTTATCCAGCGGCCAGGTAAATTTGCCACCCACCTGCCAGATAAACGGCTTGAAGCCATACTTCAGGCGATCTTTTTTCATCTCCCACAGCACGCGGATCTCTTGCGGATCGGCCTGGAAGTTTGACCAGATATCGTGGTG
GAACGGGATCACTACTTTCGCATTCAGCGCTTCACCCATACGCAGCATATCGGCGCTGGTCATTTTGTCGGTGATACCGCGCGGGTTTTCGCCGTACGATCCTAACGCCACGTCGATCTGATGTTCGTTACCGTGCTTCGCATAATAGTTAGAGTAGTGGGAGTCGCCGCTGTGATACAGGGAGCCGCCAGGCGTTTTGAACAGGTAGTTCACCGCGCGATCGTCCATGCCATCTGGCAGTACGCCAGCCGCTTTTTGATCGGCAGGCAGGGTGATCAGTGCAGTACGGTCGAAAGCATCAAGCGCATGAATTTCAATGTCTTTCACTTTTACTACATCGCCCGGTTTGACCACGATGCAACGCTCTTTCGGTACGCCCCAGCCAATCCACAAATCCACACAGGTTTTCGGTCCGATAAACGGTACGTCATCTGCACAATTCTGCATCACGGCAGCAGCGACGTTAACGTCGATATGATCGTTGTGATCGTGAGTCGCCAGTACCGCGTCGATCTGGCGAATCGCAAACGGATCAAGAACAAACGGGGTGGTACGCAGGTTTGGCTGCAGTTTTTTCACGCCAGCCATGCGCTGCATCTGGTGACCCTGTTTCATTAACGGGTTACCGTGACTTTGTTTGCCAGTGCCGCACCAGAAATCAACGCAAACGTTGGTGCCACCTTCCGATTTCAACCAGATCCCGGTGCAGCCAAGCCACCACATCGCAAATGTGCCAGGAGCGACCTGTTCTTGTTCAATTTCTTCATTCAACCAGCTACCCCACTCCGGGAAAGTGCTCAGGATCCAGGATTCACGGGTGATACTTTTCACTTTACTCATCGCCATTTACCTTCATGATAGTTCAATTCGAATCAATATGTGATTGGTTTTGATTAATCCTGACACTATTTTTTCAGGAAGGCAATGACCATTTTTTGACTTTTTGCCAGGGAAGTTGTTGTTGATTTTGAGTATGGAAAGATTTAATGGAATGTGTAATTCAATTAACTAAATGAATTTAAATGGATAATTGTTTCGTTGTGTGAATCCCACTCTATCCATGTGGAATTATTTGCGGGTCGCGTCACATTTAATCATAAATAATCTTGTTGTGATTACTTTTGAAAATTAGAGTGAGTGCACAACATTCCGGGTGTGTGGAATACCCGGTTACCTCTTCTTCAGGAGATCGTTATGGAGATCCTCTACAACATCTTTACCGTGTTTTTTAACCAGGTCATGACCAATGCCCCGTTGTTGCTGGGTATTGTGACCTGTCTGGGCTACATCCTACTGCGCAAAAGTGTCAGCGTTATTATTAAAGGCACGATTAAAACCATAATTGGTTTCATGTTGTTGCAGGCAGGGTCCGGCATCCTCACCAGCACCTTCAAACCGGTGGTGGCGAAAATGTCCGAAGTCTACGGCATTAACGGCGCAATTTCCGATACCTACGCTTCAATGATGGCAACCATCGACCGCATGGGCGATGCCTATAGCTGGGTGGGTTACGCCGTATTGTTAGCGCTGGCGCTGAACATCTGTTACGTGCTGTTGCGTCGCATTACCGGCATTCGCACAATCATGTTGACCGGCCACATCATGTTCCAGCAGGCCGGGTTGATTGCCGTTACGCTGTTTATCTTCGGCTACTCCATGTGGACCACCATTATCTGTACCGCGATTCTGGTTTCGCTCTACTGGGGCATCACTTCCAACATGATGTACAAGCCGACTCAGGAAGTGACGGATGGCTGTGGTTTCTCCATCGGTCACCAGCAGCAGTTTGCATCATGGATTGCCTATAAAGTCGCGCCGTTCCTCGGCAAAAAAGAGGAGAGCGTTGAAGACCTCAAATTGCCGGGCTGGCTGAACATTTTCCACGACAACATCGTCTCCACGGCGATTGTGATGACCATCTTCTTTGGTGCCATTCTGCTCTCCTTCGGTATCGACACCGTGCAGGCGATGGCAGGCAAAGTGCACTGGACGGTGTACAT
CCTGCAAACTGGTTTCTCCTTTGCGGTGGCGATCTTCATCATCACGCAGGGTGTGCGCATGTTTGTGGCGGAACTCTCTGAAGCATTTAACGGCATTTCCCAGCGCCTGATCCCAGGTGCGGTTCTGGCGATTGACTGTGCAGCTATCTATAGCTTCGCGCCGAACGCCGTGGTCTGGGGCTTTATGTGGGGCACCATCGGTCAGCTGATTGCGGTTGGCATCCTGGTCGCCTGCGGCTCCTCGATCCTGATTATTCCTGGCTTTATCCCGATGTTCTTCTCTAACGCCACCATCGGCGTGTTCGCTAACCACTTCGGCGGCTGGCGTGCGGCGCTGAAGATTTGTCTGGTGATGGGGATGATCGAAATCTTTGGTTGCGTCTGGGCGGTGAAACTCACCGGTATGAGTGCCTGGATGGGCATGGCGGACTGGTCGATTCTGGCACCGCCGATGATGCAAGGCTTCTTCTCCATCGGTATCGCCTTTATGGCCGTCATCATTGTAATTGCACTGGCTTATATGTTCTTCGCTGGCCGCGCGCTGCGCGCAGAAGAAGATGCAGAAAAACAACTGGCAGAACAGTCTGCTTAATAAGGAGTTTTGATTATGACCGTACGTATTCTGGCTGTGTGTGGCAACGGACAAGGCAGTTCCATGATCATGAAGATGAAAGTGGACCAGTTTTTAACCCAATCAAACATTGACCATACGGTAAACAGCTGCGCGGTTGGCGAGTACAAAAGCGAGTTGAGTGGCGCGGATATCATCATCGCTTCTACGCACATTGCGGGCGAAATCACCGTGACCGGCAACAAATACGTGGTTGGCGTGCGCAACATGCTCTCTCCTGCCGACTTTGGCCCGAAACTGCTGGAAGTGATCAAAGAGCATTTCCCGCAGGATGTGAAGTAAGGACGCGCCATGAAATTACGTGATTCGCTGGCGGAAAATAAATCCATCCGCCTGCAGGCTGAAGCAGAGACATGGCAGGAAGCGGTGAAAATCGGCGTTGACCTGCTGGTGGCGGCAGATGTGGTAGAGCCGCGTTACTACCAGGCGATTCTGGATGGCGTTGAACAGTTCGGTCCTTATTTCGTTATCGCTCCGGGCCTGGCAATGCCGCACGGGCGTCCGGAAGAGGGCGTTAAGAAAACCGGTTTCTCTCTGGTAACGCTGAAAAAGCCGCTGGAGTTCAACCACGATGACAACGATCCGGTGGATATCCTCATCACCATGGCGGCGGTCGATGCCAATACTCACCAGGAAGTGGGCATCATGCAGATCGTCAACCTGTTTGAAGATGAAGAGAATTTTGACCGTTTACGCGCCTGCCGTACCGAGCAGGAAGTACTGGATCTCATTGACCGCACCAACGCGGCAGCTTAAGAAGGAATTGAACATGTCATTACCGATGTTGCAAGTCGCGCTGGACAACCAGACTATGGATAGCGCCTACGAAACCACTCGCCTGATTGCCGAAGAAGTCGACATTATCGAAGTGGGCACCATTCTGTGCGTGGGCGAAGGCGTGCGTGCGGTTCGTGACCTGAAAGCGCTCTACCCGCACAAAATCGTACTGGCAGACGCCAAAATTGCCGATGCAGGCAAAATCCTTTCGCGTATGTGCTTCGAAGCCAACGCTGACTGGGTGACGGTAATTTGCTGTGCGGATATCAACACCGCCAAAGGCGCGCTGGACGTGGCAAAAGAGTTTAACGGCGACGTGCAGATCGAACTGACCGGTTACTGGACCTGGGAACAGGCGCAACAGTGGCGCGATGCAGGCATTGGGCAGGTGGTTTATCACCGCAGCCGTGACGCGCAGGCCGCAGGCGTGGCGTGGGGCGAAGCGGACATCACCGCGATCAAACGTCTTTCCGATATGGGCTTCAAAGTCACCGTCACCGGAGGCCTGGCGCTGGAAGATCTGCCGCTGTTCAAGGGTATTCCGATTCACGTCTTTATCGCGGGCCGTAGTATCCGTGATGCCGCTTCTCCG
GTGGAAGCCGCACGTCAGTTCAAACGTTCCATCGCTGAACTGTGGGGCTAAGGAGCGGATATGTTGTCCAAACAAATCCCGCTTGGCATCTATGAAAAAGCGCTCCCCGCCGGGGAGTGCTGGCTGGAACGCCTGCAACTGGCAAAAACGTTAGGCTTCGATTTTGTCGAAATGTCGGTAGATGAAACTGACGATCGCCTGTCGCGCCTCAACTGGAGCCGCGAGCAGCGTCTGGCGCTGGTCAATGCGATTGTTGAAACCGGCGTGCGCGTGCCGTCCATGTGCCTTTCTGCTCATCGTCGTTTCCCGCTGGGCAGTGAAGATGACGCGGTGCGGGCGCAGGGGCTGGAGATTATGCGTAAAGCTATCCAGTTCGCCCAGGATGTCGGTATTCGCGTGATCCAGCTGGCGGGCTATGACGTTTACTATCAGGAAGCCAATAACGAAACGCGTCGTCGTTTCCGTGACGGCCTGAAAGAGAGCGTTGAGATGGCAAGCCGCGCGCAGGTGACGCTGGCGATGGAGATCATGGATTATCCGTTGATGAGCTCCATCAGCAAGGCGCTGGGATACGCGCACTATCTCAACAATCCGTGGTTCCAGCTCTACCCGGATATCGGCAACCTGTCGGCGTGGGACAACGATGTGCAGATGGAGTTGCAGGCCGGAATCGGGCATATCGTCGCGGTACATGTGAAAGACACCAAACCTGGCGTCTTCAAAAACGTGCCGTTTGGCGAAGGTGTAGTGGATTTCGAACGTTGTTTCGAAACGCTCAAACAGAGTGGCTATTGCGGGCCGTACCTGATTGAGATGTGGAGCGAAACGGCGGAAGACCCGGCGGCAGAAGTGGCGAAAGCGCGTGATTGGGTGAAAGCGCGCATGGCGAAAGCGGGCATGGTGGAGGCGGCATAATGCAAAAGCTAAAACAGCAGGTATTTGAAGCCAACATGGAGCTGCCGCGCTACGGGCTGGTGACCTTTACCTGGGGCAACGTCAGCGCTATCGACCGCGAACGCGGGCTGGTGGTGATCAAGCCCAGCGGCGTTGCCTACGAAACCATGAAAGCGGCCGATATGGTGGTGGTTGATATGAGCGGCAAGGTGGTGGAAGGGGAGTATCGCCCATCTTCCGACACTGCGACGCATCTCGAACTCTACCGTCGTTACCCGTCGCTTGGTGGCATTGTCCATACCCACTCCACTCATGCCACCGCATGGGCGCAGGCGGGGCTGGCGATCCCGGCGTTAGGCACCACGCACGCCGACTACTTCTTTGGCGACATTCCGTGTACGCGCGGGTTAAGCGAAGAAGAGGTGCAGGGCGAGTATGAACTGAACACCGGCAAAGTGATTATCGAAACGCTGGGCAACGCCGAGCCGCTGCATACGCCGGGAATTGTGGTGTATCAGCACGGGCCGTTCGCCTGGGGGAAAGATGCTCACGATGCGGTGCATAACGCGGTGGTGATGGAAGAAGTGGCGAAAATGGCGTGGATTGCCCGCGGCATTAACCCACAACTCAATCACATCGACAGCTTCCTGATGAATAAACACTTCATGCGTAAACACGGTCCTAACGCTTATTACGGGCAGAAGTAGAACACGCGCTGCGGAAATTTCCTTCCTCGGGAGATAACTGGTCTAATTCCGCAGCCGTTTTTCAAAAAAAAGCCCCCTGCGAAGGGGGCAAAGCAAACTATGGCAATGTTTCGTTGGTTATACCTGGTGCTAGCGATAAATATCCGCGCTGGCGTGCATATTGCCGTTACTCCCCGGTTCCCGCATCAGAATTACGTGGTAGTACGTTGCGCCTTGCGCATCTGTTTCTTCATTTAATGCCTGACGTGCTTCGCTTTCAGTGGCGAAATTATGATTGATATAAATCACGCCTAAGCTTTGCACATCGTCCATATTTCTGGCCTGGTGGTTATTAATTTCAATGGCTGCCCATGTATTTGCACTTAGCAAAAGCACAGCCAGAAGGGCTAAAACACGACT
GAACATAGATACCTCCTCGACGGCTGACTTTGTGTGCTCTCCTCTGTGATGATCTTCTGATTTAATTTTAATCAATGATAAAGAAGTTGATGGTGACCATTTCTGATGCAGTTGTTCAAAAAAACACCATGATGAAGTGTGATGAACTTCAAATCAGCGTGTTAGAGGTTAATTGCGAAAGGGGAGATTTATTTCGGCTCTGCCCTTGAGTTTAGCGAGGCATACAAGTACTATAACGCGTCATTTTTCAGCCGACCTTTAACACGTTCCTTGCCTCCCCGGGATTCGGCTGACCCAGACAGGAGGCTGAATAATCCGTAAGGAGCAATTCGATGCGTCA +>419434025 +ACGGTTTTTTCACCGTTGATTACGAGAGTATGGCCTGCGTTGTGACCGCCCTGGTAGCGTACAACATATTTAGCCCGTTCAGTCAGAAGATCGACGATCTTACCTTTACCTTCGTCACCCCATTGGGTGCCCAGTACGACGACGTTGTTACCCATTTTT +>419434026 +AAAAATGGGTAACAACGTCGTCGTACTGGGCACCCAATGGGGTGACGAAGGTAAAGGTAAGATCGTCGATCTTCTGACTGAACGGGCTAAATATGTTGTACGCTACCAGGGCGGTCACAACGCAGGCCATACTCTCGTAATCAACGGTGAAAAAACCGT +>419442075 +ACCAGGGCGGTCACAACGCAGGCCATACTCTCGTAATCAACGGTGAAAAAACCGTCCTCCATCTTATTCCATCAGGCATTCTCCGCGAAAACGTCACCAGCATCATCGGTAACGGCGTAGTGCTGTCTCCGGCTGCGCTGATGAAAGAGATGAAAGGTCTGGAAGACCGTGGTATCCCGGTTCGCGAGCGTCTGCTGCTCTCCGAAGCCTGCCCGCTGATCCTTGATTATCACGTGGCGCTGGACGTTGCGCGTGAAAAAGCGCGTGGCGCGAAAGCGATCGGCACCACCGGTCGTGGTATCGGCCCGGCTTATGAAGACAAAGTGGCTCGTCGCGGTCTGCGCGTGGGCGACCTGTTCGATAAAGCCACCTTCGCTGAAAAACTGAAAGAAGTGATGGAATATCACAACTTCCAGTTGGTGAACTTCTACAAAGCAGACGCTGTTGACTACCAGAAAGTGCTGGATGATGTCATGGCGATTGCAGACATTCTGACCGGCATGGTAGTTGATGTGTCCGATCTGCTGGACCAGGCGCGCAAGCGTGGCGATTTCGTCATGTTCGAAGGTGCGCAGGGTACGCTGCTGGATATCGACCACGGTACCTATCCGTACGTAACCTCCTCTAACACCACCGCAGGTGGCGTGGCGACCGGCTCTGGCCTGGGTCCACGTTATGTGGATTACGTTCTGGGCATCATCAAAGCGTACTCCACTCGCGTGGGTGCGGGTCCATTCCCGACCGAACTGTTTGATGAAACCGGCGAGTTCCTGTGCAAGCAGGGTAACGAGTTTGGCGCGACCACCGGTCGTCGTCGTCGTACCGGCTGGCTGGATGCAGTGGCTGTGCGTCGTGCAGTGCAGATCAACTCCCTGTCTGGCTTCTGCCTGACCAAACTGGACGTCCTGGACGGGCTGAAAGAGGTGAAAATCTGCGTCGGCTACCGCATGCCAGATGGCCGTGAAGTGACCACCACTCCGCTGGCTGCTGACGACTGGGAAGGTATCGAGCCAATCTACGAAACCATGCCAGGCTGGTCTGAGACCACTTTCGGTGTGAAAGAGCGTAGCGGCCTGCCACAGGCAGCGCTGGATTACATCAAGCGCATTGAAGAACTGACCGAAGTGCCGATCGACATTATCTCTACCGGCCCGGATCGTACTGAAACGATGATCCTGCGCGACCCGTTCGACGCATAATCTTCGTGACTGCTGGCCTGCGGGGAAACCCGTAGGCCGGATAAGCGTAGCGCCATCCGGCGACCGCTCCTGGATATCCGCTTTTTCGCCCCGTCTGTCAA
ATAAATTAGCCCCTAACTATCTGGCTGGTTTATCATCATTAATGAATATCTCTGCGGTTTTACCGCGTTTTCCCTTTTTTCCTGAGGTTGATGTGCAGTTAACAAGTTTCACCGATTACGGCTTACGCGCGCTAATCTACATGGCGTCGTTACCCGATGGGAAAATGACCAGTATCTCTGAAGTGACAGAGGTCTACGGCGTGTCCCGTAATCATATGGTCAAAATAATCAATCAACTTAGTCGTGCCGGATACGTTGCTGCCGTCCGCGGGAAAAATGGTGGGATCCGTCTCGGTAAACCGGCGCAAAGTATTCGTATTGGGGATGTGGTGCGTGAACTGGAGCCTTTGTCTCTGGTGAACTGCAGCAGTGAGTTCTGCCACATTACACCCGCTTGTCGCCTGAAACAGGCGCTTTCTCTGGCCGTGCAAAGTTTTCTTAAGGAACTGGATAACTACACGCTGGCCGATTTGGTTGAAGAGAATCAACCGCTTTATAAATTATTGCTGGTGGAATGAAGAAAATTTCCACCGGAGCTGACAACGGAGGAACCGATATGTCACATGATCCTTTCCAGGAACGCGAAGCTGAAAAATACGCGAATCCTATCCCCAGCCGCGAGTTCATCATTGAACACTTAACAAAACGCGAAAAACCCGCCAATCGTGAAGAACTTGCTGTTGAATTAAACATTGAAGGTGAAGAGCAAACTGAAGCCCTTCGCCGCCGCCTGCGCGCCATGGAGCGCGACGGGCAACTGGTCTTTACCCGTCGCCAGTGCTATGCGCTGCCAGAACGCCTCGACCTGTTGAAAGGGACCGTCATTGGTCACCGCGACGGTTACGGTTTTTTGCGCGTAGAGGGCCGCAAAGACGACCTCTACCTCTCATCCGAACAGATGAAAATGTGTATTCACGGCGACCAGATCCTGGCGCAGCCGCTGGGTGCCGATCGTAAAGGCCGTCGCGAAGCACGCGTGGTTCGCGTGCTGGTGCCAAAAACCAGCCAGATCGTTGGCCGCTACTTTACCGATGCCGGTGTAGGCTTTGTGGTACCGGATGACAGCCGTCTGAGCTTCGATATCCTGATCCCGCCTGAAGAGGTGATGGGCGCGCGCATGGGCTTTGTGGTAGTAGTGGAGCTTACCCAACGTCCAACGCGTCGCACCAAAGCGGTAGGTAAAATCGTCGAAGTGCTGGGCGATAACATGGGTACCGGCATGGCTGTTGATATGGCCCTGCGTACACATGAGATCCCCTACATCTGGCCAAAAGCGGTTGAAGAGCAGATTGTGGGTCTGAAGGAAGAGGTTCCGGAAGAGTCCAAAGCGGGACGCGTGGATCTCCGCAGTTTGCCGCTGGTCACCATTGATGGAGAAGATGCCCGCGACTTTGATGACGCCGTATACTGCGAGAAAAAACGCGGTGGCGGCTGGCGTCTGTGGGTTGCTATCGCCGACGTGAGCTATTACGTTCGTCCCCACACGCCGCTCGATAACGAAGCGCGCAGCCGTGGTACGTCGGTTTACTTCCCGTCTCAGGTGGTACCGATGCTGCCGGAAGTGCTCTCCAACGGCCTCTGTTCCCTGAACCCGCAGGTTGATCGCCTGTGTATGGTTTGCGAGATGACCATCTCCAGTAAAGGTCGCTTAACCGGTTATAAATTCTACGAAGCGGTAATGAGCTCCCATGCGCGTCTGACCTATACCAAGGTCTGGCATATGCTGCAGGGCGATCAGGACCTGCGTGAACAGTATGCGCCGCTGGTGAAGCACATCGAAGAGTTGCATAACCTCTACAAAACGCTGGATCAGGCGCGTGAAGAGCGCGGTGGGATCTCGTTTGAGAGCGAAGAGGCGAAGTTCATTTTCAACGCTGAACGCCGCATTGAGCGTATTGAACAGACCCAGCGTAACGATGCGCATAAGCTGATCGAAGAGTGTATGATCCTGGCGAACATCTCGGCGGCCCGTTTTGTCGAGAAAGCTAAAGAGCCGGCGC
TGTTCCGTATCCACGATAAACCGTCGACGGAAGCCATTACCGCGTTCCGCTCCGTACTGGCTGAACTCGGTCTGGAGCTGCCTGGCGGTAACAAGCCAGAGCCGCGCGATTATGCCGAGCTGCTGGAATCCATTGGCGATCGTCCGGATGCAGAAATGCTGCAGACCATGCTGCTGCGCTCCATGAAACAGGCGATTTACGACCCGGAAAACCGGGGTCACTTCGGTCTGGCGCTGCAGTCTTACGCGCACTTTACGTCGCCGATCCGCCGTTATCCGGACCTGTCTTTGCACCGTGCGATCAAGTATCTGCTGGCGCAGGAGCAGGGCCATAAAGGGAACACGACTGAAACCGGCGGGTACCACTATTCAATGGAAGAGATGCTGCAGCTTGGCCAGCACTGTTCCATGACCGAACGCCGTGCCGATGAAGCGACGCGCGAAGTCTCTGACTGGCTGAAATGCGACTTTATGCTGGATCAGGTCGGTAACATTTTCAAAGGTGTTATCGCCAGCGTGACCGGGTTTGGTTTCTTTGTTCGCCTTGACGAGCTGTTTATCGACGGTCTGGTGCATGTCTCCAGCCTCGATAACGACTATTATCGTTTCGACCAGGTTGGACAGCGCCTGATTGGTGAGTCGGGTGGACAGACCTATCGCCTTGGCGACCGGGTGGAAGTCAAAGTCGAAGCCGTCAACATGGACGAGCGTAAGATTGACTTCAGTCTGATCTCCAGCGAGCGCGCGCCGCGCAACGTAGGTAAAACCGCGCGTGAGAAGGCGAAAAAAGGCGGTAACGGCAACGCGAGCGGCAAACGTCGTCAGGCGGGTAAGAAAGTGAACTTCGAGCCAGACAGCGCCTTCCGCGGTGAGAAAAAGCAGAAGCCAAAAGCGGCGAAGAAAGAGGCTCGCAAGGCGAAAAAGCCGTCCGCGAAAACACAGAAAATTGCTGCTGCGACCAAAGCGAAGCGCGCAGCGAAGAAACAGCAGGCGGAGTAAATTCTCCCCTCACCCTGACCCTCTCCCCACTGGGGAGAGGGGGAAATACCTATTATGAGAATCATCAATGAGTGAAATGATTTACGGCATCCACGCGGTGCAGGCCCTGCTGGAGCGCGCACCGGAGCGTTTTCAGGAAGTGTTCATTCTGAAAGGGCGTGAAGACAAACGTCTGATGCCGCTGATCCACGCACTGGAAGCGCAGGGCGTGGTGATCCAGCTGGCCAACCGTCAGTTCCTGGATGAGAAAAGTGAAGGCGCGGTTCACCAGGGGATTATTGCCCGCGTGAAGCCAGGCCGTCAGTATCAGGAAAACGATCTGCCGGATTTGATCGCTGCACTGGATAACCCGTTCTTCCTGATCCTTGATGGCGTGACCGATCCGCACAACCTCGGCGCGTGCCTGCGCAGCGCCGATGCGGCGGGCGTGCACGCGGTGATCGTACCGAAAGATCGTTCTGCACAACTGAACGCAACGGCGAAGAAAGTGGCCTGCGGTGCAGCAGAAAACGTTCCGCTGATCCGCGTCACCAACCTGGCGCGCACCATGCGTATGCTGCAGGAAGAGAATATCTGGATCGTCGGTACCGCCGGTGAAGCGGATCATACTCTGTATCAGAGCAAAATGACCGGCCGTCTGGCGCTGGTGATGGGTGCGGAAGGTGAAGGCATGCGTCGCCTGACGCGTGAGCACTGCGACGAGCTGATAAGCATCCCGATGGCGGGCAGCGTGTCGTCCCTGAACGTTTCTGTTGCAACGGGTATTTGCCTGTTTGAAGCGGTACGTCAGCGCGGCGAATAAACAGCAAGGCCGTCCTGTGGACGGCCTTTTGTGTTTTATCTCTCTCCCGGTGGGTGAGGGCACCAGACGGTGACAACCTTACTCCTCTAACGTCCGCAAATGGTCATCCTTCCTGAGCGTCATCAGCGCCACAATACTCACCACCGCCGTCGCCATGACGTAATATGCCGGAATGTCGAGGTTTCCCGTCTCCTTAATCA
ACCCGGTAATAATAAGCCCCGCACACCCCGAGAAAATGGCATTCGACAAGGAATACGCCAGACCAAGCCCGGTATAGCGCACACGCGTCGGGAACATCTCTGAGAGCATTGCCGGCCCTGGACCTGCCAGCATGCCCACCAGACCGCCAGCTATCAATACCACGATCGCTTTTACCGCGAGGGTACTGGACTCCGCCTGCAGAATTTTCAGCAGCGGCAGGGCGAGGATCAGCAGTAACGCGGTGGCGATAATCATCACCGTGCGGCGGCCAATCCTGTCGCTCAGGATCCCCGACGGAATAATGGTGAGCGCAAAGCCGACATTCGAAATCACGGCTATCAACAGTGCCTGGTTAAATCCGGTGTGCAGTGCCGATTGCAGATAGGTCGGCATAATCACCAGATAGGTATATCCCGCCGCCGACCAGACCATCACCCGGCCAATTCCCATCACGATCGCTTTAATTGTGGCGACGGTACTGGCCTGCGCGATCAGCGGTTTTTCCTGCTGCTGCACGAAGCTCGGCGTCTCTTCCATACTCACCCGCAACCAGAGCGCCACCACCCCCATAGGCAGGGCCAGGAAAAATGGGATACGCCAGCCCCAGTCATGCAGGGCTTCCGGTGTCAGTAAGGCAGAAAGCAAGGCGACAATACCCGCGCCCGCTAATAGCCCCAGCGCCACCGTGAACGATTGCCATGCGCCATACAGACCACGTTTACCGCGTGGTGCAAATTCCGTCATCAAGGAAACCGCACCACCGTACTCACCCCCCGCGAACAATCCCTGCAGGATGCGCAGCAACGTAACGATCAGCGGCGCAGCGATACCGATGCTGGCATAGACTGGCACAACACCGATGGCGGCGGTGGCGAGCGTCATCAGCACCAGCACAATAATCAGCGTCGGTTTGCGGCCAATACGGTCACCGATGCGGCCAAACACCACCGCCCCCAGTGGGCGGAAAAAGAAAGCGATGGCAAACGAGGCGTAGGTGAGGATCAGGCTGGTCAGCCCCGACTCGCCTTCAAGCTGGAAGAAGTTTTTCGCAATCACGGTGGCCAGAAAGCCGTAGACCGCAAACTCATACCACTCGATGAAATTACCAATGGAGCCTGCAATTAACGCACGCTTATGCGCATTCGGTTGCATAACTTTCCTCACTGAAAGGGGTAAGAATAAATTATTCAATAAAAGTGAAGTGATGAAATAGTTTATTCTTATGTTCTGTGAGCTATTTCACGAATGATATCAGCGGGATCGCTACCGTGTGACCTTACTCCCATGCAGCGTGCACAGGGTCTGTCCATACTTATCTTCATTGCCACTGAAGGAGGGAGACAGCATGCACTGGCAAACACATACCGTTTTTAACCAACCCGCACCGCTTTCGAACAGCAACCTTTTTCTCTCTGATTGTGCCCTGCGTGACGCGGTGGCCCGCGAAGGCGCGGAGTGGGATAGCGAGTTGCTGGCCAGTATAGGCCAGCAGTTAGGTACGGCGGAGTCGCTGGAGCTTGGCAGGCTGGCGAACGTCAATCCGCCGGAGTTACTTCGCTATGACCCCACCGGTGAGCGGCTGGATGATGTCCGTTTTCACCCCGCCTGGCATCTTCTGATGCAGGGGCTGTGCGCTAACCGGGTGCATAACCTGGCGTGGGAAGAGGAGGCGCGTAAAGGCTCCTTTGTCGCTCGCGCCGCGCGCTTCGTGTTGCATGCTCAGGTTGAGGCCGGAACGCTGTGCCCCGTGACCATGACGTTTGCCGCCACGCCGCTGCTGCAACAGGCGCTCCCTGAACCTTTCCAGAACTGGCTCACGCCGCTACTGAGCGATCGTTATGATCCTCACCTTGCCCCTGGAGCCCAAAAGCGCGGCCTGCTGATCGGCATGGGGATGACGGAAAAACAGGGCGGCTCGGACGTGCTCAGCAACACTACCAAAGCAGAAAAGTGCAGCGACGGCAGCTACCGGCTGGTGGGTCATAAGTGGTTTTTCTCT
GTGCCACAAAGCGATGCGCATCTGGTGCTGGCGCAGGCGAAAGGCGGGTTGTCCTGCTTTTTTGTTCCTCGCTTTTTGCCTGACGGGCAGCGCAACGCCGTGCGCCTGGAGCGGCTGAAAGACAAGCTCGGCAACCGCTCTAACGCCAGCAGTGAAGCGGAGTTTCTGGATGCGTCCGGCTGGCTGTTAGGGGAAGAGGGGGAAGGGGTACGACAGATCCTTCGCATGGGCGGGTTAACGCGTTTTGACTGCGCTCTGGGCAGCCATGGGTTGATGCGTCGGGCATTATCCGTGGCGCTGTACCACGCCCATCAGCGGCAGACGTTCGGGAAAAACCTCATCGATCAGCCACTGATGCGCGACGTCCTGAGCCGCATGGCGCTGGTGCTGGAGGGGCAAACGGCGCTGCTGTTTCGTCTTGCCCGGGCGTGGGATAAACGCGCCGATCCGCAGGAGGCGGCCTGGGCGCGGCTTTTCACGCCAGCCGCGAAATTCAGCGTCTGCAAAGCGGGTATCCCGTTCGTGGCAGAGGCGATGGAGGTGTTGGGCGGCATCGGGTACTGTGAGGAGAGTGAACTCCCCCGCCTGTATCGCGAGATGCCGGTGAACAGCATCTGGGAAGGCTCAGGGAACATCATGTGTCTGGATGTTCTGCGCGTTCTGGCGAAGCAGCCGGGTATGTTCGATCTGCTGGCAGAGGATTTTGCGCAGGTGAAAGGTCAGGACAGACACTTCGATCGTAGCTGGCGGCAGCTGCAGCAAAAACTGCGTAAACCGCAGGAGGCGCAGGGGCGGGAGATTGCGCATCAGCTATTCCTGCTGGGCGCCGGAAGCCAGATGCTGCGGCACGCCTCTCCGCCCGTCGCGCAGGCGTGGTGCCGCATGATGCTGGATACCCGTGGCGGCACGCTGATGACAGAGCAGGTTCAAAGCGATCTTCTGCTGCGCGCCACGGGCAGGGTAGGCTAGTTTTTAAGCTGGAACAGGCTCACCAGTTGAGTGAGGTGCGACCCTTTCTCGCGCAGTGATTGTGCGGTTTGCTCGCTGCGCGAGACGCGATCGGCGTTGATGTGGGAGGCTTCACCAATATGGGTCATGGCCAGATTCACCTGACGGATCCCCGCCGACTGTTCGCGGGATGCATGGTTAATCTCGGTCACCAGTTGGTTGATGTTATCGATGTGCGCGATGATGGCATCCATCGCCAGACGCGTCTGCTCTGAGAGCGCATGGCCTTCACTCACCTCGTTCAGCGTGTCGCCAATCAGCTGCTCAATCTCTTTCACCGCATTAGCGCTGCGTGCCGCCAGGGCGCGCACTTCCTGCGCGACCACAGCAAACCCTTTTCCGTGTTCGCCGGCACGTGCTGCTTCAACCGCTGCATTAAGCGCCAGAATGTTAGTCTGGAAAGCAATCGACTCAATCACACGCGTGATGTCCTCGATGCGCTTTGAGGCGTCGCGAATATCGTCCATGGTGGAGACCGCGTGCGTCACCGTCTCGCCCCCCTGATGCACCGCGCGTGAGGTTTCCCCCACCAGCTGCTGCGTCTGTTCCATATTGGCCGCGTTTTGCTGCACGGTTGCGGCGAGTTGTTCCATACTCGCGGAGGTCTCTTCAACGCTACTGGCCTGTTTGTTGATCTGCTCTGAGATCTCACCGGTATCAGAGGCCAGGGCGTTAGTGCCCAGATGGATCTCGCCTGCGGCCTCGCGTACCTGCAGGACAATCTTTTGCAGACCGCCGCCGATCCCGTTGATGGCGTCGATAAGCTGGCCAACCTCATCCTGGCGGGTAACGGACAGGCTGGCACGCAGATCGCCTGCCGCATATTGTCGGGCAAGGTGAATGACGTTACGCAGCGGGCGCGTCAGCATGCGACGAATGATGATGACAAACAGCGCGGCAAACAGGACTGACAGCGCCACGCCTGCCAGCAGGAAGCGATCCCGCATCGTGGTGACGCTTGAGAGCAGGACCGATTTATCCACTTCCCCCAC
GATGGTCCAGTTCCAGCCCGGCAGCGGCGTGTAGGCCATTTTCAGTATGCGACCACCCTCACTTGTGCGTTCCAGCGTGCCCGGGGTATTGCTCAGCAACTGCTGCTGGGTGGCGCTATCCCACTCAGGAAGCTGGCCTTCCCGGTTCGAGTGGAACAGGAACTGTCCGCGCGTTTTACCGTTACTGCGATCCAGCACGAAGAAGTGTCCGCTGTCACCTAAACGACGATTGAGGATTTTTTCGCGCATCACGTTCCAGGAGTGGGTGATATCCACACCAACAAAAATGATCCCAATGACCTGGCCTTCTGCGTTTTTGAACGGCTGATACTGGGTGATATAGCGCTTACCGAACAGCAACGCCAGACCGCGATAGACCTCGCCTTTAGTGACGGCCGCATAGGCCGGGCTGGCAGTGTCCAGAACGGTACCCATTGCCCGATCGCCGTTCTCTTTGCGCAGGGAGGTGGCGACACGGACAAAGTCATTACCGCTGCGCACAAACAGGGTCGAGATTGCCCCTGTACGCGTCAGGAAGTCGTCAGAAAGTGCATTGTTTTCATGCAGGCCTGTTTCACCCCCTTTCAGAAGGGGAACGCTAAGCCCGTTAATGGTCTGGAGTTGATTGCTGTCGACGCTCAAAGGTTGCGGTAAAAAGGTGGTGAACAGCCGGGTATAGCTCTCGACCTCTTCGCTCAGACTGGTGTTAAACATCTGTACCATATCAACCATACCGGTGGACTGGTTATGCAGGTCTTCTACCGCAAGGGCTTCAAGCTGCTGGCTGGCTTTATGGCTGAGCAGAAAAGTGAAAAGCAGAAACAGCGTGGCGACACTCACGCCGGTAAGAAGCGATAGCTTTGCGCCCAGACCCGCACGGCGAAAAAAAGTGATCATAAATTGCTCATTATTGAAAAGGTATGGCTTTTCAACGGCAGGACAGAGCTAACATTTACTTAAATAATGTAACAAAATGTTATCAAATTGTTTTTAATGGGGTTGAAAGAAGATAAAAAATTTGCAAAAACGAAAAGCCACTAAAAATAAGTGGCTTTTATCGGAGGTGATTAGACGTAGAGAATAGCCTGTACGCGCCAATGATCGCTGCGGTTATCTTCCTGCATCTGAATAATTCGATACCAGGAAGCGCCCTGTTCATCGGCTTTTAGTGCCACGACACGTTCAACATCTTGCGGGCTCCCCGAAATGTTATTGACGGAGATCTGGCCAATTTCATTAAGGCCAGTGACGCAATCAGCACTGGCGAATTCTGCTGACTGTGCAGTCGTGCTCACAAGGCCTACTGAGAGCAACAGAGAGGTCAAAGCAAGAGTTCGTTTCATTGTCAGCTCCATTTCACATTTCTCCGGTATCCGCCGGGCACTCCTTCGCAAATAAACTCACTTCCATTGTCGCGGGCATGGAGTTTATTGTGGACAGGAAAATGTCTATGGACGCAAAGCAGTGTAAATGTCGTTCAAATGAAGATCGTTATTTACGGTACAATATAGCCTGTGAATACCACTGTCCCGTAATGATGGTTTCATCGATCATAATGATGACGTAATAATCTGCTTTGGCGGCGGCAGCTTGCGCCTTTATTGACTCCTCTGCGTCATCCGGGGATCCGCGAACCAGCGCGGAGACGGTACCCATTCGTTGTAATCCTTCCGTCTGATTACGACGAATCTCCTGCGGATGGTCGGCGACCGGGGGAGCGGGTTGCGGTGTCCCCTGTAGTGCACTACAGCCACTGAGCAGAACCAGCAACAATAAAGTAGCAAACCTGCGCATAACCATATCTCGTTTCCTGATAGCCATAGTGTAGTTGTCTGCGAATTTAGTTTTGGGGGAATGTTCCCGAAGTGTTATCTGGTACGTATATTTCGAATGAAAATATCGTGAAAGCGTAATCCAGTTCTCAACATTATGAATCATTCATCAGCACAGGGTCGACAATGATTGAAATTGAAACTCGCCGCCTCGGCAATCATGAAATA
TTACACGCTATCCCTGCGGGAAAAAGTGCACACCCGCTGCCTGTTGTGGTTTTTTATCATGGCTTTACTTCATCGAAGCTGGTGTACAGCTACTTTGCGGTGGCGCTGGCGCAGGCCGGTTTTCGGGTAGTGATGCCGGATGCGCCCGATCACGGTGCACGTTTTACCGGTGATGAACAGGTGCGACTGGGGCAGTTCTGGCAGATTTTGCACGGTAGCCTGAGCGAATTTGCCGGGCTGCGCGATGCGCTTTACCAGGCGGGGCTGGTGGCGGATCAACGGCTGTCGGTGGCCGGCGCATCGATGGGGGGAATGACGGCGCTCGGGATCATGGCTCGTCACCCTGAGGTGACCTCCGTGGCGTGCCTGATGGGCTCGGGCTATTTTACGTCATTGTCAAAAACGCTGTTTCCACCGCAAGACGCGACGGAGATTGATGCGTTGCTGGCCGAATGGGAAGTGACCCGGGCGTTACCGCGTCTGGCCGATCGCCCGTTGCTGTTGTGGCATGGTGACGCGGATGATGTGGTGCCCCCTGGCGAAACCTTCCGTCTCCAGCAGGCACTACAGCGCGAAGGGCTGGACGGCAACCTGACCTGTCTGTGGGAAGCGGGGGTTCGCCACCGTATCACGCCAGCGGCACTGGACGCGACGGTGGACTTTTTCCGCCAGCATCTTTAAACGCGCAGAATTTTGACGCCCTGATCTTCCAGCTTCTGGAGGATCTCTGGATTGGCATGTTTGCCGGTGATCACCATGTCAATCTGTTCGGCGCGGCTGAACAACATGCCAGCTCGCTCGCCGACTTTGCTGCTGTCGACCAGTACGACGAGCTTGCCCACGACGTTAAGCATATTTTGCTCTGCCATCGCGGTTAACATGTCGGTTTTGTAGAGCCCGTCCGCGGTCAGGCCTTTGCCGCTGGTAAACATCCAGTGCCCGGCATAGAGGCTGTTTTCGCTATCCTGGGGACTGAGGGTAATGGATTGGCTCTTATTATACTGGCCGCCCATGATCACCACGCTTTCATGTTCCTGATCGATAAGATAGTTAGCCAGCGGCAGATAGTTGGTGATGATTTGCACCGGTTTACCGCACATCTCGCGGCCCAGCAGAAATGCCGTGGAGCCGCAGTTGATCACCACGCTCTCACCCGGGTTGACCAGCTGTGATGCGGCTCTGGCAATGCGCACTTTTTCATCGTGGTTTTGCGCCTGATGGATGTTCATCGGCGTCCAGCGGGGGCGCTGCTGGCTGATGGCTTCCGCGCCGTTACGGACTTTTTTCAGCTTACCGCTCTCATCCAGCTTGTTGATATCGCGTCGTGCCGTCGCGGGTGAAATCCCTAAACGTTCGATCACTTTCTCGACGGTGATAAACCCTGTTTGCGCCAGGAGTTCCAGTAATATTTGATGCCGTTGCGCTTCCGTCATGAGCTATTCCGATAAGAATTGATTTGAAAAGATGATATTTGAAATAGCGTGAAATTACTAAAATAAATATGAAATCGCCAGACGCAAGGTCTGGCGATGGGGGTTCACAATCAGAGGAATGACTTGAACGGCAGGTCTGGCTCAATGGTGAAGCAATCATCGAAACCGCGTGGGTAGTGATACTCAAGATTGTCTTTGTCCAGCGGCCAGGTGAACTTGCCGCCTACCTGCCAGATAAATGGCTTGAAGCCATACTTCAGACGGTCTTTTTTCATCTCCCACAGTACGCGGATTTCCTGCGGATCGGCCTGGAAGTTTGACCAGATATCGTGGTGGAAAGGAATCACTACTTTGGTGTTCAGTGCTTCCGCCATGCGCAGCATATCGGCGCTGGTCATTTTGTCGGTAATGCCGCGTGGGTTCTCGCCGTAGGAGCCCAGCGCCACGTCAATCTGATGCTCGTTACCATGCTTCGCATAGTAGTTGGAGTAGTGAGAATCACCGCTGTGGTACAGGGAACCGCCAGGGGTTTTAAACAGATAGTTCACTGCTCGCTCATCCATGCCG
TCTGGCAGGACGCCTGCGGCTTTCTGGTCAGCAGGCAGGGTGATCAGCGCGGTGCGGTCAAAGGCGTCCAGCGCATGGATTTCAATGTCTTTAATTTTCACCACGTCGCCTGGCTTCATGACGATGCAGCGCTCTTTCGGCACGCCCCAGCCAATCCAGAGATCCACGCAGGTCTGCGGTCCGATAAACGGCACGTCGTCCGCGCAGTTTTGCATCACCGCTGCCGCCACGTTTACATCGATGTGATCGTTATGATCGTGGGTGGAAAGGACGGCGTCGATCTGGCGAATGGCAAAAGGATCAAGAACAAACGGCGTGGTGCGCAGGTTCGGCTGCAATTTTTCGACACCTGCCATGCGCTGCATCTGATGACCTTTTTTCATCAGCGGATTGCCGTGGCTCTGTTTGCCGGTACCGCACCAGAAATCAACGCAAATATTGGCGCCACCCTCGGATTTCAGCCAGATGCCGGTGCAACCCAGCCACCACATTGCAAACGTACCAGGAGCAACCTGTTCCTGCTCAATCTCTTCATTCAGCCAGCTGCCCCATTCCGGGAAGGTGCTCAGAATCCATGATTCACGGGTGATGGTGTTCACTTTACTCATCGTTTTGACTCCTGGTTTAATCAAAAGTAATCATTTGGTGATTTGTTGTGATTAATGCTGGCACTCTTTGAATCGGTTTGCAACGAGTAAATTGAGGAAATTTATCTTCGCCATGTCACAGATAATCTATTTCTACGTTGTTAGAATATCCGCTATCAATGATTAATTGTATGAATTATAAGAAATAATTTATGCGTGGGAATCGATAGTGAAAATCATTACTAACACCATGGAAATAATTTGCGTGATACGTCACAAATAATCAAATGCAATCTTGTGGTGATTATTTGTGATTAATAGAGTGATGGCACCAACCGAACAGGGATTACCCCCTCGTGTTCTCATTTCTGGAGAGAGTTATGGAGATCCTCTACAACGTCTTTACCGTTTTTTTTAATCAGGTAATGACTAACGCCCCGCTTTTGCTGGGTATCGTGACGTGCCTGGGCTACATCCTTCTGCGCAAAAGCGTCAGCGTCATCATCAAAGGCACAATCAAAACCATCATCGGTTTCATGCTGTTGCAGGCGGGTTCAGGCATTCTGACCAGCACGTTTAAGCCTGTGGTTGCGAAGATGTCGGAGGTGTATGGCATCAACGGCGCCATCTCTGACACCTACGCGTCGATGATGGCCACCATTGACCGCATGGGAGATGCCTACAGTTGGGTGGGGTACGCGGTACTGCTCGCGCTGGCGTTGAACATCATTTACGTTCTGCTGCGCCGCATTACCGGCATCCGCACCATCATGCTGACTGGCCACATTATGTTCCAGCAGGCCGGGCTGATTGCGGTGTCTCTCTATATCTTCGGCTACCCCATGTGGACCACGATTATCTGCACCGCCGTGCTGGTATCGCTCTACTGGGGTATCACCTCCAACATGATGTACAAGCCGACGCAGGAGGTCACCGACGGCTGCGGTTTCTCCATCGGCCATCAGCAGCAATTCGCCGCCTGGCTCGCTTACAAGGTCGCACCGTACCTGGGTAAAAAAGAGGACAGCGTAGAAGATCTCAAGCTGCCGGGCTGGCTGAATATCTTCCACGACAACATCGTCTCAACCGCCATTGTGATGACCATCTTCTTCGGCGCCATTCTGCTCTCCTTCGGGATTGATGTGGTGCAGGCGATGGCGGGGAAAACGCACTGGACGGTTTATATCCTGCAGACCGGTTTCTCCTTCGCGGTAGCCATTTTCATCATTACCCAGGGCGTCCGTATGTTTGTTGCCGAACTGTCTGAAGCCTTTAATGGGATCTCCCAACGCCTGATTCCTGGTGCGGTGCTGGCCATTGACTGTGCGGCTATCTACAGCTTTGCGCCAAACGCGGTGGTCTGGGGCTTCATGTGGGGCACCATCGGCCAGTTGATTGCGGTAGGCATTCTGG
TCGGCTGCGGCTCATCCATTCTGATTATTCCTGGCTTTATCCCAATGTTCTTCTCCAACGCCACCATTGGCGTCTTTGCTAACCACTTTGGCGGCTGGCGCGCGGCGCTCAAGATCTGTCTGGTGATGGGCATGGTGGAGATCTTCGGTTGCGTGTGGGCGGTCAAGCTCACCGGTATGAGCGCCTGGATGGGCATGGCGGACTGGTCAATCCTGGCACCGCCAATGATGCAGGGCTTTGCGTCCGTCGGGCTGGTCTTTATGGCCGTCATCATCCTGATTGCTCTGGCTTATATGTTCTTCGCTGGCCGTTCTCTGCGAGCTGAAGAAGATGCGGAAAAACAAACAGCAGAAGTTTCTGCTCACTAAGGAGTTTCGATTATGACCGTACGTATCCTGGCTGTGTGTGGCAATGGGCAAGGTAGCTCCATGATCATGAAGATGAAAGTGGACCAGTTTTTAACCCAGTCAAACATTGACCACACGGTGAACAGCTGCGCGGTGGGTGAATACAAAAGTGAACTGAACGGCGCCGATATCATCATCGCGTCTACCCATATCGCCGGTGAAATTAGCGTTTCTGGCAACAAATATGTCGTGGGGGTACGCAACATGCTCTCCCCTGCGGATTTCGGCCCAAAACTGCTGGAAGTGATCAAAGCGCACTTCCCCCAGGACGTGAAGTAAGGACGCCACATGAAACTACGTGATTCGCTGGCAGAGAATAACTCCATCCTTTTACAGGCTGAGGCCAGCACCTGGCAGGAGGCGGTCAAGCTGAGTGTGGATCTGCTGGTTAAGGCTGACGTTGTCGAGCCGCGGTATTACCAGGCCATTCTTGATGGTGTGGCGCAGCATGGTCCTTATTTTGTCATCGCGCCAGGCCTTGCAATGCCGCACGGTCGCCCGGAAGAGGGCGTGAAGAAAACCGGCTTCGCGCTGGTAACGCTGAAAACGCCTTTGGTGTTTAACCATGAAGATAACGACCCGGTCGACATCCTGATCACCATGGCGGCCGTCGATGCCAATACCCATCAGGAGGTTGGCATCATGCAGATCGTTAACCTGTTTGATGACGAAGCGAATTTTGATCGTTTACGCGCCTGCCGCACCGAGCAGGAAGTGCTGGATTTAATTGATAACGCCACAGCGGCGGCAGTTTAAGAAGGAATTGAAAATGTCATTACCAATGTTGCAGGTTGCGCTGGATAACCAGACTCTGTCCGATGCGTATGAAACCACCCGCTTGATTGCGGAAGAGGTGGATATTATCGAAGTTGGCACCATTCTGTGCGTAGGCGAAGGCGTGCGCGCCGTTCGCGATCTGAAAGCGCTCTACCCGCATAAAATCGTGCTGGCCGATGCCAAGATTGCCGATGCAGGCAAAATTCTCTCCCGCATGTGTTTTGAAGCCAATGCCGACTGGGTCACTGTGATCTGCTGTGCAGATATCAACACCGCGAAAGGCGCGCTGGATGTGGCGAAAGAGTTCAACGGCGATGTGCAGATTGAACTGACCGGTTTCTGGACCTGGGAACAGGCCCAGGCGTGGCGCGATGCTGGCATTCAGCAGGTCGTTTATCACCGCAGCCGTGATGCGCAGGCCGCCGGCGTGGCGTGGGGAGACGCGGATATCAGCGCCATTAAACGTCTTGCTGACATGGGCTTTAAAGTGACCGTTACCGGTGGCCTGGCACTGGAAGATCTGCCGCTGTTTAAAGGTATCCCTATTCACGTCTTTATCGCGGGTCGCAGCATTCGTGATGCCGCGTCTCCGGTGGAAGCGGCACGTCAGTTCAAACGTTCAATCGCTCAGCTTTGGGGCTAAGGAGCGGGTATGTTGTCTAAACAGGTCCCGCTTGGCATCTATGAAAAGGCACTCCCCGCGGGGGAGTGCTGGCTGGAGCGGCTGAAACTGGCGAAACAGTTGGGCTTCGATTTTGTCGAAATGTCCGTGGATGAGACCGATGAACGTCTTTCTCGCCTCGACTGGAGCCAGGA
GCAGCGTCTGGCGCTGGTGAGTGCGGTCGCCGAAACGGGCGTACGCGTGCCGTCTATGTGCCTCAGTGCCCACCGCCGTTTTCCGCTTGGCAGTGAAGATGATACGGTGCGCGCGCAGGGGCTGGAGATCATGCGTAAAGCCATTCGTTTTGCGCAGGATGTCGGTATTCGCGTGATACAGCTTGCCGGGTATGACGTTTACTATCAGGAAGCCAATGACGAAACGCGTCGTCGTTTTCGTGATGGCCTGAAAGAGAGCGTTGAAATGGCTAGCCGCGCGCAGGTGACGCTGGCGATGGAGATCATGGACTATCCGTTAATGAACTCGATCAGCAAGGCGCTGGGCTACGCGCATTACCTGAACAATCCGTGGTTCCAGCTCTATCCTGATATCGGCAACCTGTCGGCGTGGGATAACGACGTACAAATGGAACTGCTGGCGGGGATGGGCCATATCGTCGCGGTTCATGTCAAAGATACACGTCCTGGGGTATTCAAAAACGTTCCGTTCGGCACCGGGGTGGTGGATTTCGAACGGTGCTTCCAGACGCTCAAACAGGCGGGGTATTGCGGACCTTACCTGATTGAGATGTGGAGCGAAACGTCAGATGACCCGGCAGGTGAAGTGGCTAACGCTCGCGACTGGGTACGGGAACGTATGGCACGGGCCGGCTTACTGGAGGCTGAATATGCTTAAGCTTAAGCAGCAGGTCTTTGACGCTAATATGGATCTCCCGCGCTATGGGCTGGTGACCTTCACCTGGGGAAACGTCAGTGCTATCGACCGAGAGCAGGGACTCGTGGTGATTAAGCCCAGCGGCGTGGCATATGACGCTATGAGCCCTGACGATATGGTGGTGGTTGATCTTGAGGGTAACGTTGTCGACGGCAAATGGCGCCCTTCTTCAGATACAGCCACCCATCTGGCGCTCTACCGGCGTTACCCTTCTCTGGGCGGCGTAGTGCATACGCACTCTACGCATGCCACAGCCTGGGCACAGGCGGGGCGGGCCATTCCGGCTCTGGGAACCACGCATGCGGACTACTTCTTTGGTGACATTCCGTGCACGCGGGCACTCACGCAGACCGAAGTGGAAGGCGAGTACGAGCTCAACACCGGCAACGTCATTATCGAAACGCTGGGTGAGAGCGAACCGCTGCATACGCCGGGCATTGTGGTCTATCAGCACGGTCCGTTCGCCTGGGGAAAAGATGCGCATGACGCTGTCCATAACGCGGTAGTCATGGAGGAGGTGGCACGCATGGCGTGGATTGCTCGCGGCATCAACCCACAGCTTCAGGGCATTGATGATTACCTGATGAACAAGCATTTCATGCGCAAGCACGGTCCAAACGCCTATTACGGGCAGAAGTGAATAAGTGCTCTGGAATACCGAAAACAGTTGTAACCGGTGGTATTTCAGAGCCATTCATAAAAAAAATCCCCCGGGCAGGGGGCAAGTAAAACGATGGCTATATTCTTTTTGTTGGCTTTCCTGGTGCTAGCGTCAAAGCGTTATCGGTAAATATCTGCACTGGCGTGCATGTTGCCGTTGCTGCCGGGCTCACGCATCAGAATAACGTGGTAGTAATTTGCCCCCTGAGCATCGGTTTCTTTATTAAGCGCCTGATCTGCTTCGCTCTCTGTGGCGAAATGGTGATTGATGTAAATAACGCCTAAGCTCTGCACATCATCCATGTTTCTGGCCTGTCGGCTGTCTATCTTGATGGCTGCCATCGCGTTTGCACTGAGCAAAAGCGCAGCCATTACGGCAATTGCGATTTTTTTCATGATATGCGCTCCACGACTGCGTGCTGTGAGACGGTTGTTTGCTCCCCTCCGGTATTTATATGGTCTCTGATTAAAGTGTAATCACTTATCCGGCGAGGATGCGTGACCATTTCTGATGCAGTTGTTCAAAAAAACAACGCTTCCTGATGTGGTTAATTTTAAATCACCCACTTAGACCAGCTTTGCGGTTTGTGCGAATTATTTGTGC
AATCAGCTTGAGTTTCCTGGGGGGCGCAAGTATTATGACGCGTCAATTTTTCAGCCGACCTTTAACACGTTCCTTGCCTCCCCGGGCCTCGGCTGACCCAGACAGGAGGCTGAATAATCCGTAAGGAGCAATTCGATGCGTCA +>419442076 +TGACGCATCGAATTGCTCCTTACGGATTATTCAGCCTCCTGTCTGGGTCAGCCGAGGCCCGGGGAGGCAAGGAACGTGTTAAAGGTCGGCTGAAAAATTGACGCGTCATAATACTTGCGCCCCCCAGGAAACTCAAGCTGATTGCACAAATAATTCGCACAAACCGCAAAGCTGGTCTAAGTGGGTGATTTAAAATTAACCACATCAGGAAGCGTTGTTTTTTTGAACAACTGCATCAGAAATGGTCACGCATCCTCGCCGGATAAGTGATTACACTTTAATCAGAGACCATATAAATACCGGAGGGGAGCAAACAACCGTCTCACAGCACGCAGTCGTGGAGCGCATATCATGAAAAAAATCGCAATTGCCGTAATGGCTGCGCTTTTGCTCAGTGCAAACGCGATGGCAGCCATCAAGATAGACAGCCGACAGGCCAGAAACATGGATGATGTGCAGAGCTTAGGCGTTATTTACATCAATCACCATTTCGCCACAGAGAGCGAAGCAGATCAGGCGCTTAATAAAGAAACCGATGCTCAGGGGGCAAATTACTACCACGTTATTCTGATGCGTGAGCCCGGCAGCAACGGCAACATGCACGCCAGTGCAGATATTTACCGATAACGCTTTGACGCTAGCACCAGGAAAGCCAACAAAAAGAATATAGCCATCGTTTTACTTGCCCCCTGCCCGGGGGATTTTTTTTATGAATGGCTCTGAAATACCACCGGTTACAACTGTTTTCGGTATTCCAGAGCACTTATTCACTTCTGCCCGTAATAGGCGTTTGGACCGTGCTTGCGCATGAAATGCTTGTTCATCAGGTAATCATCAATGCCCTGAAGCTGTGGGTTGATGCCGCGAGCAATCCACGCCATGCGTGCCACCTCCTCCATGACTACCGCGTTATGGACAGCGTCATGCGCATCTTTTCCCCAGGCGAACGGACCGTGCTGATAGACCACAATGCCCGGCGTATGCAGCGGTTCGCTCTCACCCAGCGTTTCGATAATGACGTTGCCGGTGTTGAGCTCGTACTCGCCTTCCACTTCGGTCTGCGTGAGTGCCCGCGTGCACGGAATGTCACCAAAGAAGTAGTCCGCATGCGTGGTTCCCAGAGCCGGAATGGCCCGCCCCGCCTGTGCCCAGGCTGTGGCATGCGTAGAGTGCGTATGCACTACGCCGCCCAGAGAAGGGTAACGCCGGTAGAGCGCCAGATGGGTGGCTGTATCTGAAGAAGGGCGCCATTTGCCGTCGACAACGTTACCCTCAAGATCAACCACCACCATATCGTCAGGGCTCATAGCGTCATATGCCACGCCGCTGGGCTTAATCACCACGAGTCCCTGCTCTCGGTCGATAGCACTGACGTTTCCCCAGGTGAAGGTCACCAGCCCATAGCGCGGGAGATCCATATTAGCGTCAAAGACCTGCTGCTTAAGCTTAAGCATATTCAGCCTCCAGTAAGCCGGCCCGTGCCATACGTTCCCGTACCCAGTCGCGAGCGTTAGCCACTTCACCTGCCGGGTCATCTGACGTTTCGCTCCACATCTCAATCAGGTAAGGTCCGCAATACCCCGCCTGTTTGAGCGTCTGGAAGCACCGTTCGAAATCCACCACCCCGGTGCCGAACGGAACGTTTTTGAATACCCCAGGACGTGTATCTTTGACATGAACCGCGACGATATGGCCCATCCCCGCCAGCAGTTCCATTTGTACGTCGTTATCCCACGCCGACAGGTTGCCGATATCAGGATAGAGCTGGAACCACGGATTGTTCAGGTAATGCGCGTAGCCCAGCGCCTTGCTGATCGAGTTCATTAACGGATAGTCCATGATC
TCCATCGCCAGCGTCACCTGCGCGCGGCTAGCCATTTCAACGCTCTCTTTCAGGCCATCACGAAAACGACGACGCGTTTCGTCATTGGCTTCCTGATAGTAAACGTCATACCCGGCAAGCTGTATCACGCGAATACCGACATCCTGCGCAAAACGAATGGCTTTACGCATGATCTCCAGCCCCTGCGCGCGCACCGTATCATCTTCACTGCCAAGCGGAAAACGGCGGTGGGCACTGAGGCACATAGACGGCACGCGTACGCCCGTTTCGGCGACCGCACTCACCAGCGCCAGACGCTGCTCCTGGCTCCAGTCGAGGCGAGAAAGACGTTCATCGGTCTCATCCACGGACATTTCGACAAAATCGAAGCCCAACTGTTTCGCCAGTTTCAGCCGCTCCAGCCAGCACTCCCCCGCGGGGAGTGCCTTTTCATAGATGCCAAGCGGGACCTGTTTAGACAACATACCCGCTCCTTAGCCCCAAAGCTGAGCGATTGAACGTTTGAACTGACGTGCCGCTTCCACCGGAGACGCGGCATCACGAATGCTGCGACCCGCGATAAAGACGTGAATAGGGATACCTTTAAACAGCGGCAGATCTTCCAGTGCCAGGCCACCGGTAACGGTCACTTTAAAGCCCATGTCAGCAAGACGTTTAATGGCGCTGATATCCGCGTCTCCCCACGCCACGCCGGCGGCCTGCGCATCACGGCTGCGGTGATAAACGACCTGCTGAATGCCAGCATCGCGCCACGCCTGGGCCTGTTCCCAGGTCCAGAAACCGGTCAGTTCAATCTGCACATCGCCGTTGAACTCTTTCGCCACATCCAGCGCGCCTTTCGCGGTGTTGATATCTGCACAGCAGATCACAGTGACCCAGTCGGCATTGGCTTCAAAACACATGCGGGAGAGAATTTTGCCTGCATCGGCAATCTTGGCATCGGCCAGCACGATTTTATGCGGGTAGAGCGCTTTCAGATCGCGAACGGCGCGCACGCCTTCGCCTACGCACAGAATGGTGCCAACTTCGATAATATCCACCTCTTCCGCAATCAAGCGGGTGGTTTCATACGCATCGGACAGAGTCTGGTTATCCAGCGCAACCTGCAACATTGGTAATGACATTTTCAATTCCTTCTTAAACTGCCGCCGCTGTGGCGTTATCAATTAAATCCAGCACTTCCTGCTCGGTGCGGCAGGCGCGTAAACGATCAAAATTCGCTTCGTCATCAAACAGGTTAACGATCTGCATGATGCCAACCTCCTGATGGGTATTGGCATCGACGGCCGCCATGGTGATCAGGATGTCGACCGGGTCGTTATCTTCATGGTTAAACACCAAAGGCGTTTTCAGCGTTACCAGCGCGAAGCCGGTTTTCTTCACGCCCTCTTCCGGGCGACCGTGCGGCATTGCAAGGCCTGGCGCGATGACAAAATAAGGACCATGCTGCGCCACACCATCAAGAATGGCCTGGTAATACCGCGGCTCGACAACGTCAGCCTTAACCAGCAGATCCACACTCAGCTTGACCGCCTCCTGCCAGGTGCTGGCCTCAGCCTGTAAAAGGATGGAGTTATTCTCTGCCAGCGAATCACGTAGTTTCATGTGGCGTCCTTACTTCACGTCCTGGGGGAAGTGCGCTTTGATCACTTCCAGCAGTTTTGGGCCGAAATCCGCAGGGGAGAGCATGTTGCGTACCCCCACGACATATTTGTTGCCAGAAACGCTAATTTCACCGGCGATATGGGTAGACGCGATGATGATATCGGCGCCGTTCAGTTCACTTTTGTATTCACCCACCGCGCAGCTGTTCACCGTGTGGTCAATGTTTGACTGGGTTAAAAACTGGTCCACTTTCATCTTCATGATCATGGAGCTACCTTGCCCATTGCCACACACAGCCAGGATACGTACGGTCATAATCGAAACTCCTTAGTGAGCAGAAACTTCTGCTGTTTGTTTTTCCGCATCTTCTTCAGCTCGCAGAGAACGGCCAGCGA
AGAACATATAAGCCAGAGCAATCAGGATGATGACGGCCATAAAGACCAGCCCGACGGACGCAAAGCCCTGCATCATTGGCGGTGCCAGGATTGACCAGTCCGCCATGCCCATCCAGGCGCTCATACCGGTGAGCTTGACCGCCCACACGCAACCGAAGATCTCCACCATGCCCATCACCAGACAGATCTTGAGCGCCGCGCGCCAGCCGCCAAAGTGGTTAGCAAAGACGCCAATGGTGGCGTTGGAGAAGAACATTGGGATAAAGCCAGGAATAATCAGAATGGATGAGCCGCAGCCGACCAGAATGCCTACCGCAATCAACTGGCCGATGGTGCCCCACATGAAGCCCCAGACCACCGCGTTTGGCGCAAAGCTGTAGATAGCCGCACAGTCAATGGCCAGCACCGCACCAGGAATCAGGCGTTGGGAGATCCCATTAAAGGCTTCAGACAGTTCGGCAACAAACATACGGACGCCCTGGGTAATGATGAAAATGGCTACCGCGAAGGAGAAACCGGTCTGCAGGATATAAACCGTCCAGTGCGTTTTCCCCGCCATCGCCTGCACCACATCAATCCCGAAGGAGAGCAGAATGGCGCCGAAGAAGATGGTCATCACAATGGCGGTTGAGACGATGTTGTCGTGGAAGATATTCAGCCAGCCCGGCAGCTTGAGATCTTCTACGCTGTCCTCTTTTTTACCCAGGTACGGTGCGACCTTGTAAGCGAGCCAGGCGGCGAATTGCTGCTGATGGCCGATGGAGAAACCGCAGCCGTCGGTGACCTCCTGCGTCGGCTTGTACATCATGTTGGAGGTGATACCCCAGTAGAGCGATACCAGCACGGCGGTGCAGATAATCGTGGTCCACATGGGGTAGCCGAAGATATAGAGAGACACCGCAATCAGCCCGGCCTGCTGGAACATAATGTGGCCAGTCAGCATGATGGTGCGGATGCCGGTAATGCGGCGCAGCAGAACGTAAATGATGTTCAACGCCAGCGCGAGCAGTACCGCGTACCCCACCCAACTGTAGGCATCTCCCATGCGGTCAATGGTGGCCATCATCGACGCGTAGGTGTCAGAGATGGCGCCGTTGATGCCATACACCTCCGACATCTTCGCAACCACAGGCTTAAACGTGCTGGTCAGAATGCCTGAACCCGCCTGCAACAGCATGAAACCGATGATGGTTTTGATTGTGCCTTTGATGATGACGCTGACGCTTTTGCGCAGAAGGATGTAGCCCAGGCACGTCACGATACCCAGCAAAAGCGGGGCGTTAGTCATTACCTGATTAAAAAAAACGGTAAAGACGTTGTAGAGGATCTCCATAACTCTCTCCAGAAATGAGAACACGAGGGGGTAATCCCTGTTCGGTTGGTGCCATCACTCTATTAATCACAAATAATCACCACAAGATTGCATTTGATTATTTGTGACGTATCACGCAAATTATTTCCATGGTGTTAGTAATGATTTTCACTATCGATTCCCACGCATAAATTATTTCTTATAATTCATACAATTAATCATTGATAGCGGATATTCTAACAACGTAGAAATAGATTATCTGTGACATGGCGAAGATAAATTTCCTCAATTTACTCGTTGCAAACCGATTCAAAGAGTGCCAGCATTAATCACAACAAATCACCAAATGATTACTTTTGATTAAACCAGGAGTCAAAACGATGAGTAAAGTGAACACCATCACCCGTGAATCATGGATTCTGAGCACCTTCCCGGAATGGGGCAGCTGGCTGAATGAAGAGATTGAGCAGGAACAGGTTGCTCCTGGTACGTTTGCAATGTGGTGGCTGGGTTGCACCGGCATCTGGCTGAAATCCGAGGGTGGCGCCAATATTTGCGTTGATTTCTGGTGCGGTACCGGCAAACAGAGCCACGGCAATCCGCTGATGAAAAAAGGTCATCAGATGCAGCGCATGGCAGGTGTCGAAAAATTGCAGCCGAACCTGCGCACCACGCCGTTTGTTCTTGA
TCCTTTTGCCATTCGCCAGATCGACGCCGTCCTTTCCACCCACGATCATAACGATCACATCGATGTAAACGTGGCGGCAGCGGTGATGCAAAACTGCGCGGACGACGTGCCGTTTATCGGACCGCAGACCTGCGTGGATCTCTGGATTGGCTGGGGCGTGCCGAAAGAGCGCTGCATCGTCATGAAGCCAGGCGACGTGGTGAAAATTAAAGACATTGAAATCCATGCGCTGGACGCCTTTGACCGCACCGCGCTGATCACCCTGCCTGCTGACCAGAAAGCCGCAGGCGTCCTGCCAGACGGCATGGATGAGCGAGCAGTGAACTATCTGTTTAAAACCCCTGGCGGTTCCCTGTACCACAGCGGTGATTCTCACTACTCCAACTACTATGCGAAGCATGGTAACGAGCATCAGATTGACGTGGCGCTGGGCTCCTACGGCGAGAACCCACGCGGCATTACCGACAAAATGACCAGCGCCGATATGCTGCGCATGGCGGAAGCACTGAACACCAAAGTAGTGATTCCTTTCCACCACGATATCTGGTCAAACTTCCAGGCCGATCCGCAGGAAATCCGCGTACTGTGGGAGATGAAAAAAGACCGTCTGAAGTATGGCTTCAAGCCATTTATCTGGCAGGTAGGCGGCAAGTTCACCTGGCCGCTGGACAAAGACAATCTTGAGTATCACTACCCACGCGGTTTCGATGATTGCTTCACCATTGAGCCAGACCTGCCGTTCAAGTCATTCCTCTGATTGTGAACCCCCATCGCCAGACCTTGCGTCTGGCGATTTCATATTTATTTTAGTAATTTCACGCTATTTCAAATATCATCTTTTCAAATCAATTCTTATCGGAATAGCTCATGACGGAAGCGCAACGGCATCAAATATTACTGGAACTCCTGGCGCAAACAGGGTTTATCACCGTCGAGAAAGTGATCGAACGTTTAGGGATTTCACCCGCGACGGCACGACGCGATATCAACAAGCTGGATGAGAGCGGTAAGCTGAAAAAAGTCCGTAACGGCGCGGAAGCCATCAGCCAGCAGCGCCCCCGCTGGACGCCGATGAACATCCATCAGGCGCAAAACCACGATGAAAAAGTGCGCATTGCCAGAGCCGCATCACAGCTGGTCAACCCGGGTGAGAGCGTGGTGATCAACTGCGGCTCCACGGCATTTCTGCTGGGCCGCGAGATGTGCGGTAAACCGGTGCAAATCATCACCAACTATCTGCCGCTGGCTAACTATCTTATCGATCAGGAACATGAAAGCGTGGTGATCATGGGCGGCCAGTATAATAAGAGCCAATCCATTACCCTCAGTCCCCAGGATAGCGAAAACAGCCTCTATGCCGGGCACTGGATGTTTACCAGCGGCAAAGGCCTGACCGCGGACGGGCTCTACAAAACCGACATGTTAACCGCGATGGCAGAGCAAAATATGCTTAACGTCGTGGGCAAGCTCGTCGTACTGGTCGACAGCAGCAAAGTCGGCGAGCGAGCTGGCATGTTGTTCAGCCGCGCCGAACAGATTGACATGGTGATCACCGGCAAACATGCCAATCCAGAGATCCTCCAGAAGCTGGAAGATCAGGGCGTCAAAATTCTGCGCGTTTAAAGATGCTGGCGGAAAAAGTCCACCGTCGCGTCCAGTGCCGCTGGCGTGATACGGTGGCGAACCCCCGCTTCCCACAGACAGGTCAGGTTGCCGTCCAGCCCTTCGCGCTGTAGTGCCTGCTGGAGACGGAAGGTTTCGCCAGGGGGCACCACATCATCCGCGTCACCATGCCACAACAGCAACGGGCGATCGGCCAGACGCGGTAACGCCCGGGTCACTTCCCATTCGGCCAGCAACGCATCAATCTCCGTCGCGTCTTGCGGTGGAAACAGCGTTTTTGACAATGACGTAAAATAGCCCGAGCCCATCAGGCACGCCACGGAGGTCACCTCAGGGTGACGAGCCATGATCCCGAGCGCCGTCATTCCCCCCATCGA
TGCGCCGGCCACCGACAGCCGTTGATCCGCCACCAGCCCCGCCTGGTAAAGCGCATCGCGCAGCCCGGCAAATTCGCTCAGGCTACCGTGCAAAATCTGCCAGAACTGCCCCAGTCGCACCTGTTCATCACCGGTAAAACGTGCACCGTGATCGGGCGCATCCGGCATCACTACCCGAAAACCGGCCTGCGCCAGCGCCACCGCAAAGTAGCTGTACACCAGCTTCGATGAAGTAAAGCCATGATAAAAAACCACAACAGGCAGCGGGTGTGCACTTTTTCCCGCAGGGATAGCGTGTAATATTTCATGATTGCCGAGGCGGCGAGTTTCAATTTCAATCATTGTCGACCCTGTGCTGATGAATGATTCATAATGTTGAGAACTGGATTACGCTTTCACGATATTTTCATTCGAAATATACGTACCAGATAACACTTCGGGAACATTCCCCCAAAACTAAATTCGCAGACAACTACACTATGGCTATCAGGAAACGAGATATGGTTATGCGCAGGTTTGCTACTTTATTGTTGCTGGTTCTGCTCAGTGGCTGTAGTGCACTACAGGGGACACCGCAACCCGCTCCCCCGGTCGCCGACCATCCGCAGGAGATTCGTCGTAATCAGACGGAAGGATTACAACGAATGGGTACCGTCTCCGCGCTGGTTCGCGGATCCCCGGATGACGCAGAGGAGTCAATAAAGGCGCAAGCTGCCGCCGCCAAAGCAGATTATTACGTCATCATTATGATCGATGAAACCATCATTACGGGACAGTGGTATTCACAGGCTATATTGTACCGTAAATAACGATCTTCATTTGAACGACATTTACACTGCTTTGCGTCCATAGACATTTTCCTGTCCACAATAAACTCCATGCCCGCGACAATGGAAGTGAGTTTATTTGCGAAGGAGTGCCCGGCGGATACCGGAGAAATGTGAAATGGAGCTGACAATGAAACGAACTCTTGCTTTGACCTCTCTGTTGCTCTCAGTAGGCCTTGTGAGCACGACTGCACAGTCAGCAGAATTCGCCAGTGCTGATTGCGTCACTGGCCTTAATGAAATTGGCCAGATCTCCGTCAATAACATTTCGGGGAGCCCGCAAGATGTTGAACGTGTCGTGGCACTAAAAGCCGATGAACAGGGCGCTTCCTGGTATCGAATTATTCAGATGCAGGAAGATAACCGCAGCGATCATTGGCGCGTACAGGCTATTCTCTACGTCTAATCACCTCCGATAAAAGCCACTTATTTTTAGTGGCTTTTCGTTTTTGCAAATTTTTTATCTTCTTTCAACCCCATTAAAAACAATTTGATAACATTTTGTTACATTATTTAAGTAAATGTTAGCTCTGTCCTGCCGTTGAAAAGCCATACCTTTTCAATAATGAGCAATTTATGATCACTTTTTTTCGCCGTGCGGGTCTGGGCGCAAAGCTATCGCTTCTTACCGGCGTGAGTGTCGCCACGCTGTTTCTGCTTTTCACTTTTCTGCTCAGCCATAAAGCCAGCCAGCAGCTTGAAGCCCTTGCGGTAGAAGACCTGCATAACCAGTCCACCGGTATGGTTGATATGGTACAGATGTTTAACACCAGTCTGAGCGAAGAGGTCGAGAGCTATACCCGGCTGTTCACCACCTTTTTACCGCAACCTTTGAGCGTCGACAGCAATCAACTCCAGACCATTAACGGGCTTAGCGTTCCCCTTCTGAAAGGGGGTGAAACAGGCCTGCATGAAAACAATGCACTTTCTGACGACTTCCTGACGCGTACAGGGGCAATCTCGACCCTGTTTGTGCGCAGCGGTAATGACTTTGTCCGTGTCGCCACCTCCCTGCGCAAAGAGAACGGCGATCGGGCAATGGGTACCGTTCTGGACACTGCCAGCCCGGCCTATGCGGCCGTCACTAAAGGCGAGGTCTATCGCGGTCTGGCGTTGCTGTTCGGTAAGCGCTATATCACCCAGTATCAGCCGTTCAAAAACGCAGAAGGCCAG
GTCATTGGGATCATTTTTGTTGGTGTGGATATCACCCACTCCTGGAACGTGATGCGCGAAAAAATCCTCAATCGTCGTTTAGGTGACAGCGGACACTTCTTCGTGCTGGATCGCAGTAACGGTAAAACGCGCGGACAGTTCCTGTTCCACTCGAACCGGGAAGGCCAGCTTCCTGAGTGGGATAGCGCCACCCAGCAGCAGTTGCTGAGCAATACCCCGGGCACGCTGGAACGCACAAGTGAGGGTGGTCGCATACTGAAAATGGCCTACACGCCGCTGCCGGGCTGGAACTGGACCATCGTGGGGGAAGTGGATAAATCGGTCCTGCTCTCAAGCGTCACCACGATGCGGGATCGCTTCCTGCTGGCAGGCGTGGCGCTGTCAGTCCTGTTTGCCGCGCTGTTTGTCATCATCATTCGTCGCATGCTGACGCGCCCGCTGCGTAACGTCATTCACCTTGCCCGACAATATGCGGCAGGCGATCTGCGTGCCAGCCTGTCCGTTACCCGCCAGGATGAGGTTGGCCAGCTTATCGACGCCATCAACGGGATCGGCGGCGGTCTGCAAAAGATTGTCCTGCAGGTACGCGAGGCCGCAGGCGAGATCCATCTGGGCACTAACGCCCTGGCCTCTGATACCGGTGAGATCTCAGAGCAGATCAACAAACAGGCCAGTAGCGTTGAAGAGACCTCCGCGAGTATGGAACAACTCGCCGCAACCGTGCAGCAAAACGCGGCCAATATGGAACAGACGCAGCAGCTGGTGGGGGAAACCTCACGCGCGGTGCATCAGGGGGGCGAGACGGTGACGCACGCGGTCTCCACCATGGACGATATTCGCGACGCCTCAAAGCGCATCGAGGACATCACGCGTGTGATTGAGTCGATTGCTTTCCAGACTAACATTCTGGCGCTTAATGCAGCGGTTGAAGCAGCACGTGCCGGCGAACACGGAAAAGGGTTTGCTGTGGTCGCGCAGGAAGTGCGCGCCCTGGCGGCACGCAGCGCTAATGCGGTGAAAGAGATTGAGCAGCTGATTGGCGACACGCTGAACGAGGTGAGTGAAGGCCATGCGCTCTCAGAGCAGACGCGTCTGGCGATGGATGCCATCATCGCGCACATCGATAACATCAACCAACTGGTGACCGAGATTAACCATGCATCCCGCGAACAGTCGGCGGGGATCCGTCAGGTGAATCTGGCCATGACCCATATTGGTGAAGCCTCCCACATCAACGCCGATCGCGTCTCGCGCAGCGAGCAAACCGCACAATCACTGCGCGAGAAAGGGTCGCACCTCACTCAACTGGTGAGCCTGTTCCAGCTTAAAAACTAGCCTACCCTGCCCGTGGCGCGCAGCAGAAGATCGCTTTGAACCTGCTCTGTCATCAGCGTGCCGCCACGGGTATCCAGCATCATGCGGCACCACGCCTGCGCGACGGGCGGAGAGGCGTGCCGCAGCATCTGGCTTCCGGCGCCCAGCAGGAATAGCTGATGCGCAATCTCCCGCCCCTGCGCCTCCTGCGGTTTACGCAGTTTTTGCTGCAGCTGCCGCCAGCTACGATCGAAGTGTCTGTCCTGACCTTTCACCTGCGCAAAATCCTCTGCCAGCAGATCGAACATACCCGGCTGCTTCGCCAGAACGCGCAGAACATCCAGACACATGATGTTCCCTGAGCCTTCCCAGATGCTGTTCACCGGCATCTCGCGATACAGGCGGGGGAGTTCACTCTCCTCACAGTACCCGATGCCGCCCAACACCTCCATCGCCTCTGCCACGAACGGGATACCCGCTTTGCAGACGCTGAATTTCGCGGCTGGCGTGAAAAGCCGCGCCCAGGCCGCCTCCTGCGGATCGGCGCGTTTATCCCACGCCCGGGCAAGACGAAACAGCAGCGCCGTTTGCCCCTCCAGCACCAGCGCCATGCGGCTCAGGACGTCGCGCATCAGTGGCTGATCGATGAGGTTTTTCCCGAACGTCTGCCGCTGATGGGCGTGGTA
CAGCGCCACGGATAATGCCCGACGCATCAACCCATGGCTGCCCAGAGCGCAGTCAAAACGCGTTAACCCGCCCATGCGAAGGATCTGTCGTACCCCTTCCCCCTCTTCCCCTAACAGCCAGCCGGACGCATCCAGAAACTCCGCTTCACTGCTGGCGTTAGAGCGGTTGCCGAGCTTGTCTTTCAGCCGCTCCAGGCGCACGGCGTTGCGCTGCCCGTCAGGCAAAAAGCGAGGAACAAAAAAGCAGGACAACCCGCCTTTCGCCTGCGCCAGCACCAGATGCGCATCGCTTTGTGGCACAGAGAAAAACCACTTATGACCCACCAGCCGGTAGCTGCCGTCGCTGCACTTTTCTGCTTTGGTAGTGTTGCTGAGCACGTCCGAGCCGCCCTGTTTTTCCGTCATCCCCATGCCGATCAGCAGGCCGCGCTTTTGGGCTCCAGGGGCAAGGTGAGGATCATAACGATCGCTCAGTAGCGGCGTGAGCCAGTTCTGGAAAGGTTCAGGGAGCGCCTGTTGCAGCAGCGGCGTGGCGGCAAACGTCATGGTCACGGGGCACAGCGTTCCGGCCTCAACCTGAGCATGCAACACGAAGCGCGCGGCGCGAGCGACAAAGGAGCCTTTACGCGCCTCCTCTTCCCACGCCAGGTTATGCACCCGGTTAGCGCACAGCCCCTGCATCAGAAGATGCCAGGCGGGGTGAAAACGGACATCATCCAGCCGCTCACCGGTGGGGTCATAGCGAAGTAACTCCGGCGGATTGACGTTCGCCAGCCTGCCAAGCTCCAGCGACTCCGCCGTACCTAACTGCTGGCCTATACTGGCCAGCAACTCGCTATCCCACTCCGCGCCTTCGCGGGCCACCGCGTCACGCAGGGCACAATCAGAGAGAAAAAGGTTGCTGTTCGAAAGCGGTGCGGGTTGGTTAAAAACGGTATGTGTTTGCCAGTGCATGCTGTCTCCCTCCTTCAGTGGCAATGAAGATAAGTATGGACAGACCCTGTGCACGCTGCATGGGAGTAAGGTCACACGGTAGCGATCCCGCTGATATCATTCGTGAAATAGCTCACAGAACATAAGAATAAACTATTTCATCACTTCACTTTTATTGAATAATTTATTCTTACCCCTTTCAGTGAGGAAAGTTATGCAACCGAATGCGCATAAGCGTGCGTTAATTGCAGGCTCCATTGGTAATTTCATCGAGTGGTATGAGTTTGCGGTCTACGGCTTTCTGGCCACCGTGATTGCGAAAAACTTCTTCCAGCTTGAAGGCGAGTCGGGGCTGACCAGCCTGATCCTCACCTACGCCTCGTTTGCCATCGCTTTCTTTTTCCGCCCACTGGGGGCGGTGGTGTTTGGCCGCATCGGTGACCGTATTGGCCGCAAACCGACGCTGATTATTGTGCTGGTGCTGATGACGCTCGCCACCGCCGCCATCGGTGTTGTGCCAGTCTATGCCAGCATCGGTATCGCTGCGCCGCTGATCGTTACGTTGCTGCGCATCCTGCAGGGATTGTTCGCGGGGGGTGAGTACGGTGGTGCGGTTTCCTTGATGACGGAATTTGCACCACGCGGTAAACGTGGTCTGTATGGCGCATGGCAATCGTTCACGGTGGCGCTGGGGCTATTAGCGGGCGCGGGTATTGTCGCCTTGCTTTCTGCCTTACTGACACCGGAAGCCCTGCATGACTGGGGCTGGCGTATCCCATTTTTCCTGGCCCTGCCTATGGGGGTGGTGGCGCTCTGGTTGCGGGTGAGTATGGAAGAGACGCCGAGCTTCGTGCAGCAGCAGGAAAAACCGCTGATCGCGCAGGCCAGTACCGTCGCCACAATTAAAGCGATCGTGATGGGAATTGGCCGGGTGATGGTCTGGTCGGCGGCGGGATATACCTATCTGGTGATTATGCCGACCTATCTGCAATCGGCACTGCACACCGGATTTAACCAGGCACTGTTGATAGCCGTGATTTCGAATGTCGGCTTTGCGCTCACCATTA
TTCCGTCGGGGATCCTGAGCGACAGGATTGGCCGCCGCACGGTGATGATTATCGCCACCGCGTTACTGCTGATCCTCGCCCTGCCGCTGCTGAAAATTCTGCAGGCGGAGTCCAGTACCCTCGCGGTAAAAGCGATCGTGGTATTGATAGCTGGCGGTCTGGTGGGCATGCTGGCAGGTCCAGGGCCGGCAATGCTCTCAGAGATGTTCCCGACGCGTGTGCGCTATACCGGGCTTGGTCTGGCGTATTCCTTGTCGAATGCCATTTTCTCGGGGTGTGCGGGGCTTATTATTACCGGGTTGATTAAGGAGACGGGAAACCTCGACATTCCGGCATATTACGTCATGGCGACGGCGGTGGTGAGTATTGTGGCGCTGATGACGCTCAGGAAGGATGACCATTTGCGGACGTTAGAGGAGTAAGGTTGTCACCGTCTGGTGCCCTCACCCACCGGGAGAGAGATAAAACACAAAAGGCCGTCCACAGGACGGCCTTGCTGTTTATTCGCCGCGCTGACGTACCGCTTCAAACAGGCAAATACCCGTTGCAACAGAAACGTTCAGGGACGACACGCTGCCCGCCATCGGGATGCTTATCAGCTCGTCGCAGTGCTCACGCGTCAGGCGACGCATGCCTTCACCTTCCGCACCCATCACCAGCGCCAGACGGCCGGTCATTTTGCTCTGATACAGAGTATGATCCGCTTCACCGGCGGTACCGACGATCCAGATATTCTCTTCCTGCAGCATACGCATGGTGCGCGCCAGGTTGGTGACGCGGATCAGCGGAACGTTTTCTGCTGCACCGCAGGCCACTTTCTTCGCCGTTGCGTTCAGTTGTGCAGAACGATCTTTCGGTACGATCACCGCGTGCACGCCCGCCGCATCGGCGCTGCGCAGGCACGCGCCGAGGTTGTGCGGATCGGTCACGCCATCAAGGATCAGGAAGAACGGGTTATCCAGTGCAGCGATCAAATCCGGCAGATCGTTTTCCTGATACTGACGGCCTGGCTTCACGCGGGCAATAATCCCCTGGTGAACCGCGCCTTCACTTTTCTCATCCAGGAACTGACGGTTGGCCAGCTGGATCACCACGCCCTGCGCTTCCAGTGCGTGGATCAGCGGCATCAGACGTTTGTCTTCACGCCCTTTCAGAATGAACACTTCCTGAAAACGCTCCGGTGCGCGCTCCAGCAGGGCCTGCACCGCGTGGATGCCGTAAATCATTTCACTCATTGATGATTCTCATAATAGGTATTTCCCCCTCTCCCCAGTGGGGAGAGGGTCAGGGTGAGGGGAGAATTTACTCCGCCTGCTGTTTCTTCGCTGCGCGCTTCGCTTTGGTCGCAGCAGCAATTTTCTGTGTTTTCGCGGACGGCTTTTTCGCCTTGCGAGCCTCTTTCTTCGCCGCTTTTGGCTTCTGCTTTTTCTCACCGCGGAAGGCGCTGTCTGGCTCGAAGTTCACTTTCTTACCCGCCTGACGACGTTTGCCGCTCGCGTTGCCGTTACCGCCTTTTTTCGCCTTCTCACGCGCGGTTTTACCTACGTTGCGCGGCGCGCGCTCGCTGGAGATCAGACTGAAGTCAATCTTACGCTCGTCCATGTTGACGGCTTCGACTTTGACTTCCACCCGGTCGCCAAGGCGATAGGTCTGTCCACCCGACTCACCAATCAGGCGCTGTCCAACCTGGTCGAAACGATAATAGTCGTTATCGAGGCTGGAGACATGCACCAGACCGTCGATAAACAGCTCGTCAAGGCGAACAAAGAAACCAAACCCGGTCACGCTGGCGATAACACCTTTGAAAATGTTACCGACCTGATCCAGCATAAAGTCGCATTTCAGCCAGTCAGAGACTTCGCGCGTCGCTTCATCGGCACGGCGTTCGGTCATGGAACAGTGCTGGCCAAGCTGCAGCATCTCTTCCATTGAATAGTGGTACCCGCCGGTTTCAGTCGTGTTCCCTTTATGGCCCTGCTCCTGCGCCAGCAGATACTTGA
TCGCACGGTGCAAAGACAGGTCCGGATAACGGCGGATCGGCGACGTAAAGTGCGCGTAAGACTGCAGCGCCAGACCGAAGTGACCCCGGTTTTCCGGGTCGTAAATCGCCTGTTTCATGGAGCGCAGCAGCATGGTCTGCAGCATTTCTGCATCCGGACGATCGCCAATGGATTCCAGCAGCTCGGCATAATCGCGCGGCTCTGGCTTGTTACCGCCAGGCAGCTCCAGACCGAGTTCAGCCAGTACGGAGCGGAACGCGGTAATGGCTTCCGTCGACGGTTTATCGTGGATACGGAACAGCGCCGGCTCTTTAGCTTTCTCGACAAAACGGGCCGCCGAGATGTTCGCCAGGATCATACACTCTTCGATCAGCTTATGCGCATCGTTACGCTGGGTCTGTTCAATACGCTCAATGCGGCGTTCAGCGTTGAAAATGAACTTCGCCTCTTCGCTCTCAAACGAGATCCCACCGCGCTCTTCACGCGCCTGATCCAGCGTTTTGTAGAGGTTATGCAACTCTTCGATGTGCTTCACCAGCGGCGCATACTGTTCACGCAGGTCCTGATCGCCCTGCAGCATATGCCAGACCTTGGTATAGGTCAGACGCGCATGGGAGCTCATTACCGCTTCGTAGAATTTATAACCGGTTAAGCGACCTTTACTGGAGATGGTCATCTCGCAAACCATACACAGGCGATCAACCTGCGGGTTCAGGGAACAGAGGCCGTTGGAGAGCACTTCCGGCAGCATCGGTACCACCTGAGACGGGAAGTAAACCGACGTACCACGGCTGCGCGCTTCGTTATCGAGCGGCGTGTGGGGACGAACGTAATAGCTCACGTCGGCGATAGCAACCCACAGACGCCAGCCGCCACCGCGTTTTTTCTCGCAGTATACGGCGTCATCAAAGTCGCGGGCATCTTCTCCATCAATGGTGACCAGCGGCAAACTGCGGAGATCCACGCGTCCCGCTTTGGACTCTTCCGGAACCTCTTCCTTCAGACCCACAATCTGCTCTTCAACCGCTTTTGGCCAGATGTAGGGGATCTCATGTGTACGCAGGGCCATATCAACAGCCATGCCGGTACCCATGTTATCGCCCAGCACTTCGACGATTTTACCTACCGCTTTGGTGCGACGCGTTGGACGTTGGGTAAGCTCCACTACTACCACAAAGCCCATGCGCGCGCCCATCACCTCTTCAGGCGGGATCAGGATATCGAAGCTCAGACGGCTGTCATCCGGTACCACAAAGCCTACACCGGCATCGGTAAAGTAGCGGCCAACGATCTGGCTGGTTTTTGGCACCAGCACGCGAACCACGCGTGCTTCGCGACGGCCTTTACGATCGGCACCCAGCGGCTGCGCCAGGATCTGGTCGCCGTGAATACACATTTTCATCTGTTCGGATGAGAGGTAGAGGTCGTCTTTGCGGCCCTCTACGCGCAAAAAACCGTAACCGTCGCGGTGACCAATGACGGTCCCTTTCAACAGGTCGAGGCGTTCTGGCAGCGCATAGCACTGGCGACGGGTAAAGACCAGTTGCCCGTCGCGCTCCATGGCGCGCAGGCGGCGGCGAAGGGCTTCAGTTTGCTCTTCACCTTCAATGTTTAATTCAACAGCAAGTTCTTCACGATTGGCGGGTTTTTCGCGTTTTGTTAAGTGTTCAATGATGAACTCGCGGCTGGGGATAGGATTCGCGTATTTTTCAGCTTCGCGTTCCTGGAAAGGATCATGTGACATATCGGTTCCTCCGTTGTCAGCTCCGGTGGAAATTTTCTTCATTCCACCAGCAATAATTTATAAAGCGGTTGATTCTCTTCAACCAAATCGGCCAGCGTGTAGTTATCCAGTTCCTTAAGAAAACTTTGCACGGCCAGAGAAAGCGCCTGTTTCAGGCGACAAGCGGGTGTAATGTGGCAGAACTCACTGCTGCAGTTCACCAGAGACAAAGGCTCCAGTTCACGCACCACATCCCCAATACGAATACTTTGCGCCG
GTTTACCGAGACGGATCCCACCATTTTTCCCGCGGACGGCAGCAACGTATCCGGCACGACTAAGTTGATTGATTATTTTGACCATATGATTACGGGACACGCCGTAGACCTCTGTCACTTCAGAGATACTGGTCATTTTCCCATCGGGTAACGACGCCATGTAGATTAGCGCGCGTAAGCCGTAATCGGTGAAACTTGTTAACTGCACATCAACCTCAGGAAAAAAGGGAAAACGCGGTAAAACCGCAGAGATATTCATTAATGATGATAAACCAGCCAGATAGTTAGGGGCTAATTTATTTGACAGACGGGGCGAAAAAGCGGATATCCAGGAGCGGTCGCCGGATGGCGCTACGCTTATCCGGCCTACGGGTTTCCCCGCAGGCCAGCAGTCACGAAGATTATGCGTCGAACGGGTCGCGCAGGATCATCGTTTCAGTACGATCCGGGCCGGTAGAGATAATGTCGATCGGCACTTCGGTCAGTTCTTCAATGCGCTTGATGTAATCCAGCGCTGCCTGTGGCAGGCCGCTACGCTCTTTCACACCGAAAGTGGTCTCAGACCAGCCTGGCATGGTTTCGTAGATTGGCTCGATACCTTCCCAGTCGTCAGCAGCCAGCGGAGTGGTGGTCACTTCACGGCCATCTGGCATGCGGTAGCCGACGCAGATTTTCACCTCTTTCAGCCCGTCCAGGACGTCCAGTTTGGTCAGGCAGAAGCCAGACAGGGAGTTGATCTGCACTGCACGACGCACAGCCACTGCATCCAGCCAGCCGGTACGACGACGACGACCGGTGGTCGCGCCAAACTCGTTACCCTGCTTGCACAGGAACTCGCCGGTTTCATCAAACAGTTCGGTCGGGAATGGACCCGCACCCACGCGAGTGGAGTACGCTTTGATGATGCCCAGAACGTAATCCACATAACGTGGACCCAGGCCAGAGCCGGTCGCCACGCCACCTGCGGTGGTGTTAGAGGAGGTTACGTACGGATAGGTACCGTGGTCGATATCCAGCAGCGTACCCTGCGCACCTTCGAACATGACGAAATCGCCACGCTTGCGCGCCTGGTCCAGCAGATCGGACACATCAACTACCATGCCGGTCAGAATGTCTGCAATCGCCATGACATCATCCAGCACTTTCTGGTAGTCAACAGCGTCTGCTTTGTAGAAGTTCACCAACTGGAAGTTGTGATATTCCATCACTTCTTTCAGTTTTTCAGCGAAGGTGGCTTTATCGAACAGGTCGCCCACGCGCAGACCGCGACGAGCCACTTTGTCTTCATAAGCCGGGCCGATACCACGACCGGTGGTGCCGATCGCTTTCGCGCCACGCGCTTTTTCACGCGCAACGTCCAGCGCCACGTGATAATCAAGGATCAGCGGGCAGGCTTCGGAGAGCAGCAGACGCTCGCGAACCGGGATACCACGGTCTTCCAGACCTTTCATCTCTTTCATCAGCGCAGCCGGAGACAGCACTACGCCGTTACCGATGATGCTGGTGACGTTTTCGCGGAGAATGCCTGATGGAATAAGATGGAGGACGGTTTTTTCACCGTTGATTACGAGAGTATGGCCTGCGTTGTGACCGCCCTGGT