Skip to content

Commit

Permalink
Perform filtering on-fly during DE
Browse files Browse the repository at this point in the history
  • Loading branch information
asl committed Dec 20, 2024
1 parent e8b4e55 commit 00e00e4
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 42 deletions.
1 change: 0 additions & 1 deletion src/common/paired_info/distance_estimation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,4 +177,3 @@ void DistanceEstimator::ProcessEdge(EdgeId e1, const InPairedIndex &pi, Buffer &
}

} // namespace omnigraph::de

22 changes: 17 additions & 5 deletions src/common/paired_info/distance_estimation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
#define DISTANCE_ESTIMATION_HPP_

#include "paired_info.hpp"
#include "pair_info_filters.hpp"
#include "concurrent_pair_info_buffer.hpp"

#include "assembly_graph/core/graph.hpp"
#include "utils/parallel/openmp_wrapper.h"

#include "math/xmath.h"

Expand Down Expand Up @@ -51,14 +51,18 @@ class AbstractDistanceEstimator {
typedef PairedInfoIndexT<debruijn_graph::Graph> OutPairedIndex;
typedef typename InPairedIndex::HistProxy InHistogram;
typedef typename OutPairedIndex::Histogram OutHistogram;
typedef AbstractPairInfoChecker<debruijn_graph::Graph> PairInfoChecker;


public:
AbstractDistanceEstimator(const debruijn_graph::Graph &graph,
const InPairedIndex &index,
const GraphDistanceFinder &distance_finder,
const PairInfoChecker &pair_info_checker,
size_t linkage_distance = 0)
: graph_(graph), index_(index),
distance_finder_(distance_finder), linkage_distance_(linkage_distance) { }
distance_finder_(distance_finder), pair_info_checker_(pair_info_checker),
linkage_distance_(linkage_distance) { }

virtual void Estimate(PairedInfoIndexT<debruijn_graph::Graph> &result, size_t nthreads) const = 0;

Expand All @@ -80,13 +84,19 @@ class AbstractDistanceEstimator {

template<class Buffer>
void AddToResult(const OutHistogram &clustered, EdgePair ep, Buffer &result) const {
result.AddMany(ep.first, ep.second, clustered);
OutHistogram filtered;
for (Point p : clustered)
if (pair_info_checker_.Check(ep.first, ep.second, p))
filtered.insert(p);

result.AddMany(ep.first, ep.second, filtered);
}

private:
const debruijn_graph::Graph &graph_;
const InPairedIndex &index_;
const GraphDistanceFinder &distance_finder_;
const PairInfoChecker &pair_info_checker_;
const size_t linkage_distance_;

virtual const std::string Name() const = 0;
Expand All @@ -111,10 +121,12 @@ class DistanceEstimator : public AbstractDistanceEstimator {
DistanceEstimator(const debruijn_graph::Graph &graph,
const InPairedIndex &index,
const GraphDistanceFinder &distance_finder,
const PairInfoChecker &checker,
size_t linkage_distance, size_t max_distance)
: base(graph, index, distance_finder, linkage_distance), max_distance_(max_distance) { }
: base(graph, index, distance_finder, checker, linkage_distance),
max_distance_(max_distance) { }

virtual ~DistanceEstimator() { }
virtual ~DistanceEstimator() = default;

void Init() const {
INFO("Using " << this->Name() << " distance estimator");
Expand Down
16 changes: 5 additions & 11 deletions src/common/paired_info/distance_estimation_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,9 @@ using namespace debruijn_graph;
using namespace omnigraph::de;

void EstimateWithEstimator(PairedInfoIndexT<Graph> &clustered_index,
const AbstractDistanceEstimator &estimator,
AbstractPairInfoChecker<Graph> &checker) {
const AbstractDistanceEstimator &estimator) {
INFO("Estimating distances");

estimator.Estimate(clustered_index, omp_get_max_threads());

INFO("Filtering info");
PairInfoFilter<Graph>(checker).Filter(clustered_index);
INFO("Info Filtered");
}

// Postprocessing, checking that clusters do not intersect
Expand Down Expand Up @@ -106,15 +100,15 @@ void EstimateScaffoldingDistances(PairedInfoIndexT<Graph> &scaffolding_index,
PairInfoWeightChecker<Graph> checker(graph, 0.);
DEBUG("Weight Filter Done");

SmoothingDistanceEstimator estimator(graph, paired_index, dist_finder,
SmoothingDistanceEstimator estimator(graph, paired_index, dist_finder, checker,
[&] (int i) {return wrapper.CountWeight(i);},
linkage_distance, max_distance,
ade.threshold, ade.range_coeff,
ade.delta_coeff, ade.cutoff,
ade.min_peak_points,
ade.percentage,
ade.derivative_threshold);
EstimateWithEstimator(scaffolding_index, estimator, checker);
EstimateWithEstimator(scaffolding_index, estimator);
}

void EstimatePairedDistances(PairedInfoIndexT<Graph> &clustered_index,
Expand All @@ -130,9 +124,9 @@ void EstimatePairedDistances(PairedInfoIndexT<Graph> &clustered_index,

PairInfoWeightChecker<Graph> checker(graph, de_config.clustered_filter_threshold);

DistanceEstimator estimator(graph, paired_index, dist_finder, linkage_distance, max_distance);
DistanceEstimator estimator(graph, paired_index, dist_finder, checker, linkage_distance, max_distance);

EstimateWithEstimator(clustered_index, estimator, checker);
EstimateWithEstimator(clustered_index, estimator);

INFO("Refining clustered pair information "); // this procedure checks, whether index
RefinePairedInfo(clustered_index, graph); // contains intersecting paired info clusters,
Expand Down
44 changes: 22 additions & 22 deletions src/common/paired_info/pair_info_filters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,14 @@
#define PAIR_INFO_FILTERS_HPP_

#include "paired_info_helpers.hpp"
#include "sequence/sequence.hpp"

namespace omnigraph {

namespace de {

template<class Graph>
class AbstractPairInfoChecker{
class AbstractPairInfoChecker {
private:
typedef typename Graph::VertexId VertexId;
typedef typename Graph::EdgeId EdgeId;
Expand All @@ -28,52 +29,51 @@ class AbstractPairInfoChecker{
public:
AbstractPairInfoChecker(const Graph &graph) : graph_(graph) { }

virtual bool Check(const PairInfoT&) {
virtual bool Check(EdgeId, EdgeId, Point) const {
return true;
}

virtual bool Check(EdgeId, EdgeId) {
virtual bool Check(EdgeId, EdgeId) const {
return true;
}

virtual ~AbstractPairInfoChecker() { }
virtual ~AbstractPairInfoChecker() = default;
};

template<class Graph>
class PairInfoWeightChecker : public AbstractPairInfoChecker<Graph>{
private:
typedef typename Graph::EdgeId EdgeId;
typedef PairInfo<EdgeId> PairInfoT;
double weight_threshold_;
DEWeight weight_threshold_;

public:
PairInfoWeightChecker(const Graph& graph, double weight_threshold) :
AbstractPairInfoChecker<Graph>(graph), weight_threshold_(weight_threshold) {
}

bool Check(const PairInfoT& info) {
return math::ge(info.weight(), weight_threshold_);
bool Check(EdgeId, EdgeId, Point p) const override {
return math::ge(p.weight, weight_threshold_);
}
};

template<class Graph>
class PairInfoWeightCheckerWithCoverage: public AbstractPairInfoChecker<Graph> {
private:
private:
typedef typename Graph::EdgeId EdgeId;
typedef PairInfo<EdgeId> PairInfoT;
double weight_threshold_;
DEWeight weight_threshold_;

public:
PairInfoWeightCheckerWithCoverage(const Graph& graph, double weight_threshold) :
AbstractPairInfoChecker<Graph>(graph), weight_threshold_(weight_threshold){
}
public:
PairInfoWeightCheckerWithCoverage(const Graph& graph, double weight_threshold)
: AbstractPairInfoChecker<Graph>(graph), weight_threshold_(weight_threshold) { }

bool Check(const PairInfoT& info) {
double info_weight = info.weight();
bool Check(EdgeId e1, EdgeId e2, Point p) const override {
double info_weight = p.weight;
return math::ge(info_weight, weight_threshold_)
|| (math::ge(info_weight, 0.1 * this->graph_.coverage(info.first)))
|| (math::ge(info_weight, 0.1 * this->graph_.coverage(info.second)));
}
|| (math::ge(info_weight, 0.1 * this->graph_.coverage(e1)))
|| (math::ge(info_weight, 0.1 * this->graph_.coverage(e2)));
}
};

template <class Graph>
Expand Down Expand Up @@ -148,7 +148,6 @@ class AmbiguousPairInfoChecker : public AbstractPairInfoChecker<Graph> {
}

bool InnerCheck(const PairInfoT& info){

EdgeId edge1 = info.first;
EdgeId edge2 = info.second;

Expand Down Expand Up @@ -182,14 +181,15 @@ class AmbiguousPairInfoChecker : public AbstractPairInfoChecker<Graph> {
relative_length_threshold_(relative_length_threshold),
relative_seq_threshold_(relative_seq_threshold) { }

bool Check(const PairInfoT& info) {
TRACE(this->graph_.int_id(info.first) << " " << this->graph_.int_id(info.second));
bool Check(EdgeId e1, EdgeId e2, Point p) const override {
PairInfoT info(e1, e2, p);
TRACE(this->graph_.int_id(e1) << " " << this->graph_.int_id(e2));
if (EdgesAreFromSimpleBulgeWithAmbPI(info)){
TRACE("Forward directed edges form a simple bulge");
return InnerCheck(info);
}

if (EdgesAreFromSimpleBulgeWithAmbPI(BackwardInfo(info))){
if (EdgesAreFromSimpleBulgeWithAmbPI(BackwardInfo(info))) {
TRACE("Backward directed edges form a simple bulge");
return InnerCheck(BackwardInfo(info));
}
Expand Down
3 changes: 2 additions & 1 deletion src/common/paired_info/smoothing_distance_estimation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,15 @@ class SmoothingDistanceEstimator : public WeightedDistanceEstimator {
SmoothingDistanceEstimator(const debruijn_graph::Graph &graph,
const InPairedIndex &histogram,
const GraphDistanceFinder &dist_finder,
const PairInfoChecker &checker,
std::function<double(int)> weight_f,
size_t linkage_distance, size_t max_distance, size_t threshold,
double range_coeff, double delta_coeff,
size_t cutoff,
size_t min_peak_points,
double percentage,
double derivative_threshold) :
base(graph, histogram, dist_finder, weight_f, linkage_distance, max_distance),
base(graph, histogram, dist_finder, checker, weight_f, linkage_distance, max_distance),
threshold_(threshold),
range_coeff_(range_coeff),
delta_coeff_(delta_coeff),
Expand Down
6 changes: 4 additions & 2 deletions src/common/paired_info/weighted_distance_estimation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,13 @@ class WeightedDistanceEstimator : public DistanceEstimator {
WeightedDistanceEstimator(const debruijn_graph::Graph &graph,
const InPairedIndex &histogram,
const GraphDistanceFinder &distance_finder,
const PairInfoChecker &checker,
std::function<double(int)> weight_f,
size_t linkage_distance, size_t max_distance) :
base(graph, histogram, distance_finder, linkage_distance, max_distance), weight_f_(weight_f) { }
base(graph, histogram, distance_finder, checker, linkage_distance, max_distance),
weight_f_(weight_f) { }

virtual ~WeightedDistanceEstimator() { }
virtual ~WeightedDistanceEstimator() = default;

protected:

Expand Down

0 comments on commit 00e00e4

Please sign in to comment.