From 987e5bb3f70cae3910eafe5863d26e44051a43a1 Mon Sep 17 00:00:00 2001
From: 'qubsq01' <'martin.raden@uni-tuebingen.de'>
Date: Tue, 15 Feb 2022 09:36:12 +0100
Subject: [PATCH] v3.3.0
---
ChangeLog | 28 +++++++++++++++++++++++
README.md | 15 ++++++++++++
src/bin/CommandLineParsing.cpp | 42 ++++++++++++++++++++++++++++++++--
src/bin/CommandLineParsing.h | 19 +++++++++++++++
src/bin/IntaRNA.cpp | 4 +++-
5 files changed, 105 insertions(+), 3 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index bc61f4a..d8b5201 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -10,6 +10,34 @@
# changes in development version since last release
################################################################################
+################################################################################
+################################################################################
+
+
+################################################################################
+### version 3.3.0
+################################################################################
+
+# IntaRNA
+- `--outPairwise` enables pairwise sequence processing instead of all-vs-all
+- input range for `--q|tIdxPos0` is now +-2,000,000,000 (was +-9,999,999) to
+ enable genomic position settings
+
+################################################################################
+################################################################################
+
+
+220215 Martin Raden
+ * bin/CommandLineParsing :
+ + outPairwise : switch to trigger pairwise vs. all-vs-all sequence processing
+ + getQueryNumberForTarget() : target-specific query sequence number
+ + getQueryIndexForTarget() : index getter for target-specific query sequences
+ * bin/IntaRNA :
+ * query iteration now based on target-specific sequence number to enable
+ pairwise sequence processing
+220214 Martin Raden
+ * bin/CommandLineParsing :
+ * idxpos0 range now +-2,000,000,000 (was +-9,999,999) for genomic positions
################################################################################
### version 3.2.2
diff --git a/README.md b/README.md
index 66abf95..87e9c59 100644
--- a/README.md
+++ b/README.md
@@ -106,6 +106,7 @@ The following topics are covered by this documentation:
- [SHAPE reactivity data to enhance accessibility computation](#shape)
- [Output Setup](#outputSetup)
- [Output modes](#outmodes)
+ - [Pairwise vs. all-vs-all](#outpairwise)
- [Sequence indexing](#idxPos0)
- [Suboptimal RNA-RNA interaction prediction and output restrictions](#subopts)
- [Energy parameters and temperature](#energy)
@@ -1500,6 +1501,20 @@ IntaRNA --model=P --mode=M --outMode=E ...
+
+
+
+## Pairwise vs. all-vs-all
+
+When multiple query and target sequences are provided, IntaRNA predicts interactions for
+all query-target combinations, i.e. **all-vs-all**.
+
+Alternatively, you can enforce **pairwise** processing using `--outPairwise`.
+When given, sequences are combined based on their input order, i.e. the 5th target is
+(only) considered for interaction prediction with the 5th query sequence. Thus, you
+have to provide the same number of query and target sequences.
+
+
diff --git a/src/bin/CommandLineParsing.cpp b/src/bin/CommandLineParsing.cpp
index e73cfee..4022fac 100644
--- a/src/bin/CommandLineParsing.cpp
+++ b/src/bin/CommandLineParsing.cpp
@@ -106,7 +106,7 @@ CommandLineParsing::CommandLineParsing( const Personality personality )
queryArg(""),
query(),
- qIdxPos0("qIdxPos0",-9999999,9999999,1),
+ qIdxPos0("qIdxPos0",-2000000000,2000000000,1), // range of a 4 byte long = -2,147,483,648 to 2,147,483,647
qSet(),
qSetString(""),
qAcc("qAcc", "NCPE", 'C'),
@@ -125,7 +125,7 @@ CommandLineParsing::CommandLineParsing( const Personality personality )
targetArg(""),
target(),
- tIdxPos0("tIdxPos0",-9999999,9999999,1),
+ tIdxPos0("tIdxPos0",-2000000000,2000000000,1),
tSet(),
tSetString(""),
tAcc("tAcc","NCPE", 'C'),
@@ -205,6 +205,7 @@ CommandLineParsing::CommandLineParsing( const Personality personality )
outSep(";"),
outCsvCols(outCsvCols_default),
outPerRegion(false),
+ outPairwise(false),
outSpotProbSpots(""),
outNeedsZall(false),
outNeedsBPs(true),
@@ -992,6 +993,12 @@ CommandLineParsing::CommandLineParsing( const Personality personality )
->implicit_value(true)
, "output : if given (or true), best interactions are reported independently"
" for all region combinations; otherwise only the best for each query-target combination")
+ ("outPairwise"
+ , value(&outPairwise)
+ ->default_value(outPairwise)
+ ->implicit_value(true)
+ , "output : if given (or true), interactions are only computed for each corresponding query-target pair (same index) "
+ "instead of all-vs-all")
("verbose,v", "verbose output") // handled via easylogging++
("default-log-file", value(&(logFileName)), "file to be used for log output (INFO, WARNING, VERBOSE, DEBUG)")
;
@@ -1190,6 +1197,11 @@ parse(int argc, char** argv)
parseSequences("query",qId,queryArg,query,qSet,qIdxPos0.val);
parseSequences("target",tId,targetArg,target,tSet,tIdxPos0.val);
+ // pairwise mode requires the same number of query and target sequences
+ if (outPairwise && query.size() != target.size()) {
+ throw error("--outPairwise requires same number of query and target sequences");
+ }
+
// validate accessibility input from file (requires parsed sequences)
validate_qAccFile( qAccFile );
validate_tAccFile( tAccFile );
@@ -1804,6 +1816,32 @@ getQuerySequences() const
////////////////////////////////////////////////////////////////////////////
+const size_t
+CommandLineParsing::
+getQueryIndexForTarget( const size_t i, const size_t targetIndex ) const
+{
+ // maps the i-th target-specific query (wrt. getQueryNumberForTarget()) to its index wrt. getQuerySequences()
+ checkIfParsed();
+#if INTARNA_IN_DEBUG_MODE
+ if (i>=getQueryNumberForTarget(targetIndex) || (outPairwise && targetIndex>=query.size())) // also guards the index returned in pairwise mode
+ throw std::runtime_error("CommandLineParsing::getQueryIndexForTarget("+toString(i)+","+toString(targetIndex)+") out of bounds");
+#endif
+ return outPairwise ? targetIndex : i; // pairwise: the query sharing the target's index; all-vs-all: the i-th query
+}
+
+////////////////////////////////////////////////////////////////////////////
+
+const size_t
+CommandLineParsing::
+getQueryNumberForTarget( const size_t targetIndex ) const
+{
+ checkIfParsed();
+ if (outPairwise) { return 1; } // pairwise: only the query that shares the target's index
+ return query.size(); // all-vs-all: every parsed query sequence
+}
+
+////////////////////////////////////////////////////////////////////////////
+
const CommandLineParsing::RnaSequenceVec &
CommandLineParsing::
getTargetSequences() const
diff --git a/src/bin/CommandLineParsing.h b/src/bin/CommandLineParsing.h
index 1c48f13..2c182db 100644
--- a/src/bin/CommandLineParsing.h
+++ b/src/bin/CommandLineParsing.h
@@ -161,6 +161,22 @@ class CommandLineParsing {
*/
const RnaSequenceVec& getQuerySequences() const;
+ /**
+ * Provides the number of query sequences to be considered for a target sequence.
+ * @param targetIndex index of the target sequence wrt. getTargetSequences().
+ * @return the number of query sequences for this target
+ */
+ const size_t getQueryNumberForTarget( const size_t targetIndex ) const;
+
+ /**
+ * Provides the index (wrt. getQuerySequences()) of the i-th (wrt. getQueryNumberForTarget())
+ * query sequence that is to be considered for a given target sequence.
+ * @param i index of the target-specific query sequence, i.e. in [0,getQueryNumberForTarget(targetIndex))
+ * @param targetIndex index of the target sequence wrt. getTargetSequences().
+ * @return the index of i-th query sequence (wrt. getQuerySequences()) for this target
+ */
+ const size_t getQueryIndexForTarget( const size_t i, const size_t targetIndex ) const;
+
/**
* Parses the target parameter and returns all parsed sequences.
* @return the set of parsed target sequences
@@ -718,6 +734,9 @@ class CommandLineParsing {
//! for all region combinations or only the best for each query-target
//! combination
bool outPerRegion;
+ //! whether or not query and target sequences are combined pairwise (same index)
+ //! instead of all-vs-all
+ bool outPairwise;
//! for SpotProb output : spots to be tracked
std::string outSpotProbSpots;
//! whether or not Zall is needed for output generation
diff --git a/src/bin/IntaRNA.cpp b/src/bin/IntaRNA.cpp
index 438e55e..aaf92f7 100644
--- a/src/bin/IntaRNA.cpp
+++ b/src/bin/IntaRNA.cpp
@@ -191,8 +191,10 @@ int main(int argc, char **argv){
// this parallelization should only be enabled if the outer target-loop is not parallelized
# pragma omp parallel for schedule(dynamic) num_threads( parameters.getThreads() ) shared(queryAcc,reportedInteractions,exceptionPtrDuringOmp,exceptionInfoDuringOmp,targetAcc,targetNumber) if(parallelizeQueryLoop)
#endif
- for ( size_t queryNumber = 0; queryNumber < parameters.getQuerySequences().size(); ++queryNumber )
+ for ( size_t queryIdx = 0; queryIdx < parameters.getQueryNumberForTarget(targetNumber); ++queryIdx )
{
+ // get index of this query wrt. getQuerySequences() and queryAcc
+ const size_t queryNumber = parameters.getQueryIndexForTarget(queryIdx, targetNumber);
#if INTARNA_MULITHREADING
#pragma omp flush (threadAborted)
// explicit try-catch-block due to missing OMP exception forwarding