From 987e5bb3f70cae3910eafe5863d26e44051a43a1 Mon Sep 17 00:00:00 2001 From: 'qubsq01' <'martin.raden@uni-tuebingen.de'> Date: Tue, 15 Feb 2022 09:36:12 +0100 Subject: [PATCH] v3.3.0 --- ChangeLog | 28 +++++++++++++++++++++++ README.md | 15 ++++++++++++ src/bin/CommandLineParsing.cpp | 42 ++++++++++++++++++++++++++++++++-- src/bin/CommandLineParsing.h | 19 +++++++++++++++ src/bin/IntaRNA.cpp | 4 +++- 5 files changed, 105 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index bc61f4a..d8b5201 100644 --- a/ChangeLog +++ b/ChangeLog @@ -10,6 +10,34 @@ # changes in development version since last release ################################################################################ +################################################################################ +################################################################################ + + +################################################################################ +### version 3.3.0 +################################################################################ + +# IntaRNA +- `--outPairwise` enables pairwise sequence processing instead of all-vs-all +- input range for `--q|tIdxPos0` is now +-2,000,000,000 (was +-9,999,999) to + enable genomic position settings + +################################################################################ +################################################################################ + + +220215 Martin Raden + * bin/CommandLineParsing : + + outPairwise : switch to trigger pairwise vs. 
all-vs-all sequence processing + + getQueryNumberForTarget() : target-specific query sequence number + + getQueryIndexForTarget() : index getter for target-specific query sequences + * bin/IntaRNA : + * query iteration now based on target-specific sequence number to enable + pairwise sequence processing +220214 Martin Raden + * bin/CommandLineParsing : + * idxpos0 range now +-2,000,000,000 (was +-9,999,999) for genomic positions ################################################################################ ### version 3.2.2 diff --git a/README.md b/README.md index 66abf95..87e9c59 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,7 @@ The following topics are covered by this documentation: - [SHAPE reactivity data to enhance accessibility computation](#shape) - [Output Setup](#outputSetup) - [Output modes](#outmodes) + - [Pairwise vs. all-vs-all](#outpairwise) - [Sequence indexing](#idxPos0) - [Suboptimal RNA-RNA interaction prediction and output restrictions](#subopts) - [Energy parameters and temperature](#energy) @@ -1500,6 +1501,20 @@ IntaRNA --model=P --mode=M --outMode=E ... +

+<a name="outpairwise" /> + +## Pairwise vs. all-vs-all + +When multiple query and target sequences are provided, IntaRNA predicts interactions for +all query-target combinations, i.e. **all-vs-all**. + +Alternatively, you can enforce **pairwise** processing using `--outPairwise`. +When given, sequences are combined based on their input order, e.g. the 5th target is +(only) considered for interaction prediction with the 5th query sequence. Thus, you +have to provide the same number of query and target sequences; otherwise IntaRNA aborts with an error. + +

diff --git a/src/bin/CommandLineParsing.cpp b/src/bin/CommandLineParsing.cpp index e73cfee..4022fac 100644 --- a/src/bin/CommandLineParsing.cpp +++ b/src/bin/CommandLineParsing.cpp @@ -106,7 +106,7 @@ CommandLineParsing::CommandLineParsing( const Personality personality ) queryArg(""), query(), - qIdxPos0("qIdxPos0",-9999999,9999999,1), + qIdxPos0("qIdxPos0",-2000000000,2000000000,1), // 4 byte range of long = -2,147,483,648 bis 2,147,483,647 qSet(), qSetString(""), qAcc("qAcc", "NCPE", 'C'), @@ -125,7 +125,7 @@ CommandLineParsing::CommandLineParsing( const Personality personality ) targetArg(""), target(), - tIdxPos0("tIdxPos0",-9999999,9999999,1), + tIdxPos0("tIdxPos0",-2000000000,2000000000,1), tSet(), tSetString(""), tAcc("tAcc","NCPE", 'C'), @@ -205,6 +205,7 @@ CommandLineParsing::CommandLineParsing( const Personality personality ) outSep(";"), outCsvCols(outCsvCols_default), outPerRegion(false), + outPairwise(false), outSpotProbSpots(""), outNeedsZall(false), outNeedsBPs(true), @@ -992,6 +993,12 @@ CommandLineParsing::CommandLineParsing( const Personality personality ) ->implicit_value(true) , "output : if given (or true), best interactions are reported independently" " for all region combinations; otherwise only the best for each query-target combination") + ("outPairwise" + , value(&outPairwise) + ->default_value(outPairwise) + ->implicit_value(true) + , "output : if given (or true), interactions are only computed for each corresponding query-target pair (same index) " + "instead of all-vs-all") ("verbose,v", "verbose output") // handled via easylogging++ ("default-log-file", value(&(logFileName)), "file to be used for log output (INFO, WARNING, VERBOSE, DEBUG)") ; @@ -1190,6 +1197,11 @@ parse(int argc, char** argv) parseSequences("query",qId,queryArg,query,qSet,qIdxPos0.val); parseSequences("target",tId,targetArg,target,tSet,tIdxPos0.val); + // check if same number if pairwise mode + if (outPairwise && query.size() != target.size()) { + throw 
error("--outPairwise requires same number of query and target sequences"); + } + // validate accessibility input from file (requires parsed sequences) validate_qAccFile( qAccFile ); validate_tAccFile( tAccFile ); @@ -1804,6 +1816,32 @@ getQuerySequences() const //////////////////////////////////////////////////////////////////////////// +const size_t +CommandLineParsing:: +getQueryIndexForTarget( const size_t i, const size_t targetIndex ) const +{ + checkIfParsed(); +#if INTARNA_IN_DEBUG_MODE + if (i>=query.size()) + throw std::runtime_error("CommandLineParsing::getQueryIndexForTarget("+toString(i)+") out of bounds"); +#endif + // depends on pairwise or all-vs-all prediction + return outPairwise ? targetIndex : i; +} + +//////////////////////////////////////////////////////////////////////////// + +const size_t +CommandLineParsing:: +getQueryNumberForTarget( const size_t targetIndex ) const +{ + checkIfParsed(); + // depends on pairwise or all-vs-all prediction + return outPairwise ? 1 : query.size(); +} + +//////////////////////////////////////////////////////////////////////////// + const CommandLineParsing::RnaSequenceVec & CommandLineParsing:: getTargetSequences() const diff --git a/src/bin/CommandLineParsing.h b/src/bin/CommandLineParsing.h index 1c48f13..2c182db 100644 --- a/src/bin/CommandLineParsing.h +++ b/src/bin/CommandLineParsing.h @@ -161,6 +161,22 @@ class CommandLineParsing { */ const RnaSequenceVec& getQuerySequences() const; + /** + * Provides the number of query sequences to be considered for a target sequence. + * @param targetIndex index of the target sequence wrt. getTargetSequences(). + * @return the number of query sequences for this target + */ + const size_t getQueryNumberForTarget( const size_t targetIndex ) const; + + /** + * Provides the index (wrt. getQuerySequences()) of the i-th (wrt. getQueryNumberForTarget()) + * query sequence that is to be considered for a given target sequence. 
+ * @param i the i-th target-specific query sequence (wrt. getQueryNumberForTarget()) + * @param targetIndex index of the target sequence wrt. getTargetSequences(). + * @return the index of i-th query sequence (wrt. getQuerySequences()) for this target + */ + const size_t getQueryIndexForTarget( const size_t i, const size_t targetIndex ) const; + /** * Parses the target parameter and returns all parsed sequences. * @return the set of parsed target sequences @@ -718,6 +734,9 @@ class CommandLineParsing { //! for all region combinations or only the best for each query-target //! combination bool outPerRegion; + //! whether or not each query-target combinations should be considered pairwise + //! instead of all-vs-all + bool outPairwise; //! for SpotProb output : spots to be tracked std::string outSpotProbSpots; //! whether or not Zall is needed for output generation diff --git a/src/bin/IntaRNA.cpp b/src/bin/IntaRNA.cpp index 438e55e..aaf92f7 100644 --- a/src/bin/IntaRNA.cpp +++ b/src/bin/IntaRNA.cpp @@ -191,8 +191,10 @@ int main(int argc, char **argv){ // this parallelization should only be enabled if the outer target-loop is not parallelized # pragma omp parallel for schedule(dynamic) num_threads( parameters.getThreads() ) shared(queryAcc,reportedInteractions,exceptionPtrDuringOmp,exceptionInfoDuringOmp,targetAcc,targetNumber) if(parallelizeQueryLoop) #endif - for ( size_t queryNumber = 0; queryNumber < parameters.getQuerySequences().size(); ++queryNumber ) + for ( size_t queryIdx = 0; queryIdx < parameters.getQueryNumberForTarget(targetNumber); ++queryIdx ) { + // get index of this query wrt. getQuerySequence() and queryAcc() + const size_t queryNumber = parameters.getQueryIndexForTarget(queryIdx, targetNumber); #if INTARNA_MULITHREADING #pragma omp flush (threadAborted) // explicit try-catch-block due to missing OMP exception forwarding