Refactor ext args as pipeline params

Signed-off-by: Ben Sherman <[email protected]>
nf-core · Oct 16, 2023 · 143af87 · 143af87
1 parent 0e34e31
commit 143af87
Show file tree

Hide file tree

Showing 8 changed files with 90 additions and 25 deletions.
diff --git a/conf/modules.config b/conf/modules.config
@@ -10,16 +10,74 @@
 ----------------------------------------------------------------------------------------
 */
 
-def rseqc_modules = params.rseqc_modules ? params.rseqc_modules.split(',').collect{ it.trim().toLowerCase() } : []
-
-//
-// General configuration options
-//
-
 process {
     // Process-scope publishing rule; values come from params.outdir and params.publish_dir_mode.
     publishDir = [
         // Output sub-directory is derived from the process name: take the last ':'-separated
         // component of task.process, keep the part before the first '_', and lower-case it.
         path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
         mode: params.publish_dir_mode,
         // Maps 'versions.yml' to null and returns every other filename unchanged.
         // NOTE(review): per the Nextflow publishDir documentation a null saveAs result
         // means the file is not published -- confirm against the Nextflow version in use.
         saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
     ]
 }
+
+// Default command-line argument strings for individual tools, exposed as pipeline
+// parameters. In the workflow code visible in this change they are assigned
+// verbatim to a process's ext.args / ext.args2 in place of hard-coded literals.
+params {
+
+    //
+    // Genome preparation options
+    //
+
+    // Assigned to ext.args2 of the UNTAR_*_INDEX processes in PREPARE_GENOME.
+    prepare_genome_untar_args = '--no-same-owner'
+    // Assigned to GFFREAD ext.args in PREPARE_GENOME.
+    prepare_genome_gffread_args = '--keep-exon-attrs -F -T'
+    // Assigned to RSEM_PREPAREREFERENCE_GENOME ext.args in PREPARE_GENOME.
+    prepare_genome_rsem_args = '--star'
+    // Assigned to BBMAP_BBSPLIT ext.args in PREPARE_GENOME (index building);
+    // distinct from bbsplit_args below, which is used by the RNASEQ workflow.
+    prepare_genome_bbsplit_args = 'build=1'
+
+    //
+    // Read subsampling and strand inference options
+    //
+
+    // NOTE(review): the consumers of these two params are not shown in this diff --
+    // presumably the fastq_subsample_fq_salmon subworkflow; confirm.
+    subsample_fq_args = '--record-count 1000000 --seed 1'
+    subsample_salmon_args = '--skipQuant'
+
+    //
+    // Read QC and trimming options
+    //
+
+    // NOTE(review): consumer not shown in this diff -- presumably the
+    // fastq_fastqc_umitools_* subworkflows; confirm.
+    fastqc_args = '--quiet'
+
+    //
+    // Contaminant removal options
+    //
+
+    // Assigned to BBMAP_BBSPLIT ext.args in the RNASEQ workflow.
+    bbsplit_args = 'build=1 ambiguous2=all maxindel=150000'
+    // Assigned to SORTMERNA ext.args in the RNASEQ workflow.
+    sortmerna_args = '--num_alignments 1 -v'
+
+    //
+    // General alignment options
+    //
+
+    // NOTE(review): consumer not shown in this diff -- presumably the
+    // bam_markduplicates_picard subworkflow; confirm.
+    picard_args = '--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp'
+    // Assigned to BEDTOOLS_GENOMECOV ext.args in the RNASEQ workflow.
+    bedtools_args = '-split -du'
+
+    //
+    // STAR Salmon alignment options
+    //
+
+    // Assigned to SAMTOOLS_SORT ext.args in the RNASEQ workflow, where the BAM is
+    // name-sorted before being passed to Salmon.
+    samtools_sort_args = '-n'
+
+    //
+    // STAR RSEM alignment options
+    //
+
+    // (no parameters defined for this section)
+
+    //
+    // HISAT2 alignment options
+    //
+
+    // (no parameters defined for this section)
+
+    //
+    // Post-alignment QC options
+    //
+
+    // Assigned to PRESEQ_LCEXTRAP ext.args in the RNASEQ workflow.
+    preseq_lcextrap_args = '-verbose -bam -seed 1 -seg_len 100000000'
+
+    //
+    // Salmon pseudo-alignment options
+    //
+
+    // (no parameters defined for this section)
+
+}
diff --git a/lib/Utils.groovy b/lib/Utils.groovy
@@ -44,4 +44,11 @@ class Utils {
                 "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
         }
     }
+
+    /**
+     * Build a publishDir settings map rooted at the pipeline output directory.
+     *
+     * The result is assembled by map addition in the order defaults, path, opts,
+     * so any key supplied in opts (including 'mode' or 'path') replaces the value
+     * computed here.
+     *
+     * @param params   object exposing the 'publish_dir_mode' and 'outdir' properties
+     * @param subPath  path segment appended to params.outdir after a '/'
+     * @param opts     extra entries merged last; defaults to an empty map
+     * @return a Map containing 'mode', 'path' and every entry of opts
+     */
+    public static Map publishDir(def params, String subPath, Map opts=[:]) {
+        final defaults = [
+            mode: params.publish_dir_mode
+        ]
+        // The right-most operand wins on duplicate keys, so caller-supplied opts take precedence.
+        return defaults + [path: "${params.outdir}/${subPath}"] + opts
+    }
 }
diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf
@@ -99,7 +99,7 @@ workflow PREPARE_GENOME {
             ch_gff = Channel.value(file(gff))
         }
 
-        GFFREAD.config.ext.args   = '--keep-exon-attrs -F -T'
+        GFFREAD.config.ext.args   = params.prepare_genome_gffread_args
         GFFREAD.config.publishDir = genome_publish_dir
         ch_gtf      = GFFREAD ( ch_gff ).gtf
         ch_versions = ch_versions.mix(GFFREAD.out.versions)
@@ -184,7 +184,7 @@ workflow PREPARE_GENOME {
     if ('bbsplit' in prepare_tool_indices) {
         if (bbsplit_index) {
             if (bbsplit_index.endsWith('.tar.gz')) {
-                UNTAR_BBSPLIT_INDEX.config.ext.args2  = '--no-same-owner'
+                UNTAR_BBSPLIT_INDEX.config.ext.args2  = params.prepare_genome_untar_args
                 UNTAR_BBSPLIT_INDEX.config.publishDir = genome_index_publish_dir
                 ch_bbsplit_index = UNTAR_BBSPLIT_INDEX ( [ [:], bbsplit_index ] ).untar.map { it[1] }
                 ch_versions      = ch_versions.mix(UNTAR_BBSPLIT_INDEX.out.versions)
@@ -201,7 +201,7 @@ workflow PREPARE_GENOME {
                 .collect { [ it ] } // Collect entries as a list to pass as "tuple val(short_names), path(path_to_fasta)" to module
                 .set { ch_bbsplit_fasta_list }
 
-            BBMAP_BBSPLIT.config.ext.args   = 'build=1'
+            BBMAP_BBSPLIT.config.ext.args   = params.prepare_genome_bbsplit_args
             BBMAP_BBSPLIT.config.publishDir = genome_index_publish_dir
             ch_bbsplit_index = BBMAP_BBSPLIT ( [ [:], [] ], [], ch_fasta, ch_bbsplit_fasta_list, true ).index
             ch_versions      = ch_versions.mix(BBMAP_BBSPLIT.out.versions)
@@ -215,7 +215,7 @@ workflow PREPARE_GENOME {
     if ('star_salmon' in prepare_tool_indices) {
         if (star_index) {
             if (star_index.endsWith('.tar.gz')) {
-                UNTAR_STAR_INDEX.config.ext.args2  = '--no-same-owner'
+                UNTAR_STAR_INDEX.config.ext.args2  = params.prepare_genome_untar_args
                 UNTAR_STAR_INDEX.config.publishDir = genome_index_publish_dir
                 ch_star_index = UNTAR_STAR_INDEX ( [ [:], star_index ] ).untar.map { it[1] }
                 ch_versions   = ch_versions.mix(UNTAR_STAR_INDEX.out.versions)
@@ -242,15 +242,15 @@ workflow PREPARE_GENOME {
     if ('star_rsem' in prepare_tool_indices) {
         if (rsem_index) {
             if (rsem_index.endsWith('.tar.gz')) {
-                UNTAR_RSEM_INDEX.config.ext.args2  = '--no-same-owner'
+                UNTAR_RSEM_INDEX.config.ext.args2  = params.prepare_genome_untar_args
                 UNTAR_RSEM_INDEX.config.publishDir = genome_index_publish_dir
                 ch_rsem_index = UNTAR_RSEM_INDEX ( [ [:], rsem_index ] ).untar.map { it[1] }
                 ch_versions   = ch_versions.mix(UNTAR_RSEM_INDEX.out.versions)
             } else {
                 ch_rsem_index = Channel.value(file(rsem_index))
             }
         } else {
-            RSEM_PREPAREREFERENCE_GENOME.config.ext.args   = '--star'
+            RSEM_PREPAREREFERENCE_GENOME.config.ext.args   = params.prepare_genome_rsem_args
             RSEM_PREPAREREFERENCE_GENOME.config.publishDir = genome_index_publish_dir
             ch_rsem_index = RSEM_PREPAREREFERENCE_GENOME ( ch_fasta, ch_gtf ).index
             ch_versions   = ch_versions.mix(RSEM_PREPAREREFERENCE_GENOME.out.versions)
@@ -272,7 +272,7 @@ workflow PREPARE_GENOME {
         }
         if (hisat2_index) {
             if (hisat2_index.endsWith('.tar.gz')) {
-                UNTAR_HISAT2_INDEX.config.ext.args2  = '--no-same-owner'
+                UNTAR_HISAT2_INDEX.config.ext.args2  = params.prepare_genome_untar_args
                 UNTAR_HISAT2_INDEX.config.publishDir = genome_index_publish_dir
                 ch_hisat2_index = UNTAR_HISAT2_INDEX ( [ [:], hisat2_index ] ).untar.map { it[1] }
                 ch_versions     = ch_versions.mix(UNTAR_HISAT2_INDEX.out.versions)
@@ -292,7 +292,7 @@ workflow PREPARE_GENOME {
     ch_salmon_index = Channel.empty()
     if (salmon_index) {
         if (salmon_index.endsWith('.tar.gz')) {
-            UNTAR_SALMON_INDEX.config.ext.args2  = '--no-same-owner'
+            UNTAR_SALMON_INDEX.config.ext.args2  = params.prepare_genome_untar_args
             UNTAR_SALMON_INDEX.config.publishDir = genome_index_publish_dir
             ch_salmon_index = UNTAR_SALMON_INDEX ( [ [:], salmon_index ] ).untar.map { it[1] }
             ch_versions     = ch_versions.mix(UNTAR_SALMON_INDEX.out.versions)

diff --git a/subworkflows/nf-core/bam_markduplicates_picard/main.nf b/subworkflows/nf-core/bam_markduplicates_picard/main.nf
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf
diff --git a/subworkflows/nf-core/fastq_subsample_fq_salmon/main.nf b/subworkflows/nf-core/fastq_subsample_fq_salmon/main.nf
diff --git a/workflows/rnaseq.nf b/workflows/rnaseq.nf
@@ -350,7 +350,7 @@ workflow RNASEQ {
     // MODULE: Remove genome contaminant reads
     //
     if (!params.skip_bbsplit) {
-        BBMAP_BBSPLIT.config.ext.args   = 'build=1 ambiguous2=all maxindel=150000'
+        BBMAP_BBSPLIT.config.ext.args   = params.bbsplit_args
         BBMAP_BBSPLIT.config.publishDir = [
             [
                 path: "${params.outdir}/bbsplit",
@@ -383,7 +383,7 @@ workflow RNASEQ {
     if (params.remove_ribo_rna) {
         ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines()).map { row -> file(row, checkIfExists: true) }.collect()
 
-        SORTMERNA.config.ext.args   = '--num_alignments 1 -v'
+        SORTMERNA.config.ext.args   = params.sortmerna_args
         SORTMERNA.config.publishDir = [
             [
                 path: "${params.outdir}/sortmerna",
@@ -548,7 +548,7 @@ workflow RNASEQ {
             )
 
             // Name sort BAM before passing to Salmon
-            SAMTOOLS_SORT.config.ext.args   = '-n'
+            SAMTOOLS_SORT.config.ext.args   = params.samtools_sort_args
             SAMTOOLS_SORT.config.ext.prefix = { "${meta.id}.umi_dedup.transcriptome" }
             SAMTOOLS_SORT.config.publishDir = [
                 path: "${params.outdir}/${params.aligner}",
@@ -780,7 +780,7 @@ workflow RNASEQ {
     //
     ch_preseq_multiqc = Channel.empty()
     if (!params.skip_alignment && !params.skip_qc && !params.skip_preseq) {
-        PRESEQ_LCEXTRAP.config.ext.args   = '-verbose -bam -seed 1 -seg_len 100000000'
+        PRESEQ_LCEXTRAP.config.ext.args   = params.preseq_lcextrap_args
         PRESEQ_LCEXTRAP.config.publishDir = [
             [
                 path: "${params.outdir}/${params.aligner}/preseq",
@@ -895,7 +895,7 @@ workflow RNASEQ {
     //
     if (!params.skip_alignment && !params.skip_bigwig) {
 
-        BEDTOOLS_GENOMECOV.config.ext.args   = '-split -du'
+        BEDTOOLS_GENOMECOV.config.ext.args   = params.bedtools_args
         BEDTOOLS_GENOMECOV.config.publishDir = [
             path: { "${params.outdir}/bedtools/${meta.id}" },
             enabled: false