From 143af876548b804628582d03c3e7a6c512c2627f Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 16 Oct 2023 07:53:38 -0500 Subject: [PATCH] Refactor ext args as pipeline params Signed-off-by: Ben Sherman --- conf/modules.config | 70 +++++++++++++++++-- lib/Utils.groovy | 7 ++ subworkflows/local/prepare_genome.nf | 16 ++--- .../nf-core/bam_markduplicates_picard/main.nf | 2 +- .../fastq_fastqc_umitools_fastp/main.nf | 4 +- .../fastq_fastqc_umitools_trimgalore/main.nf | 2 +- .../nf-core/fastq_subsample_fq_salmon/main.nf | 4 +- workflows/rnaseq.nf | 10 +-- 8 files changed, 90 insertions(+), 25 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 9d1152ac1..aa067b7b1 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -10,12 +10,6 @@ ---------------------------------------------------------------------------------------- */ -def rseqc_modules = params.rseqc_modules ? params.rseqc_modules.split(',').collect{ it.trim().toLowerCase() } : [] - -// -// General configuration options -// - process { publishDir = [ path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, @@ -23,3 +17,67 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + +params { + + // + // Genome preparation options + // + + prepare_genome_untar_args = '--no-same-owner' + prepare_genome_gffread_args = '--keep-exon-attrs -F -T' + prepare_genome_rsem_args = '--star' + prepare_genome_bbsplit_args = 'build=1' + + // + // Read subsampling and strand inferring options + // + + subsample_fq_args = '--record-count 1000000 --seed 1' + subsample_salmon_args = '--skipQuant' + + // + // Read QC and trimming options + // + + fastqc_args = '--quiet' + + // + // Contaminant removal options + // + + bbsplit_args = 'build=1 ambiguous2=all maxindel=150000' + sortmerna_args = '--num_alignments 1 -v' + + // + // General alignment options + // + + picard_args = '--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp' + bedtools_args = '-split -du' + + // + // STAR Salmon alignment options + // + + samtools_sort_args = '-n' + + // + // STAR RSEM alignment options + // + + // + // HISAT2 alignment options + // + + // + // Post-alignment QC options + // + + preseq_lcextrap_args = '-verbose -bam -seed 1 -seg_len 100000000' + + // + // Salmon pseudo-alignment options + // + +} diff --git a/lib/Utils.groovy b/lib/Utils.groovy index 8d030f4e8..3c3af19d0 100644 --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -44,4 +44,11 @@ class Utils { "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" } } + + public static Map publishDir(def params, String subPath, Map opts=[:]) { + final defaults = [ + mode: params.publish_dir_mode + ] + return defaults + [path: "${params.outdir}/${subPath}"] + opts + } } diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index 3122f9314..18a1c0c14 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -99,7 +99,7 @@ workflow PREPARE_GENOME { ch_gff = Channel.value(file(gff)) } - GFFREAD.config.ext.args = '--keep-exon-attrs -F -T' + GFFREAD.config.ext.args = params.prepare_genome_gffread_args GFFREAD.config.publishDir = genome_publish_dir ch_gtf = GFFREAD ( ch_gff ).gtf ch_versions = ch_versions.mix(GFFREAD.out.versions) @@ -184,7 +184,7 @@ workflow PREPARE_GENOME { if ('bbsplit' in prepare_tool_indices) { if (bbsplit_index) { if (bbsplit_index.endsWith('.tar.gz')) { - UNTAR_BBSPLIT_INDEX.config.ext.args2 = '--no-same-owner' + UNTAR_BBSPLIT_INDEX.config.ext.args2 = params.prepare_genome_untar_args UNTAR_BBSPLIT_INDEX.config.publishDir = genome_index_publish_dir ch_bbsplit_index = UNTAR_BBSPLIT_INDEX ( [ [:], bbsplit_index ] ).untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_BBSPLIT_INDEX.out.versions) @@ -201,7 +201,7 @@ workflow PREPARE_GENOME { .collect { [ it ] } // Collect entries as a list to pass as "tuple val(short_names), path(path_to_fasta)" to module .set { ch_bbsplit_fasta_list } - BBMAP_BBSPLIT.config.ext.args = 'build=1' + BBMAP_BBSPLIT.config.ext.args = params.prepare_genome_bbsplit_args BBMAP_BBSPLIT.config.publishDir = genome_index_publish_dir ch_bbsplit_index = BBMAP_BBSPLIT ( [ [:], [] ], [], ch_fasta, ch_bbsplit_fasta_list, true ).index ch_versions = ch_versions.mix(BBMAP_BBSPLIT.out.versions) @@ -215,7 +215,7 @@ workflow PREPARE_GENOME { if ('star_salmon' in prepare_tool_indices) { if (star_index) { if (star_index.endsWith('.tar.gz')) { - UNTAR_STAR_INDEX.config.ext.args2 = '--no-same-owner' + UNTAR_STAR_INDEX.config.ext.args2 = params.prepare_genome_untar_args UNTAR_STAR_INDEX.config.publishDir = genome_index_publish_dir ch_star_index = UNTAR_STAR_INDEX ( [ [:], star_index ] ).untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions) @@ -242,7 +242,7 @@ workflow PREPARE_GENOME { if ('star_rsem' in prepare_tool_indices) { if (rsem_index) { if (rsem_index.endsWith('.tar.gz')) { - UNTAR_RSEM_INDEX.config.ext.args2 = '--no-same-owner' + UNTAR_RSEM_INDEX.config.ext.args2 = params.prepare_genome_untar_args UNTAR_RSEM_INDEX.config.publishDir = genome_index_publish_dir ch_rsem_index = UNTAR_RSEM_INDEX ( [ [:], rsem_index ] ).untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_RSEM_INDEX.out.versions) @@ -250,7 +250,7 @@ workflow PREPARE_GENOME { ch_rsem_index = Channel.value(file(rsem_index)) } } else { - RSEM_PREPAREREFERENCE_GENOME.config.ext.args = '--star' + RSEM_PREPAREREFERENCE_GENOME.config.ext.args = params.prepare_genome_rsem_args RSEM_PREPAREREFERENCE_GENOME.config.publishDir = genome_index_publish_dir ch_rsem_index = RSEM_PREPAREREFERENCE_GENOME ( ch_fasta, ch_gtf ).index ch_versions = ch_versions.mix(RSEM_PREPAREREFERENCE_GENOME.out.versions) @@ -272,7 +272,7 @@ workflow PREPARE_GENOME { } if (hisat2_index) { if (hisat2_index.endsWith('.tar.gz')) { - UNTAR_HISAT2_INDEX.config.ext.args2 = '--no-same-owner' + UNTAR_HISAT2_INDEX.config.ext.args2 = params.prepare_genome_untar_args UNTAR_HISAT2_INDEX.config.publishDir = genome_index_publish_dir ch_hisat2_index = UNTAR_HISAT2_INDEX ( [ [:], hisat2_index ] ).untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_HISAT2_INDEX.out.versions) @@ -292,7 +292,7 @@ workflow PREPARE_GENOME { ch_salmon_index = Channel.empty() if (salmon_index) { if (salmon_index.endsWith('.tar.gz')) { - UNTAR_SALMON_INDEX.config.ext.args2 = '--no-same-owner' + UNTAR_SALMON_INDEX.config.ext.args2 = params.prepare_genome_untar_args UNTAR_SALMON_INDEX.config.publishDir = genome_index_publish_dir ch_salmon_index = UNTAR_SALMON_INDEX ( [ [:], salmon_index ] ).untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_SALMON_INDEX.out.versions) diff --git a/subworkflows/nf-core/bam_markduplicates_picard/main.nf b/subworkflows/nf-core/bam_markduplicates_picard/main.nf index 59255fe43..8c0158adc 100644 --- a/subworkflows/nf-core/bam_markduplicates_picard/main.nf +++ b/subworkflows/nf-core/bam_markduplicates_picard/main.nf @@ -17,7 +17,7 @@ workflow BAM_MARKDUPLICATES_PICARD { ch_versions = Channel.empty() - PICARD_MARKDUPLICATES.config.ext.args = '--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp' + PICARD_MARKDUPLICATES.config.ext.args = params.picard_args PICARD_MARKDUPLICATES.config.ext.prefix = { "${meta.id}.markdup.sorted" } PICARD_MARKDUPLICATES.config.publishDir = [ [ diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf index 445e53fc1..304a0ff3a 100644 --- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf +++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf @@ -35,7 +35,7 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP { fastqc_raw_html = Channel.empty() fastqc_raw_zip = Channel.empty() if (!skip_fastqc) { - FASTQC_RAW.config.ext.args = '--quiet' + FASTQC_RAW.config.ext.args = params.fastqc_args FASTQC_RAW ( reads ) @@ -148,7 +148,7 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP { .set { trim_read_count } if (!skip_fastqc) { - FASTQC_TRIM.config.ext.args = '--quiet' + FASTQC_TRIM.config.ext.args = params.fastqc_args FASTQC_TRIM.config.publishDir = [ path: "${params.outdir}/${params.trimmer}/fastqc", mode: params.publish_dir_mode, diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf index a5f77b614..f8770d094 100644 --- a/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf +++ b/subworkflows/nf-core/fastq_fastqc_umitools_trimgalore/main.nf @@ -36,7 +36,7 @@ workflow FASTQ_FASTQC_UMITOOLS_TRIMGALORE { fastqc_html = Channel.empty() fastqc_zip = Channel.empty() if (!skip_fastqc) { - FASTQC.config.ext.args = '--quiet' + FASTQC.config.ext.args = params.fastqc_args FASTQC (reads) fastqc_html = FASTQC.out.html fastqc_zip = FASTQC.out.zip diff --git a/subworkflows/nf-core/fastq_subsample_fq_salmon/main.nf b/subworkflows/nf-core/fastq_subsample_fq_salmon/main.nf index f87e0b2a7..f64a6489c 100644 --- a/subworkflows/nf-core/fastq_subsample_fq_salmon/main.nf +++ b/subworkflows/nf-core/fastq_subsample_fq_salmon/main.nf @@ -38,7 +38,7 @@ workflow FASTQ_SUBSAMPLE_FQ_SALMON { // // Sub-sample FastQ files with fq // - FQ_SUBSAMPLE.config.ext.args = '--record-count 1000000 --seed 1' + FQ_SUBSAMPLE.config.ext.args = params.subsample_fq_args FQ_SUBSAMPLE.config.ext.prefix = { "${meta.id}.subsampled" } FQ_SUBSAMPLE.config.publishDir = [ path: "${params.outdir}/sample_fastq/fastq", @@ -54,7 +54,7 @@ workflow FASTQ_SUBSAMPLE_FQ_SALMON { // def lib_type = 'A' def alignment_mode = false - SALMON_QUANT.config.ext.args = '--skipQuant' + SALMON_QUANT.config.ext.args = params.subsample_salmon_args SALMON_QUANT.config.publishDir = [ path: "${params.outdir}/sample_fastq/salmon", mode: params.publish_dir_mode, diff --git a/workflows/rnaseq.nf b/workflows/rnaseq.nf index 48bbd641d..cfc66804f 100755 --- a/workflows/rnaseq.nf +++ b/workflows/rnaseq.nf @@ -350,7 +350,7 @@ workflow RNASEQ { // MODULE: Remove genome contaminant reads // if (!params.skip_bbsplit) { - BBMAP_BBSPLIT.config.ext.args = 'build=1 ambiguous2=all maxindel=150000' + BBMAP_BBSPLIT.config.ext.args = params.bbsplit_args BBMAP_BBSPLIT.config.publishDir = [ [ path: "${params.outdir}/bbsplit", @@ -383,7 +383,7 @@ workflow RNASEQ { if (params.remove_ribo_rna) { ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines()).map { row -> file(row, checkIfExists: true) }.collect() - SORTMERNA.config.ext.args = '--num_alignments 1 -v' + SORTMERNA.config.ext.args = params.sortmerna_args SORTMERNA.config.publishDir = [ [ path: "${params.outdir}/sortmerna", @@ -548,7 +548,7 @@ workflow RNASEQ { ) // Name sort BAM before passing to Salmon - SAMTOOLS_SORT.config.ext.args = '-n' + SAMTOOLS_SORT.config.ext.args = params.samtools_sort_args SAMTOOLS_SORT.config.ext.prefix = { "${meta.id}.umi_dedup.transcriptome" } SAMTOOLS_SORT.config.publishDir = [ path: "${params.outdir}/${params.aligner}", @@ -780,7 +780,7 @@ workflow RNASEQ { // ch_preseq_multiqc = Channel.empty() if (!params.skip_alignment && !params.skip_qc && !params.skip_preseq) { - PRESEQ_LCEXTRAP.config.ext.args = '-verbose -bam -seed 1 -seg_len 100000000' + PRESEQ_LCEXTRAP.config.ext.args = params.preseq_lcextrap_args PRESEQ_LCEXTRAP.config.publishDir = [ [ path: "${params.outdir}/${params.aligner}/preseq", @@ -895,7 +895,7 @@ workflow RNASEQ { // if (!params.skip_alignment && !params.skip_bigwig) { - BEDTOOLS_GENOMECOV.config.ext.args = '-split -du' + BEDTOOLS_GENOMECOV.config.ext.args = params.bedtools_args BEDTOOLS_GENOMECOV.config.publishDir = [ path: { "${params.outdir}/bedtools/${meta.id}" }, enabled: false