nextflow-io · kenibrewer · Nov 17, 2024 · Nov 4, 2024 · Nov 4, 2024 · Nov 4, 2024
diff --git a/docs/hello_nextflow/04_hello_genomics.md b/docs/hello_nextflow/04_hello_genomics.md
@@ -181,7 +181,7 @@ process SAMTOOLS_INDEX {
 
     container 'community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464'
 
-    publishDir 'results_genomics', mode: 'symlink'
+    publishDir params.outdir, mode: 'symlink'
 
     input:
         path input_bam
@@ -195,18 +195,18 @@ process SAMTOOLS_INDEX {
 }
 ```
 
-You should recognize all the pieces from what you learned in Part 1 & Part 2 of this training series; the only notable change is that this time we're using `mode: symlink` for the `publishDir` directive.
+You should recognize all the pieces from what you learned in Part 1 & Part 2 of this training series; the only notable changes are that this time we're using `mode: symlink` for the `publishDir` directive, and that we're setting the publish directory from a parameter (`params.outdir`) instead of hard-coding the path.
 
 !!! note
 
     Even though the data files we're using here are very small, in genomics they can get very large, so we should get into the habit of using symbolic links rather than making actual copies of these files, unless there's a compelling reason to do so.
 
 This process is going to require us to pass in a file path via the `input_bam` input, so let's set that up next.
 
-### 1.2. Add an input parameter declaration
+### 1.2. Add input and output parameter declarations
 
 At the top of the file, under the `Pipeline parameters` section, we declare a CLI parameter called `reads_bam` and give it a default value.
-That way, we can be lazy and not specify the input when we type the command to launch the pipeline (for development purposes).
+That way, we can be lazy and not specify the input when we type the command to launch the pipeline (for development purposes). We're also going to set `params.outdir` with a default value for the output directory.
 
 ```groovy title="hello-genomics.nf" linenums="3"
 /*
@@ -215,6 +215,7 @@ That way, we can be lazy and not specify the input when we type the command to l
 
 // Primary input
 params.reads_bam = "${projectDir}/data/bam/reads_mother.bam"
+params.outdir    = "results_genomics"
 ```
 
 Now we have a process ready, as well as a parameter to give it an input to run on, so let's wire those things up together.
@@ -298,7 +299,7 @@ process GATK_HAPLOTYPECALLER {
 
     container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867"
 
-    publishDir 'results_genomics', mode: 'symlink'
+    publishDir params.outdir, mode: 'symlink'
 
     input:
         path input_bam

diff --git a/docs/hello_nextflow/05_hello_operators.md b/docs/hello_nextflow/05_hello_operators.md
@@ -14,7 +14,7 @@ Specifically, we show you how to implement joint variant calling with GATK, buil
 
 The GATK variant calling method we used in Part 3 simply generated variant calls per sample.
 That's fine if you only want to look at the variants from each sample in isolation, but that yields limited information.
-It's often more interesting to look at variant calls differ across multiple samples, and to do so, GATK offers an alternative method called joint variant calling, which we demonstrate here.
+It's often more interesting to look at how variant calls differ across multiple samples, and to do so, GATK offers an alternative method called joint variant calling, which we demonstrate here.
 
 Joint variant calling involves generating a special kind of variant output called GVCF (for Genomic VCF) for each sample, then combining the GVCF data from all the samples and finally, running a 'joint genotyping' statistical analysis.
 
@@ -411,7 +411,7 @@ Let's write a new process to define how that's going to work, based on the comma
 process GATK_GENOMICSDB {
 
     container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867"
-    publishDir 'results_genomics', mode: 'copy'
+    publishDir params.outdir, mode: 'copy'
 
     input:
         path all_gvcfs

diff --git a/docs/hello_nextflow/06_hello_config.md b/docs/hello_nextflow/06_hello_config.md
@@ -233,7 +233,7 @@ process SAMTOOLS_INDEX {
 
     container 'community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464'
 
-    publishDir 'results_genomics', mode: 'symlink'
+    publishDir params.outdir, mode: 'symlink'
 ```
 
 _After:_
@@ -244,7 +244,7 @@ process SAMTOOLS_INDEX {
     container "community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464"
     conda "bioconda::samtools=1.20"
 
-    publishDir 'results_genomics', mode: 'symlink'
+    publishDir params.outdir, mode: 'symlink'
 ```
 
 #### 1.4.2. Update GATK_HAPLOTYPECALLER
@@ -258,7 +258,7 @@ process GATK_HAPLOTYPECALLER {
 
     container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867"
 
-    publishDir 'results_genomics', mode: 'symlink'
+    publishDir params.outdir, mode: 'symlink'
 ```
 
 _After:_
@@ -269,7 +269,7 @@ process GATK_HAPLOTYPECALLER {
     container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867"
     conda "bioconda::gatk4=4.5.0.0"
 
-    publishDir 'results_genomics', mode: 'symlink'
+    publishDir params.outdir, mode: 'symlink'
 ```
 
 #### 1.4.3. Update GATK_JOINTGENOTYPING
@@ -283,7 +283,7 @@ process GATK_JOINTGENOTYPING {
 
     container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867"
 
-    publishDir 'results_genomics', mode: 'symlink'
+    publishDir params.outdir, mode: 'symlink'
 ```
 
 _After:_
@@ -294,7 +294,7 @@ process GATK_JOINTGENOTYPING {
     container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867"
     conda "bioconda::gatk4=4.5.0.0"
 
-    publishDir 'results_genomics', mode: 'symlink'
+    publishDir params.outdir, mode: 'symlink'
 ```
 
 Once all three processes are updated, we can try running the workflow again.
@@ -872,6 +872,9 @@ So let's cut this set of params out of `main.nf`:
 // Primary input (file of input files, one per line)
 params.reads_bam = "${projectDir}/data/sample_bams.txt"
 
+// Output directory
+params.outdir    = 'results_genomics'
+
 // Accessory files
 params.reference        = "${projectDir}/data/ref/ref.fasta"
 params.reference_index  = "${projectDir}/data/ref/ref.fasta.fai"
@@ -924,6 +927,9 @@ params {
     // Primary input (file of input files, one per line)
     reads_bam        = "${projectDir}/data/sample_bams.txt"
 
+    // Output directory
+    outdir           = 'results_genomics'
+
     // Accessory files
     reference        = "${projectDir}/data/ref/ref.fasta"
     reference_index  = "${projectDir}/data/ref/ref.fasta.fai"
@@ -952,6 +958,7 @@ The values are the same input files and reference files we've been using so far.
 ```json title="demo-params.json" linenums="1"
 {
     "reads_bam": "data/sample_bams.txt",
+    "outdir": "results_genomics",
     "reference": "data/ref/ref.fasta",
     "reference_index": "data/ref/ref.fasta.fai",
     "reference_dict": "data/ref/ref.dict",
@@ -992,6 +999,9 @@ params {
     // Primary input (file of input files, one per line)
     reads_bam        = "${projectDir}/data/sample_bams.txt"
 
+    // Output directory
+    outdir           = 'results_genomics'
+
     // Accessory files
     reference        = "${projectDir}/data/ref/ref.fasta"
     reference_index  = "${projectDir}/data/ref/ref.fasta.fai"
@@ -1010,6 +1020,9 @@ params {
     // Primary input (file of input files, one per line)
     reads_bam        = null
 
+    // Output directory
+    outdir           = null
+
     // Accessory files
     reference        = null
     reference_index  = null
@@ -1085,6 +1098,9 @@ profiles {
         // Primary input (file of input files, one per line)
         params.reads_bam        = "data/sample_bams.txt"
 
+        // Output directory
+        params.outdir           = 'results_genomics'
+
         // Accessory files
         params.reference        = "data/ref/ref.fasta"
         params.reference_index  = "data/ref/ref.fasta.fai"

diff --git a/docs/hello_nextflow/07_hello_modules.md b/docs/hello_nextflow/07_hello_modules.md
@@ -167,7 +167,7 @@ process SAMTOOLS_INDEX {
     container 'community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464'
     conda "bioconda::samtools=1.20"
 
-    publishDir 'results_genomics', mode: 'symlink'
+    publishDir params.outdir, mode: 'symlink'
 
     input:
         path input_bam
@@ -282,7 +282,7 @@ process GATK_HAPLOTYPECALLER {
     container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867"
     conda "bioconda::gatk4=4.5.0.0"
 
-    publishDir 'results_genomics', mode: 'symlink'
+    publishDir params.outdir, mode: 'symlink'
 
     input:
         tuple path(input_bam), path(input_bam_index)
@@ -319,7 +319,7 @@ process GATK_JOINTGENOTYPING {
     container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867"
     conda "bioconda::gatk4=4.5.0.0"
 
-    publishDir 'results_genomics', mode: 'symlink'
+    publishDir params.outdir, mode: 'symlink'
 
     input:
         path all_gvcfs

diff --git a/hello-nextflow/hello-config/demo-params.json b/hello-nextflow/hello-config/demo-params.json
@@ -1,5 +1,6 @@
 {
     "reads_bam": "data/sample_bams.txt",
+    "outdir": "results_genomics",
     "reference": "data/ref/ref.fasta",
     "reference_index": "data/ref/ref.fasta.fai",
     "reference_dict": "data/ref/ref.dict",

diff --git a/hello-nextflow/hello-config/main.nf b/hello-nextflow/hello-config/main.nf
@@ -7,6 +7,9 @@
 // Primary input (file of input files, one per line)
 params.reads_bam = "${projectDir}/data/sample_bams.txt"
 
+// Output directory
+params.outdir = "results_genomics"
+
 // Accessory files
 params.reference        = "${projectDir}/data/ref/ref.fasta"
 params.reference_index  = "${projectDir}/data/ref/ref.fasta.fai"
@@ -23,7 +26,7 @@ process SAMTOOLS_INDEX {
 
     container 'community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464'
 
-    publishDir 'results_genomics', mode: 'symlink'
+    publishDir params.outdir, mode: 'symlink'
 
     input:
         path input_bam
@@ -43,7 +46,7 @@ process GATK_HAPLOTYPECALLER {
 
     container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867"
 
-    publishDir 'results_genomics', mode: 'symlink'
+    publishDir params.outdir, mode: 'symlink'
 
     input:
         tuple path(input_bam), path(input_bam_index)
@@ -73,7 +76,7 @@ process GATK_JOINTGENOTYPING {
 
     container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867"
 
-    publishDir 'results_genomics', mode: 'symlink'
+    publishDir params.outdir, mode: 'symlink'
 
     input:
         path all_gvcfs

diff --git a/hello-nextflow/hello-modules/demo-params.json b/hello-nextflow/hello-modules/demo-params.json
@@ -1,5 +1,6 @@
 {
     "reads_bam": "data/sample_bams.txt",
+    "outdir": "results_genomics",
     "reference": "data/ref/ref.fasta",
     "reference_index": "data/ref/ref.fasta.fai",
     "reference_dict": "data/ref/ref.dict",

diff --git a/hello-nextflow/hello-modules/main.nf b/hello-nextflow/hello-modules/main.nf
@@ -8,7 +8,7 @@ process SAMTOOLS_INDEX {
     container 'community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464'
     conda "bioconda::samtools=1.20"
 
-    publishDir 'results_genomics', mode: 'symlink'
+    publishDir params.outdir, mode: 'symlink'
 
     input:
         path input_bam
@@ -29,7 +29,7 @@ process GATK_HAPLOTYPECALLER {
     container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867"
     conda "bioconda::gatk4=4.5.0.0"
 
-    publishDir 'results_genomics', mode: 'symlink'
+    publishDir params.outdir, mode: 'symlink'
 
     input:
         tuple path(input_bam), path(input_bam_index)
@@ -60,7 +60,7 @@ process GATK_JOINTGENOTYPING {
     container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867"
     conda "bioconda::gatk4=4.5.0.0"
 
-    publishDir 'results_genomics', mode: 'symlink'
+    publishDir params.outdir, mode: 'symlink'
 
     input:
         path all_gvcfs

diff --git a/hello-nextflow/hello-modules/nextflow.config b/hello-nextflow/hello-modules/nextflow.config
@@ -24,6 +24,9 @@ profiles {
         // Primary input (file of input files, one per line)
         params.reads_bam        = "data/sample_bams.txt"
 
+        // Output directory
+        params.outdir           = "results_genomics"
+
         // Accessory files
         params.reference        = "data/ref/ref.fasta"
         params.reference_index  = "data/ref/ref.fasta.fai"

diff --git a/hello-nextflow/hello-nf-test/demo-params.json b/hello-nextflow/hello-nf-test/demo-params.json
@@ -1,5 +1,6 @@
 {
     "reads_bam": "data/sample_bams.txt",
+    "outdir": "results_genomics",
     "reference": "data/ref/ref.fasta",
     "reference_index": "data/ref/ref.fasta.fai",
     "reference_dict": "data/ref/ref.dict",

diff --git a/hello-nextflow/hello-nf-test/modules/local/gatk/haplotypecaller/main.nf b/hello-nextflow/hello-nf-test/modules/local/gatk/haplotypecaller/main.nf
@@ -8,7 +8,7 @@ process GATK_HAPLOTYPECALLER {
     container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867"
     conda "bioconda::gatk4=4.5.0.0"
 
-    publishDir 'results_genomics', mode: 'symlink'
+    publishDir params.outdir, mode: 'symlink'
 
     input:
         tuple path(input_bam), path(input_bam_index)

diff --git a/hello-nextflow/hello-nf-test/modules/local/gatk/jointgenotyping/main.nf b/hello-nextflow/hello-nf-test/modules/local/gatk/jointgenotyping/main.nf
@@ -6,7 +6,7 @@ process GATK_JOINTGENOTYPING {
     container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867"
     conda "bioconda::gatk4=4.5.0.0"
 
-    publishDir 'results_genomics', mode: 'symlink'
+    publishDir params.outdir, mode: 'symlink'
 
     input:
         path all_gvcfs

diff --git a/hello-nextflow/hello-nf-test/modules/local/samtools/index/main.nf b/hello-nextflow/hello-nf-test/modules/local/samtools/index/main.nf
@@ -8,7 +8,7 @@ process SAMTOOLS_INDEX {
     container 'community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464'
     conda "bioconda::samtools=1.20"
 
-    publishDir 'results_genomics', mode: 'symlink'
+    publishDir params.outdir, mode: 'symlink'
 
     input:
         path input_bam

diff --git a/hello-nextflow/hello-nf-test/nextflow.config b/hello-nextflow/hello-nf-test/nextflow.config
@@ -1,5 +1,8 @@
 docker.fixOwnership = true
 
+// Default output directory
+params.outdir = 'results_genomics'
+
 profiles {
     docker_on {
         docker.enabled = true
@@ -24,6 +27,9 @@ profiles {
         // Primary input (file of input files, one per line)
         params.reads_bam        = "data/sample_bams.txt"
 
+        // Output directory
+        params.outdir           = "results_genomics"
+
         // Accessory files
         params.reference        = "data/ref/ref.fasta"
         params.reference_index  = "data/ref/ref.fasta.fai"

diff --git a/hello-nextflow/hello-operators.nf b/hello-nextflow/hello-operators.nf
@@ -7,6 +7,9 @@
 // Primary input (file of input files, one per line)
 params.reads_bam = "${projectDir}/data/sample_bams.txt"
 
+// Output directory
+params.outdir = "results_genomics"
+
 // Accessory files
 params.reference        = "${projectDir}/data/ref/ref.fasta"
 params.reference_index  = "${projectDir}/data/ref/ref.fasta.fai"
@@ -20,7 +23,7 @@ process SAMTOOLS_INDEX {
 
     container 'community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464'
 
-    publishDir 'results_genomics', mode: 'symlink'
+    publishDir params.outdir, mode: 'symlink'
 
     input:
         path input_bam
@@ -40,7 +43,7 @@ process GATK_HAPLOTYPECALLER {
 
     container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867"
 
-    publishDir 'results_genomics', mode: 'symlink'
+    publishDir params.outdir, mode: 'symlink'
 
     input:
         tuple path(input_bam), path(input_bam_index)