Merged in release (pull request #199)

v0.6.7 changes
TheJacksonLaboratory · Aug 5, 2024 · 67cd86b · 67cd86b
2 parents 8386555 + 6034cb2
commit 67cd86b
Show file tree

Hide file tree

Showing 35 changed files with 58 additions and 40 deletions.
diff --git a/ReleaseNotes.md b/ReleaseNotes.md
@@ -1,5 +1,16 @@
 # RELEASE NOTES
 
+## Release 0.6.7
+
+In this release we make the following minor adjustments:   
+
+1. Correct syntax errors in the Xengsort module when running single-end data.  
+1. Minor adjustments to EMASE and GBRS help and log information to include the `genome_build` param.  
+1. Bump the version of MultiQC to v1.23.  
+1. Increase the memory request for two `PTA` modules: `python_merge_prep.nf` and `python_reorder_vcf_columns.nf`.  
+1. Add `CHECK_STRANDEDNESS` to the MultiQC output for PDX RNAseq.  
+1. Increase the job memory request in the example run scripts.   
+
 ## Release 0.6.6
 
 In this release, we add a FASTQ sorting function to the Xengsort module. Due to asynchronous multi-threading in the classification step, Xengsort produces FASTQ output with non-deterministic sort order. BWA produces subtly different mapping results when reads in otherwise identical FASTQ inputs are shuffled ([see note from BWA developer here](https://github.com/lh3/bwa/issues/192#issuecomment-380612006)). The slight mapping differences are not enough to impact overall results, but do prevent fully reproducible results when Xengsort is used and reads are not sorted. The addition of the sorting function allows for fully reproducible results, with no additional user action required.    

diff --git a/bin/help/emase.nf b/bin/help/emase.nf
@@ -24,6 +24,8 @@ Examples:
     Given the input file name "SAMPLE_NAME_1_OTHER_STUFF-WeDont_WANT.txt" if this `concat_sampleID_delim` = '_' and `concat_sampleID_positions` = "3" the sample ID would be assigned as `SAMPLE_NAME_1`
     Given the input file name "SAMPLE_NAME_1_OTHER_STUFF-WeDont_WANT.txt" if this `concat_sampleID_delim` = '-' and `concat_sampleID_positions` = "1" the sample ID would be assigned as `SAMPLE_NAME_1_OTHER_STUFF`
 
+--genome_build | 'GRCm39' | Options: GRCm39 or GRCm38.
+
 --bowtie_index | /<PATH> | Path to the bowtie index. Include the bowtie prefix in this path (e.g., `/path/to/bowtie.transcripts` where bowtie.transcripts.* are the full set of index files in the directory).  
 --transcripts_info | /<PATH> | A file containing all transcript IDs. NOTE: These IDs must not contain haplotype IDs. This file must also have a 'length' column. Note that 'length' is not used in this context. ONLY IDs are used from this file. Can be obtained from `prepare_emase` workflow (emase.fullTranscripts.info)
 --gbrs_strain_list | <comma,delim,list> | A list of haplotype names corresponding to genomes used in hybrid genome construction (e.g., 'A,B,C,D,E,F,G,H'). 

diff --git a/bin/help/gbrs.nf b/bin/help/gbrs.nf
@@ -24,6 +24,8 @@ Examples:
     Given the input file name "SAMPLE_NAME_1_OTHER_STUFF-WeDont_WANT.txt" if this `concat_sampleID_delim` = '_' and `concat_sampleID_positions` = "3" the sample ID would be assigned as `SAMPLE_NAME_1`
     Given the input file name "SAMPLE_NAME_1_OTHER_STUFF-WeDont_WANT.txt" if this `concat_sampleID_delim` = '-' and `concat_sampleID_positions` = "1" the sample ID would be assigned as `SAMPLE_NAME_1_OTHER_STUFF`
 
+--genome_build | 'GRCm39' | Options: GRCm39 or GRCm38.
+
 --bowtie_index | /<PATH> | Path to the bowtie index. Include the bowtie prefix in this path (e.g., `/path/to/bowtie.transcripts` where bowtie.transcripts.* are the full set of index files in the directory).  
 --transcripts_info | /<PATH> | A file containing all transcript IDs. NOTE: These IDs must not contain haplotype IDs. This file must also have a 'length' column. Note that 'length' is not used in this context. ONLY IDs are used from this file. Can be obtained from `prepare_emase` workflow (emase.fullTranscripts.info)
 --gbrs_strain_list | <comma,delim,list> | A list of haplotype names corresponding to genomes used in hybrid genome construction (e.g., 'A,B,C,D,E,F,G,H'). 

diff --git a/bin/log/emase.nf b/bin/log/emase.nf
@@ -23,6 +23,7 @@ ______________________________________________________
 --read_type                     ${params.read_type}
 --csv_input                     ${params.csv_input}
 --download_data                 ${params.download_data}
+--genome_build                  ${params.genome_build}
 --bowtie_index                  ${params.bowtie_index}
 --transcripts_info              ${params.transcripts_info}
 --gbrs_strain_list              ${params.gbrs_strain_list}
@@ -56,6 +57,7 @@ ______________________________________________________
 --concat_lanes                  ${params.concat_lanes}
 --concat_sampleID_delim         ${params.concat_sampleID_delim}
 --concat_sampleID_positions     ${params.concat_sampleID_positions}
+--genome_build                  ${params.genome_build}
 --bowtie_index                  ${params.bowtie_index}
 --transcripts_info              ${params.transcripts_info}
 --gbrs_strain_list              ${params.gbrs_strain_list}
@@ -88,6 +90,7 @@ ______________________________________________________
 --concat_lanes                  ${params.concat_lanes}
 --concat_sampleID_delim         "N/A"
 --concat_sampleID_positions     "N/A"
+--genome_build                  ${params.genome_build}
 --bowtie_index                  ${params.bowtie_index}
 --transcripts_info              ${params.transcripts_info}
 --gbrs_strain_list              ${params.gbrs_strain_list}

diff --git a/bin/log/gbrs.nf b/bin/log/gbrs.nf
@@ -35,6 +35,7 @@ def param_log(){
     --read_type                     ${params.read_type}
     --csv_input                     ${params.csv_input}
     --download_data                 ${params.download_data}
+    --genome_build                  ${params.genome_build}
     --bowtie_index                  ${params.bowtie_index}
     --transcripts_info              ${params.transcripts_info}
     --gbrs_strain_list              ${params.gbrs_strain_list}

diff --git a/modules/multiqc/multiqc.nf b/modules/multiqc/multiqc.nf
@@ -5,7 +5,7 @@ process MULTIQC {
 
     errorStrategy {(task.exitStatus == 140) ? {log.info "\n\nError code: ${task.exitStatus} for task: ${task.name}. Likely caused by the task wall clock: ${task.time} or memory: ${task.memory} being exceeded.\nAttempting orderly shutdown.\nSee .command.log in: ${task.workDir} for more info.\n\n"; return 'finish'}.call() : 'finish'}
 
-    container 'quay.io/jaxcompsci/multiqc:v1.21_custom'
+    container 'quay.io/jaxcompsci/multiqc:v1.23_custom'
 
     publishDir "${params.pubdir}/multiqc", pattern: "*multiqc_report.html", mode:'copy'
     publishDir "${params.pubdir}/multiqc", pattern: "*_data", mode:'copy'

diff --git a/modules/python/python_merge_prep.nf b/modules/python/python_merge_prep.nf
@@ -2,7 +2,7 @@ process MERGE_PREP {
   tag "$sampleID"
 
   cpus 1
-  memory 4.GB
+  memory 10.GB
   time '04:00:00'
   errorStrategy {(task.exitStatus == 140) ? {log.info "\n\nError code: ${task.exitStatus} for task: ${task.name}. Likely caused by the task wall clock: ${task.time} or memory: ${task.memory} being exceeded.\nAttempting orderly shutdown.\nSee .command.log in: ${task.workDir} for more info.\n\n"; return 'finish'}.call() : 'finish'}
 

diff --git a/modules/python/python_reorder_vcf_columns.nf b/modules/python/python_reorder_vcf_columns.nf
@@ -2,7 +2,7 @@ process REORDER_VCF_COLUMNS {
   tag "$sampleID"
 
   cpus 1
-  memory 4.GB
+  memory 10.GB
   time '04:00:00'
   errorStrategy {(task.exitStatus == 140) ? {log.info "\n\nError code: ${task.exitStatus} for task: ${task.name}. Likely caused by the task wall clock: ${task.time} or memory: ${task.memory} being exceeded.\nAttempting orderly shutdown.\nSee .command.log in: ${task.workDir} for more info.\n\n"; return 'finish'}.call() : 'finish'}
 

diff --git a/modules/xengsort/xengsort_classify.nf b/modules/xengsort/xengsort_classify.nf
@@ -21,8 +21,8 @@ process XENGSORT_CLASSIFY {
     tuple val(sampleID), path(trimmed)
 
     output:
-    tuple val(sampleID), path("*fastq-graft_sorted.*.fq"), emit: xengsort_human_fastq
-    tuple val(sampleID), path("*fastq-host_sorted.*.fq"), emit: xengsort_mouse_fastq
+    tuple val(sampleID), path("*graft_sorted.*.fq"), emit: xengsort_human_fastq
+    tuple val(sampleID), path("*host_sorted.*.fq"), emit: xengsort_mouse_fastq
     tuple val(sampleID), path("*.txt"), emit: xengsort_log
 
     script:
@@ -33,17 +33,16 @@ process XENGSORT_CLASSIFY {
         """
         
         xengsort classify \
-        --index ${xengsort_index}/${xengsort_index} \
+        --index ${xengsort_index}/${params.xengsort_idx_name} \
         --fastq ${trimmed[0]} \
         --prefix ${sampleID} \
         --mode count \
-        --threads ${task.cpus}
-        --out fastq \
+        --threads ${task.cpus} \
         --chunksize 32.0 \
         --compression none &> ${sampleID}_xengsort_log.txt
 
-        cat fastq-host.1.fq | paste - - - - | sort -k1,1 -t " " | tr "\\t" "\\n" > ${sampleID}_fastq-host_sorted.1.fq
-        cat fastq-graft.1.fq | paste - - - - | sort -k1,1 -t " " | tr "\\t" "\\n" > ${sampleID}_fastq-graft_sorted.1.fq
+        cat ${sampleID}-host.fq | paste - - - - | sort -k1,1 -t " " | tr "\\t" "\\n" > ${sampleID}-host_sorted.1.fq
+        cat ${sampleID}-graft.fq | paste - - - - | sort -k1,1 -t " " | tr "\\t" "\\n" > ${sampleID}-graft_sorted.1.fq
 
         """
 
@@ -58,15 +57,14 @@ process XENGSORT_CLASSIFY {
         --prefix ${sampleID} \
         --mode count \
         --threads ${task.cpus} \
-        --out fastq \
         --chunksize 32.0 \
         --compression none &> ${sampleID}_xengsort_log.txt
 
-        cat fastq-host.1.fq | paste - - - - | sort -k1,1 -t " " | tr "\\t" "\\n" > ${sampleID}_fastq-host_sorted.1.fq
-        cat fastq-host.2.fq | paste - - - - | sort -k1,1 -t " " | tr "\\t" "\\n" > ${sampleID}_fastq-host_sorted.2.fq
+        cat ${sampleID}-host.1.fq | paste - - - - | sort -k1,1 -t " " | tr "\\t" "\\n" > ${sampleID}-host_sorted.1.fq
+        cat ${sampleID}-host.2.fq | paste - - - - | sort -k1,1 -t " " | tr "\\t" "\\n" > ${sampleID}-host_sorted.2.fq
 
-        cat fastq-graft.1.fq | paste - - - - | sort -k1,1 -t " " | tr "\\t" "\\n" > ${sampleID}_fastq-graft_sorted.1.fq
-        cat fastq-graft.2.fq | paste - - - - | sort -k1,1 -t " " | tr "\\t" "\\n" > ${sampleID}_fastq-graft_sorted.2.fq
+        cat ${sampleID}-graft.1.fq | paste - - - - | sort -k1,1 -t " " | tr "\\t" "\\n" > ${sampleID}-graft_sorted.1.fq
+        cat ${sampleID}-graft.2.fq | paste - - - - | sort -k1,1 -t " " | tr "\\t" "\\n" > ${sampleID}-graft_sorted.2.fq
 
         """
 

diff --git a/nextflow.config b/nextflow.config
@@ -45,7 +45,7 @@ manifest {
     homePage = "https://github.com/TheJacksonLaboratory/cs-nf-pipelines"
     mainScript = "main.nf"
     nextflowVersion = "!>=22.04.3"
-    version = "0.6.6"
+    version = "0.6.7"
     author = 'Michael Lloyd, Brian Sanderson, Barry Guglielmo, Sai Lek, Peter Fields, Harshpreet Chandok, Carolyn Paisie, Gabriel Rech, Ardian Ferraj, Anuj Srivastava. Copyright Jackson Laboratory 2024'
 }
 

diff --git a/run_scripts/README.md b/run_scripts/README.md
@@ -13,7 +13,7 @@ Example:
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/run_scripts/amplicon_human.sh b/run_scripts/amplicon_human.sh
@@ -5,7 +5,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/run_scripts/atac_human.sh b/run_scripts/atac_human.sh
@@ -5,7 +5,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/run_scripts/atac_mouse.sh b/run_scripts/atac_mouse.sh
@@ -5,7 +5,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/run_scripts/chip_seq_human.sh b/run_scripts/chip_seq_human.sh
@@ -5,7 +5,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/run_scripts/chip_seq_mouse.sh b/run_scripts/chip_seq_mouse.sh
@@ -5,7 +5,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/run_scripts/emase.sh b/run_scripts/emase.sh
@@ -5,7 +5,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/run_scripts/gbrs.sh b/run_scripts/gbrs.sh
@@ -5,7 +5,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/run_scripts/generate_pseudoreference.sh b/run_scripts/generate_pseudoreference.sh
@@ -5,7 +5,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/run_scripts/germline_sv.sh b/run_scripts/germline_sv.sh
@@ -4,7 +4,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=2G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 # LOAD NEXTFLOW

diff --git a/run_scripts/prepare_emase.sh b/run_scripts/prepare_emase.sh
@@ -5,7 +5,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/run_scripts/pta_human.sh b/run_scripts/pta_human.sh
@@ -5,7 +5,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/run_scripts/pta_mouse.sh b/run_scripts/pta_mouse.sh
@@ -5,7 +5,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/run_scripts/rnafusion_human.sh b/run_scripts/rnafusion_human.sh
@@ -5,7 +5,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/run_scripts/rnaseq_human.sh b/run_scripts/rnaseq_human.sh
@@ -5,7 +5,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/run_scripts/rnaseq_mouse.sh b/run_scripts/rnaseq_mouse.sh
@@ -5,7 +5,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/run_scripts/rnaseq_pdx.sh b/run_scripts/rnaseq_pdx.sh
@@ -5,7 +5,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/run_scripts/rrbs_human.sh b/run_scripts/rrbs_human.sh
@@ -5,7 +5,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/run_scripts/rrbs_mouse.sh b/run_scripts/rrbs_mouse.sh
@@ -5,7 +5,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/run_scripts/somatic_wes_pdx.sh b/run_scripts/somatic_wes_pdx.sh
@@ -5,7 +5,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/run_scripts/wes_human.sh b/run_scripts/wes_human.sh
@@ -5,7 +5,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/run_scripts/wes_mouse.sh b/run_scripts/wes_mouse.sh
@@ -5,7 +5,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/run_scripts/wgs_human.sh b/run_scripts/wgs_human.sh
@@ -5,7 +5,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/run_scripts/wgs_mouse.sh b/run_scripts/wgs_mouse.sh
@@ -5,7 +5,7 @@
 #SBATCH -p compute
 #SBATCH -q batch
 #SBATCH -t 72:00:00
-#SBATCH --mem=1G
+#SBATCH --mem=5G
 #SBATCH --ntasks=1
 
 cd $SLURM_SUBMIT_DIR

diff --git a/subworkflows/pdx_rnaseq.nf b/subworkflows/pdx_rnaseq.nf
@@ -128,6 +128,7 @@ workflow PDX_RNASEQ {
     ch_multiqc_files = ch_multiqc_files.mix(FASTP.out.quality_json.collect{it[1]}.ifEmpty([]))
     ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.quality_stats.collect{it[1]}.ifEmpty([]))
     ch_multiqc_files = ch_multiqc_files.mix(XENGSORT_CLASSIFY.out.xengsort_log.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(CHECK_STRANDEDNESS.out.strandedness_report.collect{it[1]}.ifEmpty([]))
     ch_multiqc_files = ch_multiqc_files.mix(RSEM_ALIGNMENT_EXPRESSION_HUMAN.out.rsem_cnt.collect{it[1]}.ifEmpty([]))
     ch_multiqc_files = ch_multiqc_files.mix(RSEM_ALIGNMENT_EXPRESSION_HUMAN.out.star_log.collect{it[1]}.ifEmpty([]))
     ch_multiqc_files = ch_multiqc_files.mix(PICARD_COLLECTRNASEQMETRICS_HUMAN.out.picard_metrics.collect{it[1]}.ifEmpty([]))