Merge pull request #285 from jonasscheid/rearrange_quant

Final changes for release 2.5.0
nf-core · Oct 9, 2023 · 1990be7 · 1990be7
2 parents 87141eb + eb6d692
commit 1990be7
Show file tree

Hide file tree

Showing 16 changed files with 33 additions and 85 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -55,7 +55,7 @@ jobs:
           # Test latest edge release of Nextflow
           - NXF_VER: ""
             NXF_EDGE: "1"
-        tests: ["test_deeplc", "test_ms2pip", "test_ionannotator", "test_quant", "test_full"]
+        tests: ["test_deeplc", "test_ms2pip", "test_ionannotator", "test_full"]
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v2

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,7 +3,7 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## v2.5.0 - nfcore/mhcquant "Angry Bird" - 2023/10/04
+## v2.5.0 - nfcore/mhcquant "Angry Bird" - 2023/10/09
 
 ### `Added`
 

diff --git a/README.md b/README.md
@@ -32,52 +32,32 @@ On release, automated continuous integration tests run the pipeline on a full-si
 > to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline)
 > with `-profile test` before running the workflow on actual data.
 
-<!-- TODO nf-core: Describe the minimum required steps to execute the pipeline, e.g. how to prepare samplesheets.
-     Explain what rows and columns represent. For instance (please edit as appropriate):
-
 First, prepare a samplesheet with your input data that looks as follows:
 
-`samplesheet.csv`:
+`samplesheet.tsv`:
 
-```csv
-sample,fastq_1,fastq_2
-CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
+```tsv
+ID	Sample	Condition	ReplicateFileName
+1	msrun	tumor	/path/to/msrun.raw|mzML|d
 ```
 
-Each row represents a fastq file (single-end) or a pair of fastq files (paired end).
-
--->
+Each row represents a mass spectrometry run in one of the formats: raw, mzML, d.
 
 Now, you can run the pipeline using:
 
-<!-- TODO nf-core: update the following command to include all required parameters for a minimal example -->
-
 ```bash
-nextflow run nf-core/mhcquant \
-   -profile <docker/singularity/.../institute> \
-   --input samplesheet.csv \
-   --outdir <OUTDIR>
+nextflow run nf-core/mhcquant \
+    -profile <docker/singularity/.../institute> \
+    --input 'samplesheet.tsv' \
+    --fasta 'SWISSPROT_2020.fasta' \
+    --outdir ./results
 ```
 
-:::warning
-Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those
-provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;
-see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
-:::
+> [!NOTE]
+> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
 
 For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/mhcquant/usage) and the [parameter documentation](https://nf-co.re/mhcquant/parameters).
 
-## Pipeline output
-
-=======
-
-```bash
-nextflow run nf-core/mhcquant -profile test,<docker/singularity/podman/shifter/charliecloud/conda/institute> \
-                              --input 'samples.tsv' \
-                              --fasta 'SWISSPROT_2020.fasta' \
-                              --outdir ./results
-```
-
 ## Pipeline summary
 
 ### Default Steps
@@ -130,12 +110,19 @@ Additional functionality contained by the pipeline currently includes:
 - Retention time prediction (`DeepLC`)
 - Peak intensity prediction (`MS2PIP`)
 
+> [!WARNING]
+> The refine FDR feature will be evaluated on a large benchmark dataset in the following releases.
+> Consider it as an experimental feature.
+
 #### Refine FDR
 
 - This application converts several OpenMS XML formats to mzTab. (`MzTabExporter`)
 - Predict PSM results using mhcflurry to shrink the search space (`mhcflurry`)
 - Facilitates the input to, the call of and output integration of Percolator (`PercolatorAdapter`)
 
+> [!WARNING]
+> The HLA prediction feature is outdated and will be reworked in the following releases.
+
 #### Prediction of HLA class 1 peptides
 
 - Predict peptides (`mhcnuggets`, `mhcflurry`, `fred2`)
@@ -149,8 +136,6 @@ Additional functionality contained by the pipeline currently includes:
 
 ## Documentation
 
-> > > > > > > dev
-
 To see the results of a test run with a full-size dataset, refer to the [results](https://nf-co.re/mhcquant/results) tab on the nf-core website pipeline page.
 For more details about the output files and reports, please refer to the
 [output documentation](https://nf-co.re/mhcquant/output).

diff --git a/conf/base.config b/conf/base.config
@@ -57,7 +57,7 @@ process {
         cache = false
     }
     withName:TDF2MZML {
-        cpus   = { check_max( 10    * task.attempt, 'cpus'    ) }
+        cpus   = { check_max( 1     * task.attempt, 'cpus'    ) }
         memory = { check_max( 10.GB * task.attempt, 'memory'  ) }
         time   = { check_max( 16.h  * task.attempt, 'time'    ) }
     }

diff --git a/conf/test.config b/conf/test.config
@@ -22,7 +22,6 @@ params {
     // Input data
     fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta'
     input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/HepG2_sample_sheet.tsv'
-    allele_sheet = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/HepG2_allele_sheet.tsv'
 
     // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full)
     skip_quantification = true

diff --git a/conf/test_deeplc.config b/conf/test_deeplc.config
@@ -25,7 +25,6 @@ params {
 
     // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full)
     skip_quantification     = true
-    spectrum_batch_size     = 5000
     use_deeplc              = true
     deeplc_add_abs_rt_error = true
     deeplc_add_sqr_rt_error = true

diff --git a/conf/test_full.config b/conf/test_full.config
@@ -17,12 +17,5 @@ params {
     // Input data
     fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/UP000005640_9606.fasta'
     input = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/sample_sheet_full.tsv'
-    allele_sheet = 'https://raw.githubusercontent.com/nf-core/test-datasets/mhcquant/testdata/allele_sheet_full.tsv'
 
-    predict_class_1   = true
-    predict_class_2   = true
-    use_deeplc        = true
-    use_ms2pip        = true
-    ms2pip_model_name = 'CID'
-    annotate_ions     = true
 }
diff --git a/conf/test_ionannotator.config b/conf/test_ionannotator.config
@@ -25,6 +25,5 @@ params {
 
     // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full)
     skip_quantification = true
-    spectrum_batch_size = 5000
     annotate_ions       = true
 }
diff --git a/conf/test_ms2pip.config b/conf/test_ms2pip.config
@@ -25,7 +25,6 @@ params {
 
     // Don't do quantification since this step needs a larger test dataset (-> test quantification using test_full)
     skip_quantification = true
-    spectrum_batch_size = 5000
     use_ms2pip          = true
     ms2pip_model_name   = 'Immuno-HCD'
 }
diff --git a/conf/test_quant.config b/conf/test_quant.config
diff --git a/modules/local/pyopenms_idfilter.nf b/modules/local/pyopenms_idfilter.nf
@@ -1,6 +1,6 @@
 process PYOPENMS_IDFILTER {
     tag "$meta.id"
-    label 'process_low'
+    label 'process_single'
 
     conda "bioconda::pyopenms=3.0.0"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?

diff --git a/modules/local/tdf2mzml.nf b/modules/local/tdf2mzml.nf
@@ -1,6 +1,5 @@
 process TDF2MZML {
     tag "$meta.id"
-    //label 'process_medium'
 
     container "docker.io/mfreitas/tdf2mzml"
 

diff --git a/nextflow.config b/nextflow.config
@@ -71,7 +71,7 @@ params {
 
     // DeepLC settings
     use_deeplc                      = false
-    deeplc_calibration_mode         = 'rt_bin'
+    deeplc_calibration_mode         = 'idx_bin'
     deeplc_calibration_bins         = 20
     deeplc_add_abs_rt_error         = false
     deeplc_add_sqr_rt_error         = false
@@ -234,7 +234,6 @@ profiles {
     test_deeplc       { includeConfig 'conf/test_deeplc.config'       }
     test_ms2pip       { includeConfig 'conf/test_ms2pip.config'       }
     test_ionannotator { includeConfig 'conf/test_ionannotator.config' }
-    test_quant        { includeConfig 'conf/test_quant.config'        }
     test_full         { includeConfig 'conf/test_full.config'         }
 }
 

diff --git a/subworkflows/local/process_feature.nf b/subworkflows/local/process_feature.nf
@@ -6,8 +6,6 @@ include { OPENMS_IDMERGER }                                                 from
 include { OPENMS_FEATUREFINDERIDENTIFICATION }                              from '../../modules/local/openms_featurefinderidentification'
 include { OPENMS_FEATURELINKERUNLABELEDKD }                                 from '../../modules/local/openms_featurelinkerunlabeledkd'
 include { OPENMS_IDCONFLICTRESOLVER }                                       from '../../modules/local/openms_idconflictresolver'
-include { OPENMS_TEXTEXPORTER as OPENMS_TEXTEXPORTER_QUANT }                from '../../modules/local/openms_textexporter'
-include { OPENMS_MZTABEXPORTER as OPENMS_MZTABEXPORTER_QUANT }              from '../../modules/local/openms_mztabexporter'
 
 workflow PROCESS_FEATURE {
     take:

diff --git a/subworkflows/local/quant.nf b/subworkflows/local/quant.nf
@@ -45,7 +45,7 @@ workflow QUANT {
         // NOTE: This is an alternative filtering method that will be replaced by IDFilter with new release of OpenMS
         PYOPENMS_IDFILTER( ch_runs_to_be_filtered ).filtered
                 .map { meta, idxml -> [[id:meta.sample + '_' + meta.condition], [id:meta.id, file:idxml]] }
-                .groupTuple(sort: sortById)
+                .groupTuple( sort: sortById )
                 .map { meta, idxml -> [meta, idxml.file] }
                 .set { ch_runs_to_be_aligned }
         ch_versions = ch_versions.mix(PYOPENMS_IDFILTER.out.versions.ifEmpty(null))
@@ -56,21 +56,21 @@ workflow QUANT {
             mzml,
             merge_meta_map
         )
-        ch_versions = ch_versions.mix(MAP_ALIGNMENT.out.versions.ifEmpty(null))
+        ch_versions = ch_versions.mix( MAP_ALIGNMENT.out.versions.ifEmpty(null) )
 
         // We need to merge groupwise the aligned idxml files together to use them as id_ext in featurefinder
-        OPENMS_IDMERGER_QUANT(MAP_ALIGNMENT.out.aligned_idxml
+        OPENMS_IDMERGER_QUANT( MAP_ALIGNMENT.out.aligned_idxml
                                     .map { meta, aligned_idxml -> [[id: meta.sample + '_' + meta.condition], aligned_idxml] }
                                     .groupTuple())
         ch_versions = ch_versions.mix(OPENMS_IDMERGER_QUANT.out.versions.ifEmpty(null))
 
         // Manipulate channels such that we end up with : [meta, mzml, run_idxml, merged_runs_idxml]
         MAP_ALIGNMENT.out.aligned_mzml
-                .join(MAP_ALIGNMENT.out.aligned_idxml)
-                .map {meta, mzml, idxml -> [[id: meta.sample + '_' + meta.condition], meta, [id:meta.id, file:mzml], [id:meta.id, file:idxml]] }
-                .groupTuple(sort: sortById)
-                .map { group_meta, meta, mzml, idxml -> [group_meta, meta, mzml.file, idxml.file]}
-                .join(OPENMS_IDMERGER_QUANT.out.idxml)
+                .join( MAP_ALIGNMENT.out.aligned_idxml )
+                .map { meta, mzml, idxml -> [[id: meta.sample + '_' + meta.condition], meta, [id:meta.id, file:mzml], [id:meta.id, file:idxml]] }
+                .groupTuple( sort: sortById )
+                .map { group_meta, meta, mzml, idxml -> [group_meta, meta, mzml.file, idxml.file] }
+                .join( OPENMS_IDMERGER_QUANT.out.idxml )
                 .map { group_meta, meta, mzml, idxml, merged_idxml -> [meta, mzml, idxml, merged_idxml] }
                 .transpose()
                 .set { ch_runs_to_be_quantified }

diff --git a/workflows/mhcquant.nf b/workflows/mhcquant.nf
@@ -336,7 +336,7 @@ workflow MHCQUANT {
 
     if (params.annotate_ions) {
         // Join the ch_filtered_idxml and the ch_mzml_file
-        ch_clean_mzml_file.map {meta, mzml -> [[id: meta.sample + '_' + meta.condition], mzml] }
+        ch_clean_mzml_file.map { meta, mzml -> [[id: meta.sample + '_' + meta.condition], mzml] }
             .groupTuple()
             .join(filter_q_value)
             .set{ ch_ion_annotator_input }