diff --git a/CHANGELOG.md b/CHANGELOG.md index 08d23c4a..b0c91eb4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v2.6.1dev + +### `Added` + +- Added `PYOPENMS_CHROMATOGRAMEXTRACTOR` to extract MS1 chromatograms and visualize them in the MultiQC report [#329](https://github.com/nf-core/mhcquant/pull/329) + ## v2.6.0 - nfcore/mhcquant "Mr Bob" - 2024/06/17 ### `Added` diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 619d853b..285d5424 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -3,9 +3,9 @@ custom_logo_url: https://github.com/nf-core/mhcquant custom_logo_title: "nf-core/mhcquant" report_comment: > - This report has been generated by the nf-core/mhcquant + This report has been generated by the nf-core/mhcquant analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. report_section_order: "nf-core-mhcquant-methods-description": order: -1000 @@ -15,5 +15,41 @@ report_section_order: order: -1002 export_plots: true - disable_version_detection: true + +# Modules to run +run_modules: + - custom_content + +# Custom tables and plots +custom_data: + # Summary of chromatogram plot + chromatogram: + plot_type: "linegraph" + file_format: "csv" + section_name: "MS1 Chromatogram" + description: | + An MS1 chromatogram is a plot of the intensity of precursor ions (MS1) detected over time, + typically with retention time (RT) on the x-axis and ion intensity on the y-axis. + It represents the total ion current (TIC) from the MS1 scan, + which is useful for monitoring the overall elution profile of compounds during a chromatographic separation. 
+ pconfig: + id: "chromatogram" + title: "MS1 Chromatograms" + xlab: "Retention time [min]" + xmin: 0 + ylab: "Intensity" + logswitch: true + logswitch_active: true + +sp: + chromatogram: + fn: "*_chrom.csv" +## Define the order of sections +#module_order: +# - custom_content +# +## Set the order of custom code plots and tables +#custom_content: +# order: +# - chromatogram diff --git a/bin/chromatogram_extractor.py b/bin/chromatogram_extractor.py new file mode 100755 index 00000000..0818239e --- /dev/null +++ b/bin/chromatogram_extractor.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python +# Written by Jonas Scheid under the MIT license + +import logging +import csv +import argparse +import matplotlib.pyplot as plt + +import pandas as pd +import pyopenms as oms + +# Setup logging +logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") + + +def parse_arguments(): + parser = argparse.ArgumentParser(description='Exctract TICs of MS1 Spectra') + parser.add_argument('-in','--input', type=str, help='Path to the spectrum file') + parser.add_argument('-out', '--output', type=str, help='Path to the output CSV file containing RT and TIC of Precursors') + return parser.parse_args() + +def main(): + args = parse_arguments() + input_file = args.input + output_file = args.output + + # Load the mzML file + logging.info(f'Loading file: {input_file}') + exp = oms.MSExperiment() + mzml_file = oms.MzMLFile() + mzml_file.load(input_file, exp) + + # Get RT and Spectrum TIC of MS1 Spectra + chromatogram = [(spectrum.getRT() / 60 , spectrum.calculateTIC()) for spectrum in exp.getSpectra() if spectrum.getMSLevel() == 1] + logging.info(f'Found {len(chromatogram)} MS1 Spectra') + logging.info(f'RT range: {round(chromatogram[0][0],2)} - {round(chromatogram[-1][0],2)} [min]') + # Create pandas df + chromatogram_df = pd.DataFrame(chromatogram, columns=['RT', 'TIC']) + # average the TIC of spectra that share the same RT value (RT is not rounded, so nothing is binned into whole minutes) + chromatogram_df = 
chromatogram_df.groupby('RT').mean().reset_index() + # Anchor the trace at zero intensity: prepend (RT=0, TIC=0) and append (RT=max RT, TIC=0) + start = pd.DataFrame([{'RT': 0, 'TIC': 0}]) + end = pd.DataFrame([{'RT': chromatogram_df['RT'].max(), 'TIC': 0}]) + # Concatenate the DataFrames + chromatogram_df = pd.concat([start, chromatogram_df, end], ignore_index=True) + + # Write to csv + chromatogram_df.to_csv(output_file, index=False, header=False) + +if __name__ == '__main__': + main() diff --git a/conf/modules.config b/conf/modules.config index 69159d28..6db009b3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -186,6 +186,12 @@ process { ] } + withName: 'PYOPENMS_CHROMATOGRAMEXTRACTOR' { + publishDir = [ + enabled: false + ] + } + withName: 'OPENMS_MAPALIGNERIDENTIFICATION' { ext.args = [ "-model:type linear", diff --git a/modules.json b/modules.json index 3de27dda..b5439bd7 100644 --- a/modules.json +++ b/modules.json @@ -12,7 +12,7 @@ }, "multiqc": { "branch": "master", - "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", "installed_by": ["modules"] }, "openms/decoydatabase": { diff --git a/modules/local/pyopenms_chromatogramextractor.nf b/modules/local/pyopenms_chromatogramextractor.nf new file mode 100644 index 00000000..21836d9d --- /dev/null +++ b/modules/local/pyopenms_chromatogramextractor.nf @@ -0,0 +1,47 @@ +process PYOPENMS_CHROMATOGRAMEXTRACTOR { + tag "$meta.id" + label 'process_single' + + conda "bioconda::pyopenms=3.1.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pyopenms:3.1.0--py311h9b8898c_0' : + 'biocontainers/pyopenms:3.1.0--py311h9b8898c_0' }" + + input: + tuple val(meta), path(mzml) + + output: + tuple val(meta), path("*.csv") , emit: csv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${mzml.baseName}" + def args = task.ext.args ?: '' + + """ + chromatogram_extractor.py \\ + -in $mzml \\ + -out ${prefix}_chrom.csv \\ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pyOpenMS: \$(pip show pyopenms | grep Version | cut -d ' ' -f 2) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${mzml.baseName}" + + """ + touch ${prefix}_chrom.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pyOpenMS: \$(pip show pyopenms | grep Version | cut -d ' ' -f 2) + END_VERSIONS + """ +} diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index ca39fb67..f1cd99b0 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -1,7 +1,5 @@ -name: multiqc channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::multiqc=1.21 + - bioconda::multiqc=1.24.1 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 47ac352f..ceaec139 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,14 +3,16 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' : - 'biocontainers/multiqc:1.21--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.24.1--pyhdfd78af_0' : + 'biocontainers/multiqc:1.24.1--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" path(multiqc_config) path(extra_multiqc_config) path(multiqc_logo) + path(replace_names) + path(sample_names) output: path "*multiqc_report.html", emit: report @@ -23,16 +25,22 @@ process MULTIQC { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' - def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' + def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : '' + def replace = replace_names ? "--replace-names ${replace_names}" : '' + def samples = sample_names ? "--sample-names ${sample_names}" : '' """ multiqc \\ --force \\ $args \\ $config \\ + $prefix \\ $extra_config \\ $logo \\ + $replace \\ + $samples \\ . cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index 45a9bc35..382c08cb 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -29,6 +29,19 @@ input: type: file description: Optional logo file for MultiQC pattern: "*.{png}" + - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. + pattern: "*.{tsv}" + - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample_names + argument. 
+ pattern: "*.{tsv}" output: - report: type: file diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test index f1c4242e..33316a7d 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -8,6 +8,8 @@ nextflow_process { tag "modules_nfcore" tag "multiqc" + config "./nextflow.config" + test("sarscov2 single-end [fastqc]") { when { @@ -17,6 +19,8 @@ nextflow_process { input[1] = [] input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } @@ -41,6 +45,8 @@ nextflow_process { input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } @@ -66,6 +72,8 @@ nextflow_process { input[1] = [] input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index bfebd802..83fa080c 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -2,14 +2,14 @@ "multiqc_versions_single": { "content": [ [ - "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + "versions.yml:md5,6eb13f3b11bbcbfc98ad3166420ff760" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-29T08:48:55.657331" + "timestamp": "2024-07-10T12:41:34.562023" }, "multiqc_stub": { "content": [ @@ -17,25 +17,25 @@ "multiqc_report.html", "multiqc_data", "multiqc_plots", - "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + "versions.yml:md5,6eb13f3b11bbcbfc98ad3166420ff760" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-29T08:49:49.071937" + "timestamp": "2024-07-10T11:27:11.933869532" }, "multiqc_versions_config": { "content": [ [ - 
"versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + "versions.yml:md5,6eb13f3b11bbcbfc98ad3166420ff760" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.2" }, - "timestamp": "2024-02-29T08:49:25.457567" + "timestamp": "2024-07-10T11:26:56.709849369" } -} \ No newline at end of file +} diff --git a/modules/nf-core/multiqc/tests/nextflow.config b/modules/nf-core/multiqc/tests/nextflow.config new file mode 100644 index 00000000..c537a6a3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'MULTIQC' { + ext.prefix = null + } +} diff --git a/nextflow.config b/nextflow.config index e8863f02..57457e05 100644 --- a/nextflow.config +++ b/nextflow.config @@ -292,7 +292,7 @@ manifest { description = """Identify and quantify peptides from mass spectrometry raw data""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.6.0' + version = '2.6.1dev' doi = '10.1021/acs.jproteome.9b00313' } diff --git a/workflows/mhcquant.nf b/workflows/mhcquant.nf index 018e22ae..af19db1e 100644 --- a/workflows/mhcquant.nf +++ b/workflows/mhcquant.nf @@ -8,15 +8,16 @@ // MODULE: Loaded from modules/local/ // -include { OPENMS_FILEFILTER } from '../modules/local/openms_filefilter' -include { OPENMS_COMETADAPTER } from '../modules/local/openms_cometadapter' -include { OPENMS_PEPTIDEINDEXER } from '../modules/local/openms_peptideindexer' -include { MS2RESCORE } from '../modules/local/ms2rescore' -include { OPENMS_PSMFEATUREEXTRACTOR } from '../modules/local/openms_psmfeatureextractor' -include { OPENMS_PERCOLATORADAPTER } from '../modules/local/openms_percolatoradapter' -include { PYOPENMS_IONANNOTATOR } from '../modules/local/pyopenms_ionannotator' -include { OPENMS_TEXTEXPORTER } from '../modules/local/openms_textexporter' -include { OPENMS_MZTABEXPORTER } from '../modules/local/openms_mztabexporter' +include { OPENMS_FILEFILTER } from '../modules/local/openms_filefilter' +include { 
PYOPENMS_CHROMATOGRAMEXTRACTOR } from '../modules/local/pyopenms_chromatogramextractor' +include { OPENMS_COMETADAPTER } from '../modules/local/openms_cometadapter' +include { OPENMS_PEPTIDEINDEXER } from '../modules/local/openms_peptideindexer' +include { MS2RESCORE } from '../modules/local/ms2rescore' +include { OPENMS_PSMFEATUREEXTRACTOR } from '../modules/local/openms_psmfeatureextractor' +include { OPENMS_PERCOLATORADAPTER } from '../modules/local/openms_percolatoradapter' +include { PYOPENMS_IONANNOTATOR } from '../modules/local/pyopenms_ionannotator' +include { OPENMS_TEXTEXPORTER } from '../modules/local/openms_textexporter' +include { OPENMS_MZTABEXPORTER } from '../modules/local/openms_mztabexporter' // // SUBWORKFLOW: Loaded from subworkflows/local/ @@ -83,6 +84,11 @@ workflow MHCQUANT { ch_clean_mzml_file = PREPARE_SPECTRA.out.mzml } + // Compute MS1 TICs for QC + PYOPENMS_CHROMATOGRAMEXTRACTOR(ch_clean_mzml_file) + ch_versions = ch_versions.mix(PYOPENMS_CHROMATOGRAMEXTRACTOR.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(PYOPENMS_CHROMATOGRAMEXTRACTOR.out.csv.map{ meta, mzml -> mzml }) + // Run comet database search OPENMS_COMETADAPTER(ch_clean_mzml_file.combine(ch_decoy_db)) ch_versions = ch_versions.mix(OPENMS_COMETADAPTER.out.versions) @@ -223,7 +229,9 @@ workflow MHCQUANT { ch_multiqc_files.collect(), ch_multiqc_config.toList(), ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() + ch_multiqc_logo.toList(), + [], + [] ) emit: