From 0771b238ace19fa58ef8cf942863ef29e261da04 Mon Sep 17 00:00:00 2001 From: Kevin Velghe Date: Fri, 23 Aug 2024 13:28:51 +0200 Subject: [PATCH 1/4] add support for gzipped files --- ms2pip/spectrum_input.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/ms2pip/spectrum_input.py b/ms2pip/spectrum_input.py index beca7c2..3dfbbe2 100644 --- a/ms2pip/spectrum_input.py +++ b/ms2pip/spectrum_input.py @@ -29,8 +29,17 @@ def read_spectrum_file(spectrum_file: str) -> Generator[ObservedSpectrum, None, If the file extension is not supported. """ - file_extension = Path(spectrum_file).suffix.lower() - if file_extension not in [".mgf", ".mzml", ".d"] and not _is_minitdf(spectrum_file): + spectrum_path = Path(spectrum_file) + file_extension = spectrum_path.suffix.lower() + if ( + file_extension not in [".mgf", ".mzml", ".raw", ".d"] + and not ( + file_extension == ".gz" + and len(spectrum_path.suffixes) > 1 + and spectrum_path.suffixes[-2].lower() in [".mgf", ".mzml"] + ) + and not _is_minitdf(spectrum_file) + ): raise UnsupportedSpectrumFiletypeError(file_extension) for spectrum in get_ms2_spectra(str(spectrum_file)): From 99ace0fcd43a39e257201a24b781c6ee814e2577 Mon Sep 17 00:00:00 2001 From: Kevin Velghe Date: Fri, 23 Aug 2024 13:29:33 +0200 Subject: [PATCH 2/4] catch exception instead of own filetype check --- ms2pip/spectrum_input.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/ms2pip/spectrum_input.py b/ms2pip/spectrum_input.py index 3dfbbe2..f5b0120 100644 --- a/ms2pip/spectrum_input.py +++ b/ms2pip/spectrum_input.py @@ -29,20 +29,12 @@ def read_spectrum_file(spectrum_file: str) -> Generator[ObservedSpectrum, None, If the file extension is not supported. """ - spectrum_path = Path(spectrum_file) - file_extension = spectrum_path.suffix.lower() - if ( - file_extension not in [".mgf", ".mzml", ".raw", ".d"] - and not ( - file_extension == ".gz" - and len(spectrum_path.suffixes) > 1 - and spectrum_path.suffixes[-2].lower() in [".mgf", ".mzml"] - ) - and not _is_minitdf(spectrum_file) - ): - raise UnsupportedSpectrumFiletypeError(file_extension) + try: + spectra = get_ms2_spectra(str(spectrum_file)) + except ValueError: + raise UnsupportedSpectrumFiletypeError(Path(spectrum_file).suffixes) - for spectrum in get_ms2_spectra(str(spectrum_file)): + for spectrum in spectra: obs_spectrum = ObservedSpectrum( mz=np.array(spectrum.mz, dtype=np.float32), intensity=np.array(spectrum.intensity, dtype=np.float32), From 06b2e95a4ecfd7e79e54f934d7303320ffaca850 Mon Sep 17 00:00:00 2001 From: Kevin Velghe Date: Thu, 29 Aug 2024 19:06:40 +0200 Subject: [PATCH 3/4] drop unused function --- ms2pip/spectrum_input.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/ms2pip/spectrum_input.py b/ms2pip/spectrum_input.py index f5b0120..8674c72 100644 --- a/ms2pip/spectrum_input.py +++ b/ms2pip/spectrum_input.py @@ -51,15 +51,3 @@ def read_spectrum_file(spectrum_file: str) -> Generator[ObservedSpectrum, None, ): continue yield obs_spectrum - - -def _is_minitdf(spectrum_file: str) -> bool: - """ - Check if the spectrum file is a Bruker miniTDF folder. - - A Bruker miniTDF folder has no fixed name, but contains files matching the patterns - ``*ms2spectrum.bin`` and ``*ms2spectrum.parquet``. - """ - files = set(Path(spectrum_file).glob("*ms2spectrum.bin")) - files.update(Path(spectrum_file).glob("*ms2spectrum.parquet")) - return len(files) >= 2 From 14e4330bc203a1b4ec196c23ce612b9877e99ec0 Mon Sep 17 00:00:00 2001 From: Kevin Velghe Date: Thu, 29 Aug 2024 19:06:53 +0200 Subject: [PATCH 4/4] update documentation to run ms2pip correlate --- docs/source/usage.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/usage.rst b/docs/source/usage.rst index aca0d15..ac97c38 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -65,7 +65,7 @@ For instance: .. code-block:: sh - ms2pip correlate results.sage.tsv --spectrum-file spectra.mgf + ms2pip correlate --psm-filetype sage results.sage.tsv spectra.mgf ``get-training-data``