From 7679ecca9f3fb4d8cad05957dd88e0fcabbaf3db Mon Sep 17 00:00:00 2001
From: niekdejonge
Date: Tue, 31 Oct 2023 17:19:37 +0100
Subject: [PATCH 1/2] First implementation of SpectrumValidator and Modification

---
Review notes (ignored by git am, not part of the commit message):
* Layout was rebuilt from a whitespace-collapsed copy of this series. Blank
  context lines are assumed to be empty, and the hunk starts at the closing
  docstring quotes because the context before them could not be recovered.
* The index blob ids and commit ids are carried over unchanged and do not
  describe the revised content.
* find_modifications: "after" was read with spectrum_new(metadata_field),
  which calls the spectrum; it now uses spectrum_new.get(metadata_field).
* process_spectrum_store_modifications: the None check now runs before
  find_modifications, which cannot compare against None.
* process_spectrum: the error message names the concrete class.
* Docstrings added.

 .../validation_pipeline.py | 84 +++++++++++++++++--
 1 file changed, 78 insertions(+), 6 deletions(-)

diff --git a/library_spectra_validation/validation_pipeline.py b/library_spectra_validation/validation_pipeline.py
index d0e6ac7..72a51c4 100644
--- a/library_spectra_validation/validation_pipeline.py
+++ b/library_spectra_validation/validation_pipeline.py
@@ -10,9 +10,81 @@ Add index handling - when a spectrum is processed, its id is added to the corresponding list
 """
-class ValidationPipeline:
+import logging
+from typing import Iterable, List, Optional, Union
+from matchms.filtering.SpectrumProcessor import SpectrumProcessor
 
-    def __init__(self, spectrum) -> None:
-        pass
+logger = logging.getLogger("matchms")
 
-    def run(spectrum):
-        #here run all the checks and modifications
-        return instructions #{'quality':, 'metadata adduct': {'previous':..., 'updated':....}}
\ No newline at end of file
+
+class Modification:
+    """One metadata change made to a spectrum by a single filter step.
+
+    Stores the metadata field name, its value before and after the filter,
+    the logging message for the change and whether a user validated it.
+    """
+
+    def __init__(self, metadata_field, before, after, logging_message, validated_by_user):
+        self.metadata_field = metadata_field
+        self.before = before
+        self.after = after
+        self.logging_message = logging_message
+        self.validated_by_user = validated_by_user
+
+
+def find_modifications(spectrum_old, spectrum_new, logging_message: str):
+    """Return a Modification for each checked metadata field that differs.
+
+    Both spectra are read through ``get(metadata_field)``. Every returned
+    Modification carries logging_message and has validated_by_user=False.
+    """
+    modifications = []
+    metadata_fields_to_check = ["parent_mass", "precursor_mz", "adduct", "smiles",
+                                "compound_name", "inchi", "inchikey", "charge", "ionmode"]
+    for metadata_field in metadata_fields_to_check:
+        before = spectrum_old.get(metadata_field)
+        after = spectrum_new.get(metadata_field)
+        if before != after:
+            modifications.append(
+                Modification(metadata_field=metadata_field,
+                             before=before,
+                             after=after,
+                             logging_message=logging_message,
+                             validated_by_user=False))
+    return modifications
+
+
+class SpectrumValidator(SpectrumProcessor):
+    """SpectrumProcessor that records which metadata its filters modify."""
+
+    def __init__(self, predefined_pipeline: Optional[str] = 'default',
+                 additional_filters: Iterable[Union[str, List[dict]]] = ()):
+        super().__init__(predefined_pipeline,
+                         additional_filters)
+
+    def process_spectrum(self, spectrum,
+                         processing_report=None):
+        """Disabled on purpose; always raises AttributeError."""
+        raise AttributeError(
+            f"process spectrum is not a valid method of {type(self).__name__}")
+
+    def process_spectrum_store_modifications(self, spectrum) -> List[Modification]:
+        """Run every filter in order and return the modifications they made.
+
+        Raises TypeError when no filters are set.
+        Stops at the first filter that returns None instead of a spectrum.
+        """
+        if not self.filters:
+            raise TypeError("No filters to process")
+        modifications = []
+        for filter_func in self.filters:
+            # todo capture logging
+            logging_message = ""
+            spectrum_out = filter_func(spectrum)
+            # todo Think about what to do here
+            if spectrum_out is None:
+                # Checked first: find_modifications cannot compare against None.
+                break
+            modifications += find_modifications(spectrum_old=spectrum,
+                                                spectrum_new=spectrum_out,
+                                                logging_message=logging_message)
+            spectrum = spectrum_out
+        return modifications

From 21106255ca4347d395d6fc64aa0fa5009ebca6ff Mon Sep 17 00:00:00 2001
From: niekdejonge
Date: Wed, 1 Nov 2023 11:49:32 +0100
Subject: [PATCH 2/2] Added spectrumvalidator

---
Review notes (ignored by git am, not part of the commit message):
* Hunk ranges follow the revised patch 1/2; the index blob ids are carried
  over unchanged and do not describe the revised content.
* SpectrumValidator.__init__: {filter_name: [fields_checked]} used undefined
  names and raised NameError on every instantiation. The mapping is now an
  optional argument that defaults to an empty dict.
* process_spectrum_store_failed_filters: "+=" extended the list with the
  characters of the message; it now appends one entry per failed filter and
  is annotated as returning List[str].
* The file now ends with a newline.

 .../validation_pipeline.py | 44 +++++++++++++++++--
 1 file changed, 40 insertions(+), 4 deletions(-)

diff --git a/library_spectra_validation/validation_pipeline.py b/library_spectra_validation/validation_pipeline.py
index 72a51c4..d872775 100644
--- a/library_spectra_validation/validation_pipeline.py
+++ b/library_spectra_validation/validation_pipeline.py
@@ -26,6 +26,7 @@ def __init__(self, metadata_field, before, after, logging_message, validated_by_
         self.metadata_field = metadata_field
         self.before = before
         self.after = after
+        # self.original =
         self.logging_message = logging_message
         self.validated_by_user = validated_by_user
 
@@ -52,7 +53,7 @@ def find_modifications(spectrum_old, spectrum_new, logging_message: str):
     return modifications
 
 
-class SpectrumValidator(SpectrumProcessor):
+class SpectrumRepairer(SpectrumProcessor):
     """SpectrumProcessor that records which metadata its filters modify."""
 
     def __init__(self, predefined_pipeline: Optional[str] = 'default',
@@ -70,7 +71,7 @@ def process_spectrum_store_modifications(self, spectrum) -> List[Modification]:
         """Run every filter in order and return the modifications they made.
 
         Raises TypeError when no filters are set.
-        Stops at the first filter that returns None instead of a spectrum.
+        Raises AttributeError when a filter returns None instead of a spectrum.
         """
         if not self.filters:
             raise TypeError("No filters to process")
@@ -79,12 +80,47 @@ def process_spectrum_store_modifications(self, spectrum) -> List[Modification]:
             # todo capture logging
             logging_message = ""
             spectrum_out = filter_func(spectrum)
-            # todo Think about what to do here
             if spectrum_out is None:
                 # Checked first: find_modifications cannot compare against None.
-                break
+                raise AttributeError("SpectrumRepairer is only expected to repair spectra, not set to None")
             modifications += find_modifications(spectrum_old=spectrum,
                                                 spectrum_new=spectrum_out,
                                                 logging_message=logging_message)
             spectrum = spectrum_out
         return modifications
+
+
+class SpectrumValidator(SpectrumProcessor):
+    """SpectrumProcessor that reports which requirement filters fail."""
+
+    def __init__(self, fields_checked_by_filter: Optional[dict] = None):
+        # todo add the fields each requirement checks.
+        # Maps filter name -> list of metadata fields checked by that filter.
+        if fields_checked_by_filter is None:
+            fields_checked_by_filter = {}
+        super().__init__(predefined_pipeline=None,
+                         additional_filters=list(fields_checked_by_filter))
+
+    def process_spectrum(self, spectrum,
+                         processing_report=None):
+        """Disabled on purpose; always raises AttributeError."""
+        raise AttributeError(
+            f"process spectrum is not a valid method of {type(self).__name__}")
+
+    def process_spectrum_store_failed_filters(self, spectrum) -> List[str]:
+        """Run every filter on spectrum and return one message per failed filter.
+
+        Raises TypeError when no filters are set.
+        A filter counts as failed when it returns None.
+        """
+        if not self.filters:
+            raise TypeError("No filters to process")
+        failed_requirements = []
+        for filter_func in self.filters:
+            # todo capture logging
+            logging_message = ""
+            spectrum_out = filter_func(spectrum)
+            if spectrum_out is None:
+                # append: += would extend the list with the str's characters.
+                failed_requirements.append(logging_message)
+        return failed_requirements