From ccba6254b15c3eac3dd1f8d766fce6abc88686e7 Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Fri, 31 Mar 2023 11:41:38 -0400 Subject: [PATCH 01/24] checkpoint --- implementations/python/mzlib/attributes.py | 11 +++++++--- implementations/python/mzlib/cluster.py | 6 ++++-- implementations/python/mzlib/spectrum.py | 25 +++++++++++++++++----- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/implementations/python/mzlib/attributes.py b/implementations/python/mzlib/attributes.py index 8349187..04010fd 100644 --- a/implementations/python/mzlib/attributes.py +++ b/implementations/python/mzlib/attributes.py @@ -7,18 +7,20 @@ class Attribute(object): - __slots__ = ("key", "value", "group_id") + __slots__ = ("key", "value", "group_id", "owner_id") key: str value: Union[str, int, float, 'Attribute', List] group_id: Optional[str] + owner_id: int = -1 - def __init__(self, key, value, group_id=None): + def __init__(self, key, value, group_id=None, owner_id=-1): self.key = key self.value = value self.group_id = group_id + self.owner_id = owner_id def copy(self): - return self.__class__(self.key, self.value, self.group_id) + return self.__class__(self.key, self.value, self.group_id, self.owner_id) def __getitem__(self, i): if i == 0: @@ -27,6 +29,8 @@ def __getitem__(self, i): return self.value elif i == 2: return self.group_id + elif i == 3: + return self.owner_id else: raise IndexError(i) @@ -35,6 +39,7 @@ def __iter__(self): yield self.value if self.group_id: yield self.group_id + yield self.owner_id def __len__(self): if self.group_id is None: diff --git a/implementations/python/mzlib/cluster.py b/implementations/python/mzlib/cluster.py index a3f1f31..2d5a3f3 100644 --- a/implementations/python/mzlib/cluster.py +++ b/implementations/python/mzlib/cluster.py @@ -7,7 +7,7 @@ from mzlib.attributes import AttributeManager, AttributeManagedProperty from .utils import ensure_iter, flatten -SIMILAR_SPECTRUM_KEYS = "" +SIMILAR_SPECTRUM_KEYS = "MS:1003263|similar 
spectrum keys" SIMILAR_SPECTRUM_USI = "MS:1003264|similar spectrum USI" CLUSTER_KEY = "MS:1003267|spectrum cluster key" @@ -45,6 +45,8 @@ def __init__(self, attributes: List): @property def members(self) -> List[ClusterMemberRef]: - internal_refs = [SpectrumRef(k) for k in flatten(ensure_iter(self._member_references))] + internal_refs = [ + SpectrumRef(k) for k in flatten(ensure_iter(self._member_references)) + ] usi_members = [USIRef(k) for k in ensure_iter(self._cluster_member_usis)] return internal_refs + usi_members diff --git a/implementations/python/mzlib/spectrum.py b/implementations/python/mzlib/spectrum.py index 5f86853..8f363f1 100644 --- a/implementations/python/mzlib/spectrum.py +++ b/implementations/python/mzlib/spectrum.py @@ -2,11 +2,17 @@ import textwrap -from typing import Dict, List +from typing import Any, Dict, List, Optional, TYPE_CHECKING -from mzlib.attributes import AttributeManager, AttributeManagedProperty, AttributeListManagedProperty, AttributeProxy as _AttributeProxy, AttributeFacet +from mzlib.attributes import ( + AttributeManager, AttributeManagedProperty, AttributeListManagedProperty, + AttributeProxy as _AttributeProxy, AttributeFacet +) from mzlib.analyte import Analyte, InterpretationCollection, Interpretation +if TYPE_CHECKING: + from mzlib.spectrum_library import SpectrumLibrary + #A class that holds data for each spectrum that is read from the SpectralLibrary class SPECTRUM_NAME = "MS:1003061|spectrum name" @@ -26,16 +32,25 @@ class Spectrum(AttributeManager): peak_list: List analytes: Dict[str, Analyte] interpretations: InterpretationCollection + _source: Optional['SpectrumLibrary'] #### Constructor - def __init__(self, attributes=None, peak_list=None, analytes=None, interpretations=None): + def __init__(self, attributes=None, peak_list=None, analytes=None, + interpretations=None): """ - __init__ - SpectrumLibrary constructor Parameters ---------- - attributes: list + attributes : list A list of attribute [key, value (, 
group)] sets to initialize to. + peak_list : list + A list of tuples representing (annotated) peaks + analytes : dict[str, :class:`~.Analyte`] + A mapping from identifier to :class:`~.Analyte` unique within this + :class:`Spectrum`. + interpretations : :class:`~.InterpretationCollection` + A mapping from identifier to :class:`~.Interpretation` unique within + this :class:`Spectrum`. """ if peak_list is None: peak_list = [] From 2fb766ffde5e86e7af6200ffd3d2043278f6c07d Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Tue, 18 Apr 2023 06:01:57 -0400 Subject: [PATCH 02/24] Checkpoint --- implementations/python/mzlib/attributes.py | 21 +++++- implementations/python/mzlib/backends/base.py | 60 ++++++++++++---- implementations/python/mzlib/backends/text.py | 71 +++++++++++++------ 3 files changed, 113 insertions(+), 39 deletions(-) diff --git a/implementations/python/mzlib/attributes.py b/implementations/python/mzlib/attributes.py index cbcbfd3..5c32302 100644 --- a/implementations/python/mzlib/attributes.py +++ b/implementations/python/mzlib/attributes.py @@ -1,6 +1,11 @@ import textwrap -from typing import Any, DefaultDict, Iterable, Iterator, Optional, Tuple, Union, List, Dict, Generic, TypeVar, Type +from typing import ( + Any, DefaultDict, Iterable, + Iterator, Optional, Tuple, + Union, List, Dict, + Generic, TypeVar, Type +) T = TypeVar('T') @@ -11,7 +16,7 @@ class Attribute(object): key: str value: Union[str, int, float, 'Attribute', List] group_id: Optional[str] - owner_id: int = -1 + owner_id: int def __init__(self, key, value, group_id=None, owner_id=-1): self.key = key @@ -176,7 +181,9 @@ def add_attribute_group(self, attributes: List[Union[Attribute, Tuple[str, Any]] key, value = attr self.add_attribute(key, value, group_id) - def get_attribute(self, key: str, group_identifier: Optional[str] = None, raw: bool = False) -> Union[Any, List[Any], Attribute, List[Attribute]]: + def get_attribute(self, key: str, group_identifier: Optional[str] = None, + raw: bool 
= False) -> Union[Any, List[Any], Attribute, + List[Attribute]]: """Get the value or values associated with a given attribute key. @@ -696,6 +703,14 @@ def __init__(self, name: str, attributes: Iterable = None, **kwargs): super().__init__(attributes, **kwargs) self.name = name + def member_of(self, target: Attributed) -> bool: + for attrib in self.attributes: + if attrib.group_id: + raise NotImplementedError() + if not target.has_attribute(attrib.key): + return False + return True + def apply(self, target: Attributed): terms_to_remove: List[Tuple[str, Union[Attribute, List[Attribute]]]] = [] for key in self.attributes.keys(): diff --git a/implementations/python/mzlib/backends/base.py b/implementations/python/mzlib/backends/base.py index d49fd42..b438c37 100644 --- a/implementations/python/mzlib/backends/base.py +++ b/implementations/python/mzlib/backends/base.py @@ -7,12 +7,16 @@ from psims.controlled_vocabulary import Entity -from psims.controlled_vocabulary.controlled_vocabulary import load_uo, load_unimod, load_psims +from psims.controlled_vocabulary.controlled_vocabulary import ( + load_uo, load_unimod, load_psims) from mzlib.index import MemoryIndex, SQLIndex, IndexBase from mzlib.spectrum import LIBRARY_ENTRY_INDEX, LIBRARY_ENTRY_KEY, Spectrum -from mzlib.analyte import Analyte, Interpretation, InterpretationMember, ANALYTE_MIXTURE_TERM -from mzlib.attributes import Attributed, AttributedEntity, AttributeSet, AttributeManagedProperty +from mzlib.analyte import ( + Analyte, Interpretation, InterpretationMember, ANALYTE_MIXTURE_TERM) +from mzlib.cluster import SpectrumCluster +from mzlib.attributes import ( + Attributed, AttributedEntity, AttributeSet, AttributeManagedProperty) from .utils import open_stream, LineBuffer @@ -36,6 +40,7 @@ class AttributeSetTypes(enum.Enum): spectrum = enum.auto() analyte = enum.auto() interpretation = enum.auto() + cluster = enum.auto() class VocabularyResolverMixin(object): @@ -86,20 +91,24 @@ def type_for_format(cls, 
format_or_extension): return cls._file_extension_to_implementation.get(format_or_extension) -class SpectralLibraryBackendBase(AttributedEntity, VocabularyResolverMixin, metaclass=SubclassRegisteringMetaclass): +class SpectralLibraryBackendBase(AttributedEntity, VocabularyResolverMixin, + metaclass=SubclassRegisteringMetaclass): """A base class for all spectral library formats. """ file_format = None - _file_extension_to_implementation: Dict[str, Type['SpectralLibraryBackendBase']] = {} - _format_name_to_implementation: Dict[str, Type['SpectralLibraryBackendBase']] = {} + _file_extension_to_implementation: Dict[str, + Type['SpectralLibraryBackendBase']] = {} + _format_name_to_implementation: Dict[str, + Type['SpectralLibraryBackendBase']] = {} index: IndexBase entry_attribute_sets: Dict[str, AttributeSet] analyte_attribute_sets: Dict[str, AttributeSet] interpretation_attribute_sets: Dict[str, AttributeSet] + cluster_attribute_sets: Dict[str, AttributeSet] name = AttributeManagedProperty[str](LIBRARY_NAME_TERM) identifier = AttributeManagedProperty[str](LIBRARY_IDENTIFIER_TERM) @@ -145,7 +154,8 @@ def guess_from_header(cls, filename) -> bool: return False @classmethod - def guess_implementation(cls, filename, index_type=None, **kwargs) -> 'SpectralLibraryBackendBase': + def guess_implementation(cls, filename, index_type=None, + **kwargs) -> 'SpectralLibraryBackendBase': """Guess the backend implementation to use with this file format. 
Parameters @@ -233,8 +243,17 @@ def _new_analyte(self, id=None) -> Analyte: attr_set.apply(analyte) return analyte - def _analyte_interpretation_link(self, spectrum: Spectrum, interpretation: Interpretation): - if interpretation.has_attribute(ANALYTE_MIXTURE_TERM) and not interpretation.analytes: + def _new_cluster(self) -> SpectrumCluster: + cluster = SpectrumCluster() + attr_set = self.cluster_attribute_sets.get('all') + if attr_set: + attr_set.apply(cluster) + return cluster + + def _analyte_interpretation_link(self, spectrum: Spectrum, + interpretation: Interpretation): + if (interpretation.has_attribute(ANALYTE_MIXTURE_TERM) and + not interpretation.analytes): analyte_ids = interpretation.get_attribute(ANALYTE_MIXTURE_TERM) if isinstance(analyte_ids, str): term = self.find_term_for(ANALYTE_MIXTURE_CURIE) @@ -256,7 +275,8 @@ def _default_interpretation_to_analytes(self, spectrum: Spectrum): for analyte in spectrum.analytes.values(): interpretation.add_analyte(analyte) - def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None): + def get_spectrum(self, spectrum_number: int=None, + spectrum_name: str=None) -> Spectrum: """Retrieve a single spectrum from the library. 
Parameters @@ -272,6 +292,9 @@ def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None): """ raise NotImplementedError() + def get_cluster(self, cluster_number: int) -> SpectrumCluster: + raise NotImplementedError() + def find_spectra(self, specification, **query_keys): raise NotImplementedError() @@ -334,13 +357,16 @@ def has_index_preference(cls, filename: str) -> Type[IndexBase]: def read(self): raise NotImplementedError() - def _add_attribute_set(self, attribute_set: AttributeSet, attribute_set_type: AttributeSetTypes): + def _add_attribute_set(self, attribute_set: AttributeSet, + attribute_set_type: AttributeSetTypes): if attribute_set_type == AttributeSetTypes.spectrum: self.entry_attribute_sets[attribute_set.name] = attribute_set elif attribute_set_type == AttributeSetTypes.analyte: self.analyte_attribute_sets[attribute_set.name] = attribute_set elif attribute_set_type == AttributeSetTypes.interpretation: self.interpretation_attribute_sets[attribute_set.name] = attribute_set + elif attribute_set_type == AttributeSetTypes.cluster: + self.cluster_attribute_sets[attribute_set.name] = attribute_set else: raise ValueError(f"Could not map {attribute_set_type}") @@ -352,7 +378,8 @@ def summarize_parsing_errors(self) -> Dict: class _PlainTextSpectralLibraryBackendBase(SpectralLibraryBackendBase): - def __init__(self, filename, index_type=None, read_metadata=True, create_index: bool=True): + def __init__(self, filename, index_type=None, read_metadata=True, + create_index: bool=True): if index_type is None and create_index: index_type = self.has_index_preference(filename) @@ -439,12 +466,14 @@ def search(self, specification, **query_keys) -> List[Spectrum]: return spectra -class SpectralLibraryWriterBase(VocabularyResolverMixin, metaclass=SubclassRegisteringMetaclass): +class SpectralLibraryWriterBase(VocabularyResolverMixin, + metaclass=SubclassRegisteringMetaclass): def __init__(self, filename, **kwargs): self.filename = filename 
super().__init__(**kwargs) - def _filter_attributes(self, attributes: Attributed, filter_fn: Callable) -> Iterable: + def _filter_attributes(self, attributes: Attributed, + filter_fn: Callable) -> Iterable: if isinstance(attributes, AttributedEntity): attributes = attributes.attributes for attrib in attributes: @@ -498,6 +527,9 @@ def write_library(self, library: SpectralLibraryBackendBase): def write_spectrum(self, spectrum: Spectrum): raise NotImplementedError() + def write_cluster(self, cluster: SpectrumCluster): + raise NotImplementedError() + def __enter__(self) -> 'SpectralLibraryWriterBase': return self diff --git a/implementations/python/mzlib/backends/text.py b/implementations/python/mzlib/backends/text.py index 091cc21..151875d 100644 --- a/implementations/python/mzlib/backends/text.py +++ b/implementations/python/mzlib/backends/text.py @@ -7,11 +7,11 @@ from typing import ClassVar, List, Tuple, Union, Iterable -from mzlib.index import MemoryIndex from mzlib.annotation import parse_annotation from mzlib.spectrum import Spectrum +from mzlib.cluster import SpectrumCluster from mzlib.attributes import AttributeManager, Attributed, AttributeSet -from mzlib.analyte import ANALYTE_MIXTURE_TERM, Analyte, Interpretation, InterpretationMember +from mzlib.analyte import Analyte, Interpretation, InterpretationMember from .base import ( SpectralLibraryBackendBase, @@ -43,6 +43,7 @@ class SpectrumParserStateEnum(enum.Enum): interpretation_member = 4 peaks = 5 done = 6 + cluster = 7 class LibraryParserStateEnum(enum.Enum): @@ -61,7 +62,8 @@ class LibraryParserStateEnum(enum.Enum): START_OF_LIBRARY_MARKER = re.compile(r"^") SPECTRUM_NAME_PRESENT = re.compile(r'MS:1003061\|spectrum name=') START_OF_INTERPRETATION_MEMBER_MARKER = re.compile(r"") -START_OF_ATTRIBUTE_SET = re.compile(r"") +START_OF_ATTRIBUTE_SET = re.compile( + r"") START_OF_CLUSTER = re.compile(r"") @@ -90,7 +92,9 @@ class TextSpectralLibrary(_PlainTextSpectralLibraryBackendBase): def 
guess_from_header(cls, filename: str) -> bool: with open_stream(filename, 'r', encoding='utf8') as stream: first_line = stream.readline() - if START_OF_SPECTRUM_MARKER.match(first_line) or START_OF_LIBRARY_MARKER.match(first_line): + if (START_OF_SPECTRUM_MARKER.match(first_line) or + START_OF_LIBRARY_MARKER.match(first_line) or + START_OF_CLUSTER.match(first_line)): return True return False @@ -123,9 +127,11 @@ def _parse_header_from_stream(self, stream: io.TextIOBase) -> Tuple[bool, int]: if match: state = LibraryParserStateEnum.attribute_sets if current_attribute_set is not None: - self._add_attribute_set(current_attribute_set, current_attribute_set_type) + self._add_attribute_set( + current_attribute_set, current_attribute_set_type) - current_attribute_set_type = attribute_set_types[match.group(1).lower()] + current_attribute_set_type = attribute_set_types[ + match.group(1).lower()] attrib_set_name = match.group(2) current_attribute_set = AttributeSet(attrib_set_name, []) else: @@ -151,7 +157,8 @@ def _parse_header_from_stream(self, stream: io.TextIOBase) -> Tuple[bool, int]: match = grouped_key_value_term_pattern.match(line) if match is not None: d = match.groupdict() - # If we're in an attribute set, store it in the attribute set + # If we're in an attribute set, store it in the attribute + # set if state == LibraryParserStateEnum.attribute_sets: current_attribute_set.add_attribute( d['term'], try_cast(d['value']), d['group_id']) @@ -178,7 +185,8 @@ def _parse_header_from_stream(self, stream: io.TextIOBase) -> Tuple[bool, int]: line = stream.readline() if current_attribute_set is not None: - self._add_attribute_set(current_attribute_set, current_attribute_set_type) + self._add_attribute_set( + current_attribute_set, current_attribute_set_type) self.attributes.clear() self.attributes._from_iterable(attributes) return True, nbytes @@ -234,7 +242,7 @@ def create_index(self) -> int: line = line.rstrip() if state == 'header': - # if re.match(r'MS:1003061\|spectrum 
name=', line): + if START_OF_SPECTRUM_MARKER.match(line): state = 'body' spectrum_file_offset = line_beginning_file_offset @@ -243,7 +251,7 @@ def create_index(self) -> int: if state == 'body': if len(line) == 0: continue - # if re.match(r'MS:1003061\|spectrum name=', line): + if START_OF_SPECTRUM_MARKER.match(line): if len(spectrum_buffer) > 0: if not spectrum_name: @@ -308,7 +316,9 @@ def _prepare_attribute_dict(self, match): except KeyError: match['value'] = try_cast(value) - def _parse_attribute_into(self, line: str, store: Attributed, line_number_message=lambda:'', state: SpectrumParserStateEnum=None) -> bool: + def _parse_attribute_into(self, line: str, store: Attributed, + line_number_message=lambda:'', + state: SpectrumParserStateEnum=None) -> bool: match = key_value_term_pattern.match(line) if match is not None: d = match.groupdict() @@ -320,6 +330,8 @@ def _parse_attribute_into(self, line: str, store: Attributed, line_number_messag attr_set = self.analyte_attribute_sets[d['value']] elif SpectrumParserStateEnum.interpretation == state: attr_set = self.interpretation_attribute_sets[d['value']] + elif SpectrumParserStateEnum.cluster == state: + attr_set = self.cluster_attribute_sets[d['value']] else: raise ValueError(f"Cannot define attribute sets for {state}") attr_set.apply(store) @@ -336,7 +348,8 @@ def _parse_attribute_into(self, line: str, store: Attributed, line_number_messag store.group_counter = int(d['group_id']) return True else: - raise ValueError(f"Malformed grouped attribute {line}{line_number_message()}") + raise ValueError( + f"Malformed grouped attribute {line}{line_number_message()}") elif "=" in line: name, value = line.split("=") store.add_attribute(name, try_cast(value)) @@ -344,13 +357,14 @@ def _parse_attribute_into(self, line: str, store: Attributed, line_number_messag else: raise ValueError(f"Malformed attribute line {line}{line_number_message()}") - def _parse(self, buffer: Iterable, spectrum_index: int = None, + def _parse(self, 
buffer: Iterable[str], spectrum_index: int = None, start_line_number: int=None) -> Spectrum: spec: Spectrum = self._new_spectrum() spec.index = spectrum_index if spectrum_index is not None else -1 interpretation: Interpretation = None analyte: Analyte = None interpretation_member: InterpretationMember = None + cluster: SpectrumCluster = None STATES = SpectrumParserStateEnum state: SpectrumParserStateEnum = STATES.header @@ -402,7 +416,8 @@ def real_line_number_or_nothing(): analyte = self._new_analyte(match.group(1)) spec.add_analyte(analyte) continue - self._parse_attribute_into(line, spec, real_line_number_or_nothing, state) + self._parse_attribute_into( + line, spec, real_line_number_or_nothing, state) elif state == STATES.interpretation: if START_OF_ANALYTE_MARKER.match(line): @@ -438,7 +453,8 @@ def real_line_number_or_nothing(): interpretation.add_member_interpretation(interpretation_member) continue - self._parse_attribute_into(line, interpretation.attributes, real_line_number_or_nothing) + self._parse_attribute_into( + line, interpretation.attributes, real_line_number_or_nothing) self._analyte_interpretation_link(spec, interpretation) elif state == STATES.interpretation_member: @@ -526,16 +542,23 @@ def real_line_number_or_nothing(): f"Malformed peak line {line} with {n_tokens} entries{real_line_number_or_nothing()}") else: raise ValueError(f"Malformed peak line {line}{real_line_number_or_nothing()}") + elif state == STATES.cluster: + self._parse_attribute_into( + line, cluster, real_line_number_or_nothing, state) else: - raise ValueError(f"Unknown state {state}{real_line_number_or_nothing()}") + raise ValueError( + f"Unknown state {state}{real_line_number_or_nothing()}") + if cluster: + return cluster spec.peak_list = peak_list # Backfill analytes into interpretations that never explicitly listed them. 
self._default_interpretation_to_analytes(spec) return spec - def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Spectrum: - # keep the two branches separate for the possibility that this is not possible with all - # index schemes. + def get_spectrum(self, spectrum_number: int=None, + spectrum_name: str=None) -> Spectrum: + # keep the two branches separate for the possibility that this is not + # possible with all index schemes. if spectrum_number is not None: if spectrum_name is not None: raise ValueError( @@ -596,13 +619,16 @@ def write_header(self, library: SpectralLibraryBackendBase): for attr_set in library.interpretation_attribute_sets.values(): self.write_attribute_set(attr_set, AttributeSetTypes.interpretation) - def write_attribute_set(self, attribute_set: AttributeSet, attribute_set_type: AttributeSetTypes): + def write_attribute_set(self, attribute_set: AttributeSet, + attribute_set_type: AttributeSetTypes): if attribute_set_type == AttributeSetTypes.spectrum: set_type = "Spectrum" elif attribute_set_type == AttributeSetTypes.analyte: set_type = "Analyte" elif attribute_set_type == AttributeSetTypes.interpretation: set_type = "Interpretation" + elif attribute_set_type == AttributeSetTypes.cluster: + set_type = "Cluster" header = f"\n" self.handle.write(header) @@ -620,12 +646,13 @@ def write_spectrum(self, spectrum: Spectrum): for analyte in spectrum.analytes.values(): self.handle.write(f"\n") self._write_attributes(analyte.attributes) - n_interps = len(spectrum.interpretations) + _n_interps = len(spectrum.interpretations) for interpretation in spectrum.interpretations.values(): interpretation: Interpretation if len(spectrum.analytes) == 1: - attribs_of = list(self._filter_attributes(interpretation, self._not_analyte_mixture_term)) + attribs_of = list(self._filter_attributes( + interpretation, self._not_analyte_mixture_term)) else: attribs_of = interpretation.attributes From a256e28da3af276f15f2ff351791de2b7a824a26 Mon Sep 17 
00:00:00 2001 From: Joshua Klein Date: Sun, 7 May 2023 19:53:44 -0400 Subject: [PATCH 03/24] Missing import --- implementations/python/mzlib/backends/base.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/implementations/python/mzlib/backends/base.py b/implementations/python/mzlib/backends/base.py index 6fc8d38..2b56797 100644 --- a/implementations/python/mzlib/backends/base.py +++ b/implementations/python/mzlib/backends/base.py @@ -9,6 +9,7 @@ from psims.controlled_vocabulary import Entity from psims.controlled_vocabulary.controlled_vocabulary import ( load_uo, load_unimod, load_psims) +from mzlib.cluster import SpectrumCluster from mzlib.index import MemoryIndex, SQLIndex, IndexBase from mzlib.spectrum import LIBRARY_ENTRY_INDEX, LIBRARY_ENTRY_KEY, Spectrum @@ -63,9 +64,8 @@ def type_for_format(cls, format_or_extension): class SpectralLibraryBackendBase(AttributedEntity, _VocabularyResolverMixin, metaclass=SubclassRegisteringMetaclass): - """A base class for all spectral library formats. 
+ """A base class for all spectral library formats.""" - """ file_format = None _file_extension_to_implementation: Dict[str, @@ -447,6 +447,7 @@ def search(self, specification, **query_keys) -> List[Spectrum]: class SpectralLibraryWriterBase(_VocabularyResolverMixin, metaclass=SubclassRegisteringMetaclass): + def __init__(self, filename, **kwargs): self.filename = filename super().__init__(**kwargs) From dd6dc2ae3d68224c79cc77612ba5bfede8f029a7 Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Thu, 11 May 2023 22:37:06 -0400 Subject: [PATCH 04/24] Add cluster support, fix crosswired key vs index at the heart of the library --- ...chinese_hamster_hcd_selected_head.mzlb.txt | 14 +- implementations/python/mzlib/attributes.py | 36 ++-- implementations/python/mzlib/backends/base.py | 84 +++++--- .../python/mzlib/backends/bibliospec.py | 4 +- implementations/python/mzlib/backends/json.py | 71 +++++-- implementations/python/mzlib/backends/msp.py | 6 +- implementations/python/mzlib/backends/text.py | 166 ++++++++++++---- implementations/python/mzlib/cluster.py | 9 +- implementations/python/mzlib/index/base.py | 78 +++++++- implementations/python/mzlib/index/memory.py | 185 +++++++++++++++--- implementations/python/mzlib/index/sql.py | 110 ++++++++++- implementations/python/mzlib/spectrum.py | 20 +- .../python/mzlib/spectrum_library.py | 58 ++++-- implementations/python/pyproject.toml | 8 + .../test_data/bad_peak_annotations.mzlb.txt | 4 +- ...hinese_hamster_hcd_selected_head.mzlb.json | 14 +- ...chinese_hamster_hcd_selected_head.mzlb.txt | 14 +- .../tests/test_data/clusters_example.mzlb | 142 ++++++++++++++ .../complex_interpretations.mzlb.txt | 2 +- ...lex_interpretations_with_members.mzlb.json | 2 +- ...plex_interpretations_with_members.mzlb.txt | 2 +- implementations/python/tests/test_index.py | 2 +- .../python/tests/test_library_backend.py | 2 +- implementations/python/tests/test_spectrum.py | 8 +- 24 files changed, 841 insertions(+), 200 deletions(-) create mode 
100644 implementations/python/pyproject.toml create mode 100644 implementations/python/tests/test_data/clusters_example.mzlb diff --git a/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt b/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt index fc6a51e..79cd964 100644 --- a/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt +++ b/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt @@ -4,7 +4,7 @@ MS:1003188|library name=examples/chinese_hamster_hcd_selected_head.msp -MS:1003061|spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV +MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 MS:1000744|selected ion m/z=855.4538 @@ -135,7 +135,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=4 1496.7792 11918.3 y15/-6.5ppm -MS:1003061|spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_53eV +MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_53eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 MS:1000744|selected ion m/z=855.4538 @@ -383,7 +383,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=5 1628.3004 719.6 ? -MS:1003061|spectrum name=AAAAGQTGTVPPGAPGALPLPGMAIVK/2_0_76eV +MS:1003061|library spectrum name=AAAAGQTGTVPPGAPGALPLPGMAIVK/2_0_76eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 MS:1000744|selected ion m/z=1207.1672 @@ -549,7 +549,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0 1980.9857 3567.9 ? 
-MS:1003061|spectrum name=AAAAGSTSVKPIFSR/2_0_44eV +MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/2_0_44eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 MS:1000744|selected ion m/z=731.9043 @@ -704,7 +704,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=1 1465.9423 113.63 ? -MS:1003061|spectrum name=AAAAGSTSVKPIFSR/3_0_28eV +MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/3_0_28eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=3 MS:1000744|selected ion m/z=488.2719 @@ -909,7 +909,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0 1469.9915 925.5 ? -MS:1003061|spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_50eV +MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_50eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 MS:1000744|selected ion m/z=830.8834 @@ -1021,7 +1021,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=6 1670.2889 140.136 ? 
-MS:1003061|spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_52eV +MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_52eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 MS:1000744|selected ion m/z=830.8834 diff --git a/implementations/python/mzlib/attributes.py b/implementations/python/mzlib/attributes.py index 5c32302..fc30180 100644 --- a/implementations/python/mzlib/attributes.py +++ b/implementations/python/mzlib/attributes.py @@ -418,8 +418,7 @@ def _attributes_from_iterable(self, attributes): return self._from_iterable(attributes) def copy(self): - """Make a deep copy of the object - """ + """Make a deep copy of the object""" return self.__class__(self.attributes) def __repr__(self): @@ -457,7 +456,8 @@ class _ReadAttributes(object): attributes: AttributeManager def get_attribute(self, key, group_identifier=None, raw: bool = False): - """Get the value or values associated with a given + """ + Get the value or values associated with a given attribute key from the entity's attribute store. Parameters @@ -466,6 +466,9 @@ def get_attribute(self, key, group_identifier=None, raw: bool = False): The name of the attribute to retrieve group_identifier : str, optional The specific group identifier to return from. + raw : bool, optional + To return the stored value, or an :class:`Attribute` object preserving + additional information Returns ------- @@ -478,7 +481,8 @@ def get_attribute_group(self, group_identifier: str) -> List[Any]: return self.attributes.get_attribute_group(group_identifier) def has_attribute(self, key) -> bool: - """Test for the presence of a given attribute in the library + """ + Test for the presence of a given attribute in the library level store. Parameters @@ -493,7 +497,8 @@ def has_attribute(self, key) -> bool: return self.attributes.has_attribute(key) def get_by_name(self, name: str): - '''Search for an attribute by human-readable name. 
+ """ + Search for an attribute by human-readable name. Parameters ---------- @@ -504,7 +509,7 @@ def get_by_name(self, name: str): ------- object: The attribute value if found or :const:`None`. - ''' + """ return self.attributes.get_by_name(name) def _iter_attribute_groups(self): @@ -523,7 +528,8 @@ class _WriteAttributes(object): attributes: AttributeManager def add_attribute(self, key, value, group_identifier=None) -> Union[Any, List[Any]]: - """Add an attribute to the entity's attributes store. + """ + Add an attribute to the entity's attributes store. Parameters ---------- @@ -541,7 +547,8 @@ def replace_attribute(self, key, value, group_identifier=None): return self.attributes.replace_attribute(key, value, group_identifier=group_identifier) def remove_attribute(self, key, group_identifier=None): - """Remove the value or values associated with a given + """ + Remove the value or values associated with a given attribute key from the entity's attribute store. This rebuilds the entire store, which may be expensive. @@ -564,13 +571,15 @@ def _clear_attributes(self): class AttributedEntity(_ReadAttributes, _WriteAttributes): - '''A base type for entities which contain an :class:`AttributeManager` + """ + A base type for entities which contain an :class:`AttributeManager` without being completely subsumed by it. An :class:`AttributeManager` represents a collection of attributes first and foremost, supplying :class:`~.collections.abc.MutableMapping`-like interface to them, in addition to methods. 
- ''' + """ + __slots__ = ("attributes", ) attributes: AttributeManager @@ -711,10 +720,11 @@ def member_of(self, target: Attributed) -> bool: return False return True - def apply(self, target: Attributed): + def apply(self, target: Attributed, ): terms_to_remove: List[Tuple[str, Union[Attribute, List[Attribute]]]] = [] for key in self.attributes.keys(): - terms_to_remove.append((key, target.get_attribute(key, raw=True))) + if target.has_attribute(key): + terms_to_remove.append((key, target.get_attribute(key, raw=True))) group_ids = DefaultDict(int) for key, terms in terms_to_remove: @@ -734,7 +744,7 @@ def apply(self, target: Attributed): for group_id, attrs in self._iter_attribute_groups(): if group_id is None: for a in attrs: - target.add_attribute(a) + target.add_attribute(a.key, a.value, group_identifier=None) else: target.add_attribute_group(attrs) diff --git a/implementations/python/mzlib/backends/base.py b/implementations/python/mzlib/backends/base.py index 0373734..b718414 100644 --- a/implementations/python/mzlib/backends/base.py +++ b/implementations/python/mzlib/backends/base.py @@ -14,7 +14,7 @@ from mzlib.cluster import SpectrumCluster from mzlib.index import MemoryIndex, SQLIndex, IndexBase -from mzlib.spectrum import LIBRARY_ENTRY_INDEX, LIBRARY_ENTRY_KEY, Spectrum +from mzlib.spectrum import LIBRARY_SPECTRUM_INDEX, LIBRARY_SPECTRUM_KEY, Spectrum from mzlib.analyte import Analyte, Interpretation, InterpretationMember, ANALYTE_MIXTURE_TERM from mzlib.attributes import Attributed, AttributedEntity, AttributeSet, AttributeManagedProperty from mzlib.ontology import _VocabularyResolverMixin @@ -69,7 +69,26 @@ def type_for_format(cls, format_or_extension): return cls._file_extension_to_implementation.get(format_or_extension) -class SpectralLibraryBackendBase(AttributedEntity, _VocabularyResolverMixin, metaclass=SubclassRegisteringMetaclass): +class _LibraryViewMixin: + + name = AttributeManagedProperty[str](LIBRARY_NAME_TERM) + identifier = 
AttributeManagedProperty[str](LIBRARY_IDENTIFIER_TERM) + description = AttributeManagedProperty[str](LIBRARY_DESCRIPTION_TERM) + uri = AttributeManagedProperty[str](LIBRARY_URI_TERM) + library_version = AttributeManagedProperty[str](LIBRARY_VERSION_TERM) + + @property + def format_version(self): + try: + value = self.get_attribute(FORMAT_VERSION_TERM) + return value + except KeyError: + value = DEFAULT_VERSION + self.add_attribute(FORMAT_VERSION_TERM, value) + return value + + +class SpectralLibraryBackendBase(AttributedEntity, _VocabularyResolverMixin, _LibraryViewMixin, metaclass=SubclassRegisteringMetaclass): """A base class for all spectral library formats.""" file_format = None @@ -86,11 +105,6 @@ class SpectralLibraryBackendBase(AttributedEntity, _VocabularyResolverMixin, met interpretation_attribute_sets: Dict[str, AttributeSet] cluster_attribute_sets: Dict[str, AttributeSet] - name = AttributeManagedProperty[str](LIBRARY_NAME_TERM) - identifier = AttributeManagedProperty[str](LIBRARY_IDENTIFIER_TERM) - description = AttributeManagedProperty[str](LIBRARY_DESCRIPTION_TERM) - uri = AttributeManagedProperty[str](LIBRARY_URI_TERM) - @classmethod def guess_from_filename(cls, filename: Union[str, Path, io.FileIO]) -> bool: """ @@ -178,19 +192,12 @@ def __init__(self, filename): self.interpretation_attribute_sets = { "all": AttributeSet("all", []) } + self.cluster_attribute_sets = { + "all": AttributeSet("all", []) + } super().__init__(None) - @property - def format_version(self): - try: - value = self.get_attribute(FORMAT_VERSION_TERM) - return value - except KeyError: - value = DEFAULT_VERSION - self.add_attribute(FORMAT_VERSION_TERM, value) - return value - def read_header(self) -> bool: """ Read just the header of the whole library @@ -265,7 +272,7 @@ def get_spectrum(self, spectrum_number: int=None, Parameters ---------- spectrum_number : int, optional - The index of the specturm in the library + The index of the spectrum in the library spectrum_name : str, 
optional The name of the spectrum in the library @@ -276,6 +283,18 @@ def get_spectrum(self, spectrum_number: int=None, raise NotImplementedError() def get_cluster(self, cluster_number: int) -> SpectrumCluster: + """ + Retrieve a single spectrum cluster from the library. + + Parameters + ---------- + cluster_number : int, optional + The index of the cluster in the library + + Returns + ------- + :class:`~.SpectrumCluster` + """ raise NotImplementedError() def find_spectra(self, specification, **query_keys): @@ -358,6 +377,7 @@ def _add_attribute_set(self, attribute_set: AttributeSet, def summarize_parsing_errors(self) -> Dict: return {} + guess_implementation = SpectralLibraryBackendBase.guess_implementation @@ -578,14 +598,14 @@ def _not_analyte_mixture_term(self, attrib): def _not_entry_index(self, attrib): if attrib: key = attrib[0] - if key == LIBRARY_ENTRY_INDEX: + if key == LIBRARY_SPECTRUM_INDEX: return False return True def _not_entry_key_or_index(self, attrib): if attrib: key = attrib[0] - if key in (LIBRARY_ENTRY_INDEX, LIBRARY_ENTRY_KEY): + if key in (LIBRARY_SPECTRUM_INDEX, LIBRARY_SPECTRUM_KEY): return False return True @@ -601,14 +621,24 @@ def write_library(self, library: SpectralLibraryBackendBase): step = max(min(n // 100, 5000), 1) ident = '' i = 0 - for i, spectrum in enumerate(library): + for i, entry in enumerate(library): if i % step == 0 and i: + if isinstance(entry, SpectrumCluster): + tag = "cluster " + else: + tag = "" try: - ident = f"{spectrum.key}:{spectrum.name}" + ident = f"{tag}{entry.key}:{entry.name}" except Exception: - ident = str(spectrum.key) + ident = f"{tag}{entry.key}" logger.info(f"Wrote {ident} {i}/{n} ({i / n * 100.0:0.2f}%)") - self.write_spectrum(spectrum) + if isinstance(entry, Spectrum): + self.write_spectrum(entry) + elif isinstance(entry, SpectrumCluster): + self.write_cluster(entry) + else: + raise TypeError(f"Don't know how to save {entry.__class__}") + i = n logger.info(f"Wrote {n} spectra") @@ -628,17 +658,13 @@ 
def close(self): pass -class LibrarySpectrumIterator(AttributedEntity, Iterator[Spectrum]): +class LibraryIterator(AttributedEntity, _LibraryViewMixin, Iterator[Spectrum]): def __init__(self, backend: SpectralLibraryBackendBase) -> None: self.backend = backend self.attributes = backend self.iter = backend.read() self._buffer = next(self.iter) - @property - def format_version(self): - return self.backend.format_version - def __iter__(self): return self diff --git a/implementations/python/mzlib/backends/bibliospec.py b/implementations/python/mzlib/backends/bibliospec.py index 2741c59..e2c5580 100644 --- a/implementations/python/mzlib/backends/bibliospec.py +++ b/implementations/python/mzlib/backends/bibliospec.py @@ -81,8 +81,8 @@ def __len__(self): class BibliospecSpectralLibrary(BibliospecBase, SpectralLibraryBackendBase): - '''Read Bibliospec 2 SQLite3 spectral library files. - ''' + """Read Bibliospec 2 SQLite3 spectral library files.""" + connection: sqlite3.Connection file_format = "blib" diff --git a/implementations/python/mzlib/backends/json.py b/implementations/python/mzlib/backends/json.py index e039fc4..336dc86 100644 --- a/implementations/python/mzlib/backends/json.py +++ b/implementations/python/mzlib/backends/json.py @@ -4,10 +4,11 @@ import logging import warnings -from typing import Iterable, List, Dict, Mapping, Union +from typing import Any, Iterable, List, Dict, Mapping, Union from pathlib import Path from xml.dom.minidom import Attr +from mzlib.cluster import SpectrumCluster from mzlib.index import MemoryIndex from mzlib.attributes import AttributeManager, Attributed @@ -25,18 +26,22 @@ LIBRARY_METADATA_KEY = "attributes" ELEMENT_ATTRIBUTES_KEY = "attributes" SPECTRA_KEY = "spectra" +CLUSTERS_KEY = "clusters" FORMAT_VERSION_KEY = "format_version" ANALYTES_KEY = 'analytes' INTERPRETATIONS_KEY = 'interpretations' INTERPRETATION_MEMBERS_KEY = 'members' -PEAK_ANNOTATIONS_KEY = 'peak_annotations' ID_KEY = 'id' + MZ_KEY = "mzs" INTENSITY_KEY = 
"intensities" AGGREGATIONS_KEY = "aggregations" +PEAK_ANNOTATIONS_KEY = 'peak_annotations' + SPECTRUM_CLASSES = "spectrum_attribute_sets" ANALYTE_CLASSES = "analyte_attribute_sets" INTERPRETATION_CLASSES = "interpretation_attribute_sets" +CLUSTER_CLASSES = "cluster_attribute_sets" FORMAT_VERSION_ACC = FORMAT_VERSION_TERM.split("|")[0] @@ -81,15 +86,25 @@ def read_header(self) -> bool: def create_index(self): for i, record in enumerate(self.buffer[SPECTRA_KEY]): + name = None + key = None for attrib in record['attributes']: if attrib["accession"] == "MS:1003061": - self.index.add(i, i, attrib['value'], None, None) - break + name = attrib['value'] + if name and key: + break + if attrib["accession"] == "MS:1003237": + key = attrib['value'] + if name and key: + break else: - raise ValueError(f"Unidentified spectrum at index {i}") + if not name and not key: + raise ValueError(f"Unidentified spectrum at index {i}") + self.index.add(key, i, name, None, None) def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Spectrum: - """Retrieve a single spectrum from the library. + """ + Retrieve a single spectrum from the library. 
Parameters ---------- @@ -102,7 +117,6 @@ def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Sp ------- :class:`~.Spectrum` """ - if spectrum_number is not None: if spectrum_name is not None: raise ValueError( @@ -111,7 +125,7 @@ def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Sp elif spectrum_name is not None: offset = self.index.offset_for(spectrum_name) data = self.buffer[SPECTRA_KEY][offset] - spectrum = self.make_spectrum_from_payload(data) + spectrum = self._make_spectrum_from_payload(data) return spectrum def _fill_attributes(self, attributes: List, store: Attributed, context_type: AttributeSetTypes=None) -> Attributed: @@ -140,7 +154,7 @@ def _fill_attributes(self, attributes: List, store: Attributed, context_type: At store.group_counter = int(group) return store - def make_analyte_from_payload(self, analyte_id, analyte_d: Dict) -> Analyte: + def _make_analyte_from_payload(self, analyte_id, analyte_d: Dict) -> Analyte: if analyte_id != analyte_d.get('id'): warnings.warn( f"An analyte with explicit id {analyte_d['id']!r} does not match its key {analyte_id!r}") @@ -148,7 +162,7 @@ def make_analyte_from_payload(self, analyte_id, analyte_d: Dict) -> Analyte: self._fill_attributes(analyte_d[ELEMENT_ATTRIBUTES_KEY], analyte, AttributeSetTypes.analyte) return analyte - def make_interpretation_from_payload(self, interpretation_id, interpretation_d: Dict) -> Interpretation: + def _make_interpretation_from_payload(self, interpretation_id, interpretation_d: Dict) -> Interpretation: if interpretation_id != interpretation_d.get('id'): warnings.warn( f"An analyte with explicit id {interpretation_d['id']!r} does not match its key {interpretation_id!r}") @@ -166,7 +180,13 @@ def make_interpretation_from_payload(self, interpretation_id, interpretation_d: interpretation.add_member_interpretation(member_d) return interpretation - def make_spectrum_from_payload(self, data: Dict) -> Spectrum: + def 
_make_cluster_from_payload(self, data: Dict[str, Any]) -> SpectrumCluster: + cluster = self._new_cluster() + self._fill_attributes( + data[ELEMENT_ATTRIBUTES_KEY], cluster, AttributeSetTypes.cluster) + return cluster + + def _make_spectrum_from_payload(self, data: Dict) -> Spectrum: spectrum = self._new_spectrum() self._fill_attributes( data[ELEMENT_ATTRIBUTES_KEY], @@ -175,12 +195,12 @@ def make_spectrum_from_payload(self, data: Dict) -> Spectrum: ) if ANALYTES_KEY in data: for analyte_id, analyte in data[ANALYTES_KEY].items(): - analyte_d = self.make_analyte_from_payload(analyte_id, analyte) + analyte_d = self._make_analyte_from_payload(analyte_id, analyte) spectrum.add_analyte(analyte_d) if INTERPRETATIONS_KEY in data: for interpretation_id, interpretation_d in data[INTERPRETATIONS_KEY].items(): - interpretation = self.make_interpretation_from_payload( + interpretation = self._make_interpretation_from_payload( interpretation_id, interpretation_d ) @@ -215,10 +235,16 @@ def make_spectrum_from_payload(self, data: Dict) -> Spectrum: return spectrum def read(self): + n = len(self.buffer.get(CLUSTERS_KEY, [])) + for offset in range(n): + data = self.buffer[CLUSTERS_KEY][offset] + cluster = self._make_cluster_from_payload(data) + yield cluster + n = len(self.buffer[SPECTRA_KEY]) for offset in range(n): data = self.buffer[SPECTRA_KEY][offset] - spectrum = self.make_spectrum_from_payload(data) + spectrum = self._make_spectrum_from_payload(data) yield spectrum @@ -241,6 +267,7 @@ def __init__(self, filename, version=None, pretty_print=True, format_annotations FORMAT_VERSION_KEY: self.version, LIBRARY_METADATA_KEY: [], SPECTRA_KEY: [], + CLUSTERS_KEY: [], SPECTRUM_CLASSES: {}, ANALYTE_CLASSES: {}, INTERPRETATION_CLASSES: {}, @@ -271,7 +298,6 @@ def write_header(self, library: SpectralLibraryBackendBase): c.name: self._format_attributes(c.attributes) for c in library.interpretation_attribute_sets.values() } - def _format_attributes(self, attributes_manager: Iterable) -> 
List: attributes = [] for attribute in attributes_manager: @@ -311,6 +337,15 @@ def _format_attributes(self, attributes_manager: Iterable) -> List: attributes.append(reformed_attribute) return attributes + def write_cluster(self, cluster: SpectrumCluster): + attributes = self._format_attributes( + cluster.attributes + ) + payload = { + ELEMENT_ATTRIBUTES_KEY: attributes + } + self.buffer[CLUSTERS_KEY].append(payload) + def write_spectrum(self, spectrum: Spectrum): mzs = [] intensities = [] @@ -361,7 +396,7 @@ def write_spectrum(self, spectrum: Spectrum): ELEMENT_ATTRIBUTES_KEY: self._format_attributes(member) } - spectrum = { + payload = { ELEMENT_ATTRIBUTES_KEY: attributes, MZ_KEY: mzs, INTENSITY_KEY: intensities, @@ -371,9 +406,9 @@ def write_spectrum(self, spectrum: Spectrum): INTERPRETATIONS_KEY: interpretations } if not any(aggregations): - spectrum.pop(AGGREGATIONS_KEY) + payload.pop(AGGREGATIONS_KEY) - self.buffer[SPECTRA_KEY].append(spectrum) + self.buffer[SPECTRA_KEY].append(payload) def flush(self): # If we know we're writing a complete library, skip the probably-doing-too-many-things diff --git a/implementations/python/mzlib/backends/msp.py b/implementations/python/mzlib/backends/msp.py index 30223e7..2900972 100644 --- a/implementations/python/mzlib/backends/msp.py +++ b/implementations/python/mzlib/backends/msp.py @@ -883,7 +883,6 @@ def create_index(self) -> int: n_spectra: int The number of entries read """ - #### Check that the spectrum library filename isvalid filename = self.filename @@ -1395,13 +1394,14 @@ def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Sp if spectrum_number is not None: if spectrum_name is not None: raise ValueError("Provide only one of spectrum_number or spectrum_name") - offset = self.index.offset_for(spectrum_number) + index_record = self.index.record_for(spectrum_number) + offset = index_record.offset elif spectrum_name is not None: index_record = self.index.record_for(spectrum_name) 
spectrum_number = index_record.number offset = index_record.offset buffer = self._get_lines_for(offset) - spectrum = self._parse(buffer, spectrum_number) + spectrum = self._parse(buffer, index_record.index) return spectrum def summarize_parsing_errors(self) -> Dict: diff --git a/implementations/python/mzlib/backends/text.py b/implementations/python/mzlib/backends/text.py index 151875d..f093c60 100644 --- a/implementations/python/mzlib/backends/text.py +++ b/implementations/python/mzlib/backends/text.py @@ -1,3 +1,4 @@ +from collections import deque import re import os import io @@ -60,7 +61,7 @@ class LibraryParserStateEnum(enum.Enum): START_OF_ANALYTE_MARKER = re.compile(r"^") START_OF_PEAKS_MARKER = re.compile(r"^") START_OF_LIBRARY_MARKER = re.compile(r"^") -SPECTRUM_NAME_PRESENT = re.compile(r'MS:1003061\|spectrum name=') +SPECTRUM_NAME_PRESENT = re.compile(r'MS:1003061\|(?:library )?spectrum name=') START_OF_INTERPRETATION_MEMBER_MARKER = re.compile(r"") START_OF_ATTRIBUTE_SET = re.compile( r"") @@ -175,7 +176,7 @@ def _parse_header_from_stream(self, stream: io.TextIOBase) -> Tuple[bool, int]: raise ValueError( f"Malformed grouped attribute {line}") elif "=" in line: - name, value = line.split("=") + name, value = line.split("=", 1) if state == LibraryParserStateEnum.attribute_sets: current_attribute_set.add_attribute(name, value) else: @@ -207,7 +208,6 @@ def create_index(self) -> int: n_spectra: int The number of entries read """ - #### Check that the spectrum library filename isvalid filename = self.filename @@ -216,13 +216,19 @@ def create_index(self) -> int: with open_stream(filename, 'rt', encoding='utf8') as infile: state = 'header' - spectrum_buffer = [] + entry_buffer = deque() + n_spectra = 0 + n_clusters = 0 + start_index = 0 file_offset = 0 + line_beginning_file_offset = 0 spectrum_file_offset = 0 spectrum_name = '' + current_key = None + entry_is_cluster = False # Required for counting file_offset manually (LF vs CRLF) infile.readline() @@ -243,49 
+249,80 @@ def create_index(self) -> int: line = line.rstrip() if state == 'header': - if START_OF_SPECTRUM_MARKER.match(line): + if is_spec := START_OF_SPECTRUM_MARKER.match(line): + current_key = int(is_spec.group(1)) state = 'body' spectrum_file_offset = line_beginning_file_offset + entry_is_cluster = False + elif is_clus := START_OF_CLUSTER.match(line): + current_key = int(is_clus.group(1)) + state = 'body' + spectrum_file_offset = line_beginning_file_offset + entry_is_cluster = True else: continue + if state == 'body': if len(line) == 0: continue - if START_OF_SPECTRUM_MARKER.match(line): - if len(spectrum_buffer) > 0: - if not spectrum_name: - raise ValueError("No spectrum name") - self.index.add( - number=n_spectra + start_index, - offset=spectrum_file_offset, - name=spectrum_name, - analyte=None) - n_spectra += 1 - spectrum_buffer = [] - #### Commit every now and then - if n_spectra % 10000 == 0: - self.index.commit() - logger.info(f"Processed {file_offset} bytes, {n_spectra} spectra read") - + is_spec = START_OF_SPECTRUM_MARKER.match(line) + is_clus = START_OF_CLUSTER.match(line) + if (is_spec) or (is_clus): + if len(entry_buffer) > 0: + if not entry_is_cluster: + if not spectrum_name: + raise ValueError("No spectrum name") + self.index.add( + number=current_key, + offset=spectrum_file_offset, + name=spectrum_name, + analyte=None) + n_spectra += 1 + current_key = int(is_spec.group(1)) if is_spec else int(is_clus.group(1)) + #### Commit every now and then + if n_spectra % 10000 == 0: + self.index.commit() + logger.info( + f"Processed {file_offset} bytes, {n_spectra} spectra read, {n_clusters} read") + else: + self.index.add_cluster(number=n_clusters, offset=spectrum_file_offset) + if n_clusters % 10000 == 0: + self.index.commit() + logger.info( + f"Processed {file_offset} bytes, {n_spectra} spectra read, {n_clusters} read") + n_clusters += 1 + current_key = int(is_spec.group(1)) if is_spec else int(is_clus.group(1)) + + entry_buffer.clear() + 
entry_is_cluster = bool(is_clus) spectrum_file_offset = line_beginning_file_offset spectrum_name = '' - if re.match(r'MS:1003061\|spectrum name', line): - spectrum_name = re.match(r'MS:1003061\|spectrum name=(.+)', line).group(1) - - spectrum_buffer.append(line) - - - if not spectrum_name: - raise ValueError("No spectrum name") - self.index.add( - number=n_spectra + start_index, - offset=spectrum_file_offset, - name=spectrum_name, - analyte=None) - self.index.commit() - n_spectra += 1 - logger.debug(f"Processed {file_offset} bytes, {n_spectra} spectra read") + if re.match(r'MS:1003061\|(?:library )?spectrum name', line): + spectrum_name = re.match(r'MS:1003061\|(?:library )?spectrum name=(.+)', line).group(1) + + entry_buffer.append(line) + + + if spectrum_name: + self.index.add( + number=current_key, + offset=spectrum_file_offset, + name=spectrum_name, + analyte=None) + self.index.commit() + n_spectra += 1 + logger.info( + f"Processed {file_offset} bytes, {n_spectra} spectra read, {n_clusters} read") + elif entry_is_cluster: + self.index.add_cluster( + number=current_key, + offset=spectrum_file_offset, + ) + self.index.commit() + n_clusters += 1 + logger.info( + f"Processed {file_offset} bytes, {n_spectra} spectra read, {n_clusters} read") #### Flush the index self.index.commit() @@ -301,7 +338,7 @@ def _buffer_from_stream(self, infile: io.IOBase) -> List: if state == 'body': if len(line) == 0: continue - if START_OF_SPECTRUM_MARKER.match(line): + if START_OF_SPECTRUM_MARKER.match(line) or START_OF_CLUSTER.match(line): if len(spectrum_buffer) > 0: return spectrum_buffer spectrum_buffer.append(line) @@ -351,14 +388,14 @@ def _parse_attribute_into(self, line: str, store: Attributed, raise ValueError( f"Malformed grouped attribute {line}{line_number_message()}") elif "=" in line: - name, value = line.split("=") + name, value = line.split("=", 1) store.add_attribute(name, try_cast(value)) return True else: raise ValueError(f"Malformed attribute line 
{line}{line_number_message()}") def _parse(self, buffer: Iterable[str], spectrum_index: int = None, - start_line_number: int=None) -> Spectrum: + start_line_number: int=None) -> Union[Spectrum, SpectrumCluster]: spec: Spectrum = self._new_spectrum() spec.index = spectrum_index if spectrum_index is not None else -1 interpretation: Interpretation = None @@ -416,6 +453,14 @@ def real_line_number_or_nothing(): analyte = self._new_analyte(match.group(1)) spec.add_analyte(analyte) continue + + elif START_OF_CLUSTER.match(line): + state = STATES.cluster + cluster = self._new_cluster() + match = START_OF_CLUSTER.match(line) + cluster.key = int(match.group(1)) or cluster.index - 1 + continue + self._parse_attribute_into( line, spec, real_line_number_or_nothing, state) @@ -542,7 +587,28 @@ def real_line_number_or_nothing(): f"Malformed peak line {line} with {n_tokens} entries{real_line_number_or_nothing()}") else: raise ValueError(f"Malformed peak line {line}{real_line_number_or_nothing()}") + elif state == STATES.cluster: + if START_OF_SPECTRUM_MARKER.match(line): + raise ValueError( + f"Clusters should not include spectrum sections {real_line_number_or_nothing()}") + + elif START_OF_PEAKS_MARKER.match(line): + raise ValueError( + f"Clusters should not include peaks {real_line_number_or_nothing()}") + + elif START_OF_INTERPRETATION_MARKER.match(line): + raise ValueError( + f"Clusters should not include interpretation sections {real_line_number_or_nothing()}") + + elif START_OF_ANALYTE_MARKER.match(line): + raise ValueError( + f"Clusters should not include analyte sections {real_line_number_or_nothing()}") + + elif START_OF_INTERPRETATION_MEMBER_MARKER.match(line): + raise ValueError( + f"Clusters should not include interpretation member sections {real_line_number_or_nothing()}") + self._parse_attribute_into( line, cluster, real_line_number_or_nothing, state) else: @@ -563,16 +629,23 @@ def get_spectrum(self, spectrum_number: int=None, if spectrum_name is not None: raise 
ValueError( "Provide only one of spectrum_number or spectrum_name") - offset = self.index.offset_for(spectrum_number) + index_record = self.index.record_for(spectrum_number) + offset = index_record.offset elif spectrum_name is not None: index_record = self.index.record_for(spectrum_name) offset = index_record.offset spectrum_number = index_record.number buffer = self._get_lines_for(offset) - spectrum = self._parse(buffer, spectrum_number) + spectrum = self._parse(buffer, index_record.index) return spectrum + def get_cluster(self, cluster_number: int) -> SpectrumCluster: + offset = self.index.offset_for_cluster(cluster_number) + buffer = self._get_lines_for(offset) + cluster = self._parse(buffer, cluster_number) + return cluster + class TextSpectralLibraryWriter(SpectralLibraryWriterBase): file_format = "mzlb.txt" @@ -675,6 +748,15 @@ def write_spectrum(self, spectrum: Spectrum): self.handle.write("\t".join(peak_parts) + "\n") self.handle.write("\n") + def write_cluster(self, cluster: SpectrumCluster): + self.handle.write(f"\n") + attribs_of = list(self._filter_attributes( + cluster, + self._not_entry_key_or_index) + ) + self._write_attributes(attribs_of) + self.handle.write("\n") + def close(self): self.handle.close() diff --git a/implementations/python/mzlib/cluster.py b/implementations/python/mzlib/cluster.py index 2d5a3f3..7bba278 100644 --- a/implementations/python/mzlib/cluster.py +++ b/implementations/python/mzlib/cluster.py @@ -4,17 +4,20 @@ from typing import Dict, List -from mzlib.attributes import AttributeManager, AttributeManagedProperty +from mzlib.attributes import AttributeManager, AttributeManagedProperty, AttributeGroupFacet from .utils import ensure_iter, flatten SIMILAR_SPECTRUM_KEYS = "MS:1003263|similar spectrum keys" SIMILAR_SPECTRUM_USI = "MS:1003264|similar spectrum USI" CLUSTER_KEY = "MS:1003267|spectrum cluster key" +CLUSTER_SIZE = "MS:1003320|spectrum cluster size" CLUSTER_MEMBERS_KEYS = "MS:1003268|spectrum cluster member spectrum keys" 
CLUSTER_MEMBER_USI = "MS:1003269|spectrum cluster member USI" +CLUSTER_SUMMARY_STATS = "MS:1003321|summary statistics of clustered spectra" + @dataclass class ClusterMemberRef: @@ -36,13 +39,15 @@ def key(self) -> str: class SpectrumCluster(AttributeManager): - def __init__(self, attributes: List): + def __init__(self, attributes: List=None): super().__init__(attributes) key = AttributeManagedProperty[int](CLUSTER_KEY) _member_references = AttributeManagedProperty(CLUSTER_MEMBERS_KEYS) _cluster_member_usis = AttributeManagedProperty(CLUSTER_MEMBER_USI) + size = AttributeManagedProperty[int](CLUSTER_SIZE) + @property def members(self) -> List[ClusterMemberRef]: internal_refs = [ diff --git a/implementations/python/mzlib/index/base.py b/implementations/python/mzlib/index/base.py index 13726a5..b14c98e 100644 --- a/implementations/python/mzlib/index/base.py +++ b/implementations/python/mzlib/index/base.py @@ -1,12 +1,13 @@ import warnings -from typing import Collection, Union, Any, List +from typing import Collection, Iterator, Optional, Union, Any, List class IndexRecordBase: __slots__ = () number: int + index: int offset: int name: str @@ -21,6 +22,10 @@ def offset_for(self, record_label) -> int: record = self.record_for(record_label) return record.offset + def offset_for_cluster(self, record_label) -> int: + record = self.record_for_cluster(record_label) + return record.offset + def record_for(self, record_label: Union[int, str]) -> IndexRecordBase: record = self.search(record_label) if isinstance(record, list): @@ -29,37 +34,98 @@ def record_for(self, record_label: Union[int, str]) -> IndexRecordBase: record = record[0] return record + def record_for_cluster(self, record_label: int) -> IndexRecordBase: + record = self.search_clusters(record_label) + if isinstance(record, list): + warnings.warn( + f"Multiple records found for {record_label}, using the first") + record = record[0] + return record + def search(self, i: Union[str, int, slice], **kwargs) -> 
Union[IndexRecordBase, List[IndexRecordBase]]: raise NotImplementedError() + def search_clusters(self, i: Optional[Union[int, slice]]=None, **kwargs) -> Union[IndexRecordBase, List[IndexRecordBase]]: + raise NotImplementedError() + def add(self, number: int, offset: int, name: str, analyte: Any, attributes=None): + """ + Add a new entry to the spectrum index. + + Parameters + ---------- + number : int + A numerical identifier for this spectrum. + offset : int + The offset in the file to reach the spectrum (in bytes if appropriate) + name : str, + A text identifier for this spectrum. + analyte : str, optional + A text representation of the analyte for that record + attributes : Dict[str, Any], optional + A key-value pair collection of this record, currently not supported. + """ + raise NotImplementedError() + + def add_cluster(self, number: int, offset: int, attributes=None): + """ + Add a new entry to the spectrum index. + + Parameters + ---------- + number : int + A numerical identifier for this spectrum. + offset : int + The offset in the file to reach the spectrum (in bytes if appropriate) + attributes : Dict[str, Any], optional + A key-value pair collection of this record, currently not supported. + """ raise NotImplementedError() def commit(self): + """ + Commit any index state to disk, if this index supports persistence. + + Has no effect on index types that do not have a persistence functionality. 
+ """ raise NotImplementedError() - def __iter__(self): + def iter_clusters(self) -> Iterator[IndexRecordBase]: + raise NotImplementedError() + + def iter_spectra(self) -> Iterator[IndexRecordBase]: for i in range(len(self)): yield self[i] + def _get_by_index(self, i: Union[int, slice]) -> Union[IndexRecordBase, List[IndexRecordBase]]: + raise NotImplementedError() + + def __iter__(self): + return self.iter_spectra() + def __getitem__(self, i: Union[int, str, slice]): return self.search(i) def __len__(self): raise NotImplementedError() - def __contains__(self): - raise NotImplementedError() + def __contains__(self, key) -> bool: + try: + hit = self.search(key) + return True + except (KeyError, IndexError, ValueError): + return False def check_names_unique(self) -> bool: - '''Checks that all indexed spectra have unique + """ + Checks that all indexed spectra have unique ``spectrum name`` parameters. Returns ------- bool: Whether the spectrum names in the index are unique. - ''' + """ seen = set() for record in self: if record.name in seen: diff --git a/implementations/python/mzlib/index/memory.py b/implementations/python/mzlib/index/memory.py index f82521f..6262875 100644 --- a/implementations/python/mzlib/index/memory.py +++ b/implementations/python/mzlib/index/memory.py @@ -1,46 +1,71 @@ import warnings import logging -from typing import Any, Dict, Optional, List, DefaultDict +from typing import Any, Dict, Iterator, Optional, List, DefaultDict, Union from numbers import Integral from collections import defaultdict +from mzlib.index.base import IndexRecordBase + from .base import IndexBase, IndexRecordBase logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) -class IndexRecord(IndexRecordBase): - __slots__ = ('number', 'offset', 'name', 'analyte', 'attributes') +class _IndexAttr: + __slots__ = () + + def get(self, key: str, default=None) -> Any: + if self.attributes is not None: + return self.attributes.get(key, default) + return default + 
+ def set(self, key: str, value: Any): + if self.attributes is not None: + self.attributes[key] = value + else: + self.attributes = {key: value} + + +class IndexRecord(IndexRecordBase, _IndexAttr): + """ + A spectrum index record. + + Attributes + ---------- + number : int + A numerical identifier for the spectrum + offset : int + The offset in the file to reach the spectrum (in bytes if appropriate) + name : str, + A text identifier for this spectrum. + analyte : str, optional + A text representation of the analyte for that record + attributes : Dict[str, Any], optional + A key-value pair collection of this record. + """ + + __slots__ = ('number', 'offset', 'name', 'analyte', 'index', 'attributes') number: int offset: int name: str + index: int analyte: Any attributes: Optional[Dict[str, Any]] - def __init__(self, number, offset, name, analyte, attributes=None): + def __init__(self, number, offset, name, analyte, index: int=None, attributes=None): self.number = number self.offset = offset self.name = name self.analyte = analyte + self.index = index self.attributes = attributes - def get(self, key: str, default=None) -> Any: - if self.attributes is not None: - return self.attributes.get(key, default) - return default - - def set(self, key: str, value: Any): - if self.attributes is not None: - self.attributes[key] = value - else: - self.attributes = {key: value} - def __repr__(self): - template = f"{self.__class__.__name__}({self.number}, {self.offset}, {self.name}, {self.analyte}, {self.attributes})" + template = f"{self.__class__.__name__}({self.number}, {self.offset}, {self.name}, {self.analyte}, {self.index}, {self.attributes})" return template def __eq__(self, other): @@ -74,26 +99,88 @@ def from_dict(cls, state: Dict) -> 'IndexRecord': return cls(**state) +class ClusterIndexRecord(IndexRecordBase, _IndexAttr): + """ + A spectrum cluster index record. + + Attributes + ---------- + number : int + A numerical identifier for this spectrum. 
+ offset : int + The offset in the file to reach the spectrum (in bytes if appropriate) + attributes : Dict[str, Any], optional + A key-value pair collection of this record + """ + + __slots__ = ('number', 'offset', 'attributes') + + def __init__(self, number, offset, attributes=None): + self.number = number + self.offset = offset + self.attributes = attributes + + def __repr__(self): + template = f"{self.__class__.__name__}({self.number}, {self.offset}, {self.attributes})" + return template + + def __eq__(self, other): + if self.number != other.number: + return False + elif self.offset != other.offset: + return False + if bool(self.attributes) == bool(other.attributes): + if bool(self.attributes) and self.attributes != other.attributes: + return False + # Implicitly allow None and empty dictionaries to be the same + return True + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self.name) + + def to_dict(self) -> Dict: + return { + k: getattr(self, k, None) for k in self.__slots__ + } + + @classmethod + def from_dict(cls, state: Dict) -> 'ClusterIndexRecord': + return cls(**state) + + class MemoryIndex(IndexBase): records: List[IndexRecord] + cluster_records: List[ClusterIndexRecord] metadata: Dict[str, Any] _dirty: bool + _by_key: Dict[int, IndexRecord] _by_name: DefaultDict[str, List[IndexRecord]] + _by_attr: DefaultDict[str, DefaultDict[Any, List[IndexRecord]]] @classmethod def from_filename(cls, filename, library=None): inst = cls() return inst, False - def __init__(self, records=None, metadata=None): + def __init__(self, records=None, cluster_records=None, metadata=None): self.records = list(records or []) + self.cluster_records = list(cluster_records or []) self._by_name = defaultdict(list) + self._by_key = {} self._by_attr = defaultdict(lambda: defaultdict(list)) self.metadata = metadata or {} self._dirty = True - def __iter__(self): + def iter_clusters(self) -> Iterator[IndexRecordBase]: + """Iterate over cluster 
entries in the index.""" + return iter(self.cluster_records) + + def iter_spectra(self): + """Iterate over spectrum entries in the index.""" return iter(self.records) def __len__(self): @@ -107,11 +194,11 @@ def search(self, i=None, **kwargs): raise NotImplementedError() if isinstance(i, Integral): try: - return self.records[i] + return self._by_key[i] except IndexError as err: raise KeyError(i) from err elif isinstance(i, slice): - return self.records[i] + return [self._by_key[i] for i in range(i.start, i.stop) if i in self._by_key] if i in self._by_name: records = self._by_name[i] if len(records) == 1: @@ -121,22 +208,74 @@ def search(self, i=None, **kwargs): else: raise KeyError(i) + def search_clusters(self, i=None, **kwargs): + if self._dirty: + self._update_index() + if i is None and kwargs: + # Executing attribute query + raise NotImplementedError() + if isinstance(i, Integral): + try: + return self.cluster_records[i] + except IndexError as err: + raise KeyError(i) from err + elif isinstance(i, slice): + return self.cluster_records[i] + def __getitem__(self, i): - return self.search(i) + return self._get_by_index(i) + + def _get_by_index(self, i: Union[int, slice]) -> Union[IndexRecord, List[IndexRecord]]: + return self.records[i] def _update_index(self): self.records.sort(key=lambda x: x.number) self._by_name = defaultdict(list) for record in self: + self._by_key[record.number] = record self._by_name[record.name].append(record) - self._dirty = False def add(self, number: int, offset: int, name: str, analyte: Any, attributes=None): - record = IndexRecord(number, offset, name, analyte, attributes) + """ + Add a new entry to the spectrum index. + + Parameters + ---------- + number : int + A numerical identifier for this spectrum. + offset : int + The offset in the file to reach the spectrum (in bytes if appropriate) + name : str, + A text identifier for this spectrum. 
+ analyte : str, optional + A text representation of the analyte for that record + attributes : Dict[str, Any], optional + A key-value pair collection of this record, currently not supported. + """ + n = len(self.records) + record = IndexRecord(number, offset, name, analyte, n, attributes) self.records.append(record) self._dirty = True + def add_cluster(self, number: int, offset: int, attributes=None): + """ + Add a new entry to the spectrum index. + + Parameters + ---------- + number : int + A numerical identifier for this spectrum. + offset : int + The offset in the file to reach the spectrum (in bytes if appropriate) + attributes : Dict[str, Any], optional + A key-value pair collection of this record, currently not supported. + """ + record = ClusterIndexRecord(number, offset, attributes) + self.cluster_records.append(record) + self._dirty = True + + def commit(self): self._update_index() diff --git a/implementations/python/mzlib/index/sql.py b/implementations/python/mzlib/index/sql.py index b1782d9..c7829e6 100644 --- a/implementations/python/mzlib/index/sql.py +++ b/implementations/python/mzlib/index/sql.py @@ -2,8 +2,11 @@ import numbers import pathlib import logging +from typing import Iterator, List, Union from sqlalchemy import Column, ForeignKey, Integer, Float, String, DateTime, Text, LargeBinary + +from mzlib.index.base import IndexRecordBase try: # For SQLAlchemy 2.0 from sqlalchemy.orm import declarative_base except ImportError: @@ -38,12 +41,23 @@ class SpectrumLibraryIndexRecord(Base): number = Column(Integer, nullable=False, index=True) offset = Column(Integer, nullable=False) name = Column(String(1024), nullable=False) + index = Column(Integer, nullable=False, index=True) analyte = Column(String(2014), nullable=True) def __repr__(self): return f"{self.__class__.__name__}({self.number}, {self.offset}, {self.name}, {self.analyte})" +class ClusterSpectrumLibraryIndexRecord(Base): + __tablename__ = 'cluster_spectrum_library_index_record' + id = 
Column(Integer, primary_key=True) + number = Column(Integer, nullable=False, index=True) + offset = Column(Integer, nullable=False) + + def __repr__(self): + return f"{self.__class__.__name__}({self.number}, {self.offset}, {self.name}, {self.analyte})" + + class SQLIndex(IndexBase): extension = '.splindex' @@ -77,6 +91,8 @@ def __init__(self, filename): self.index_filename = self.filename + self.extension self._cache = None self.connect() + self._size = len(self) + self._size_uncommitted = 0 def connect(self, create=None): filename = self.index_filename @@ -94,21 +110,78 @@ def connect(self, create=None): self.engine = engine self._cache = None - def add(self, number, offset, name, analyte, attributes=None): - record = SpectrumLibraryIndexRecord(number=number, offset=offset, name=name, analyte=analyte) + def add(self, number, offset, name, analyte=None, attributes=None): + """ + Add a new entry to the spectrum index. + + Parameters + ---------- + number : int + A numerical identifier for this spectrum. + offset : int + The offset in the file to reach the spectrum (in bytes if appropriate) + name : str, + A text identifier for this spectrum. + analyte : str, optional + A text representation of the analyte for that record + attributes : Dict[str, Any], optional + A key-value pair collection of this record, currently not supported. + """ + record = SpectrumLibraryIndexRecord(number=number, offset=offset, name=name, + index=self._size + self._size_uncommitted, analyte=analyte) + self._size_uncommitted += 1 + if attributes is not None: + raise NotImplementedError("Record attribute storage is not implemented") + self.session.add(record) + + def add_cluster(self, number: int, offset: int, attributes=None): + """ + Add a new entry to the spectrum index. + + Parameters + ---------- + number : int + A numerical identifier for this spectrum. 
+ offset : int + The offset in the file to reach the spectrum (in bytes if appropriate) + attributes : Dict[str, Any], optional + A key-value pair collection of this record, currently not supported. + """ + record = ClusterSpectrumLibraryIndexRecord(number=number, offset=offset) if attributes is not None: raise NotImplementedError("Record attribute storage is not implemented") self.session.add(record) def commit(self): + """Persist any new entries to disk.""" + self._size += self._size_uncommitted + self._size_uncommitted = 0 self.session.commit() - def __iter__(self): - for record in self.session.query(SpectrumLibraryIndexRecord).order_by(SpectrumLibraryIndexRecord.number).yield_per(10000): + def iter_clusters(self) -> Iterator[IndexRecordBase]: + """Iterate over cluster entries in the index.""" + for record in self.session.query(ClusterSpectrumLibraryIndexRecord).order_by( + ClusterSpectrumLibraryIndexRecord.number).yield_per(10000): + yield record + + def iter_spectra(self): + """Iterate over spectrum entries in the index.""" + for record in self.session.query(SpectrumLibraryIndexRecord).order_by( + SpectrumLibraryIndexRecord.number).yield_per(10000): yield record def __getitem__(self, i): - return self.search(i) + return self._get_by_index(i) + + def _get_by_index(self, i: Union[int, slice]) -> Union[SpectrumLibraryIndexRecord, List[SpectrumLibraryIndexRecord]]: + if isinstance(i, slice): + records = self.session.query(SpectrumLibraryIndexRecord).slice(i.start, i.stop).all() + if i.step: + raise NotImplementedError() + return records + else: + record = self.session.query(SpectrumLibraryIndexRecord).offset(i).limit(1).first() + return record def __len__(self): value = self.session.query(func.count(SpectrumLibraryIndexRecord.id)).scalar() @@ -148,3 +221,30 @@ def search(self, i, **kwargs): else: return records + def search_clusters(self, i, **kwargs): + if i is None and kwargs: + # Executing attribute query + raise NotImplementedError() + if isinstance(i, 
numbers.Integral): + if i < 0: + i = len(self) + i + if self._cache is not None and self._cache.number == i: + return self._cache + records = self.session.query(ClusterSpectrumLibraryIndexRecord).filter( + ClusterSpectrumLibraryIndexRecord.number == i).all() + + if len(records) == 1: + return records[0] + elif len(records) == 0: + raise IndexError(i) + else: + raise ValueError(f"Too many records found for spectrum number {i}") + elif isinstance(i, slice): + start = i.start or 0 + end = i.stop or float('inf') + records = self.session.query(ClusterSpectrumLibraryIndexRecord).filter( + ClusterSpectrumLibraryIndexRecord.number >= start, + ClusterSpectrumLibraryIndexRecord.number < end).all() + return records + else: + raise NotImplementedError() diff --git a/implementations/python/mzlib/spectrum.py b/implementations/python/mzlib/spectrum.py index 8f363f1..a554a07 100644 --- a/implementations/python/mzlib/spectrum.py +++ b/implementations/python/mzlib/spectrum.py @@ -15,9 +15,9 @@ #A class that holds data for each spectrum that is read from the SpectralLibrary class -SPECTRUM_NAME = "MS:1003061|spectrum name" -LIBRARY_ENTRY_KEY = "MS:1003237|library spectrum key" -LIBRARY_ENTRY_INDEX = "MS:1003062|library spectrum index" +SPECTRUM_NAME = "MS:1003061|library spectrum name" +LIBRARY_SPECTRUM_KEY = "MS:1003237|library spectrum key" +LIBRARY_SPECTRUM_INDEX = "MS:1003062|library spectrum index" PRECURSOR_MZ = "MS:1003208|experimental precursor monoisotopic m/z" CHARGE_STATE = "MS:1000041|charge state" @@ -66,8 +66,8 @@ def __init__(self, attributes=None, peak_list=None, analytes=None, self.interpretations = interpretations name = AttributeManagedProperty[str](SPECTRUM_NAME) - key = AttributeManagedProperty[int](LIBRARY_ENTRY_KEY) - index = AttributeManagedProperty[int](LIBRARY_ENTRY_INDEX) + key = AttributeManagedProperty[int](LIBRARY_SPECTRUM_KEY) + index = AttributeManagedProperty[int](LIBRARY_SPECTRUM_INDEX) precursor_mz = 
AttributeListManagedProperty[float]([PRECURSOR_MZ, "MS:1000744|selected ion m/z"]) precursor_charge = AttributeManagedProperty[int](CHARGE_STATE) @@ -123,9 +123,15 @@ def __str__(self): # pragma: no cover def write(self, format="text", **kwargs): # pragma: no cover """ - write - Write out the spectrum in any of the supported formats - """ + Write out the spectrum in any of the supported formats + Parameters + ---------- + format : str + The name of the format to write in + **kwargs + Passed to implementation + """ #### Set a buffer to fill with string data buffer = '' diff --git a/implementations/python/mzlib/spectrum_library.py b/implementations/python/mzlib/spectrum_library.py index a47604f..7e82470 100644 --- a/implementations/python/mzlib/spectrum_library.py +++ b/implementations/python/mzlib/spectrum_library.py @@ -1,15 +1,9 @@ #!/usr/bin/env python3 -from __future__ import print_function -import sys -def eprint(*args, **kwargs): - print(*args, file=sys.stderr, **kwargs) - -import re -import timeit import os import pathlib from typing import Type, List, Union +from mzlib.cluster import SpectrumCluster from mzlib.spectrum_library_index import SpectrumLibraryIndex from mzlib.spectrum import Spectrum @@ -60,9 +54,14 @@ def __init__(self, identifier=None, filename=None, format=None, index_type=None) Parameters ---------- + identifier : str, optional + A universal identifier for a hosted spectral library to fetch. + filename : str, os.PathLike, or io.IOBase, optional + A path-like or file-like object that holds a spectral library to read. format : string Name of the format for the current encoding of the library. - + index_type : Type[:class:`~.mzlib.index.base.IndexBase`] + The type of index to preferentially construct. 
""" self.backend = None self.identifier = identifier @@ -99,7 +98,10 @@ def _requires_backend(self): #### Define getter/setter for attribute identifier @property def identifier(self): - return(self._identifier) + if self._identifier is None: + if self._backend_initialized(): + return self.backend.identifier + return self._identifier @identifier.setter def identifier(self, identifier): @@ -148,11 +150,22 @@ def read(self): self._requires_backend() return self.backend.read() - def write(self, destination, format: str=None): - """Write the library to disk + def write(self, destination, format: str=None, **kwargs): + """ + Write the library to disk. + + Parameters + ---------- + destination : str, os.PathLike, or io.IOBase + The path or stream to write the library to. + format : str, Type, or Callable + The name of the format or a callable object that returns + a :class:`~.SpectrumLibraryWriterBase`. + **kwargs + Passed to implementation. """ filename = destination - if not isinstance(filename, (str, pathlib.Path)): + if not isinstance(filename, (str, pathlib.Path, os.PathLike)): filename = getattr(destination, "name", None) if format is None and filename is not None: @@ -167,7 +180,7 @@ def write(self, destination, format: str=None): if writer_type is None: raise ValueError( f"Could not find a format writer from file name {filename} or format {format}") - writer = writer_type(destination) + writer = writer_type(destination, **kwargs) if self._backend_initialized(): with writer: writer.write_library(self.backend) @@ -176,7 +189,8 @@ def write(self, destination, format: str=None): writer.close() def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Spectrum: - """Retrieve a single spectrum from the library. + """ + Retrieve a single spectrum from the library. 
Parameters ---------- @@ -192,6 +206,10 @@ def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Sp self._requires_backend() return self.backend.get_spectrum(spectrum_number, spectrum_name) + def get_cluster(self, cluster_number: int) -> SpectrumCluster: + self._requires_backend() + return self.backend.get_cluster(cluster_number) + def find_spectra(self, specification, **query_keys) -> List[Spectrum]: """ find_spectra - Return a list of spectra given query constraints @@ -214,7 +232,8 @@ def __iter__(self): return iter([]) def add_attribute(self, key, value, group_identifier=None): - """Add an attribute to the library level attributes store. + """ + Add an attribute to the library level attributes store. Parameters ---------- @@ -230,7 +249,8 @@ def add_attribute(self, key, value, group_identifier=None): return self.backend.add_attribute(key, value, group_identifier=group_identifier) def get_attribute(self, key, group_identifier=None): - """Get the value or values associated with a given + """ + Get the value or values associated with a given attribute key from the library level attribute store. Parameters @@ -249,7 +269,8 @@ def get_attribute(self, key, group_identifier=None): return self.backend.get_attribute(key, group_identifier=group_identifier) def remove_attribute(self, key, group_identifier=None): - """Remove the value or values associated with a given + """ + Remove the value or values associated with a given attribute key from the library level attribute store. This rebuilds the entire store, which may be expensive. @@ -266,7 +287,8 @@ def remove_attribute(self, key, group_identifier=None): return self.backend.remove_attribute(key, group_identifier=group_identifier) def has_attribute(self, key): - """Test for the presence of a given attribute in the library + """ + Test for the presence of a given attribute in the library level store. 
Parameters diff --git a/implementations/python/pyproject.toml b/implementations/python/pyproject.toml new file mode 100644 index 0000000..4b2cb8c --- /dev/null +++ b/implementations/python/pyproject.toml @@ -0,0 +1,8 @@ +[tool.ruff] +target-version = "py38" +line-length = 120 +select = ["D"] +ignore = ["D415", "D400", "D212", "D205", "D203", "D105"] + +[tool.ruff.pydocstyle] +convention = "numpy" \ No newline at end of file diff --git a/implementations/python/tests/test_data/bad_peak_annotations.mzlb.txt b/implementations/python/tests/test_data/bad_peak_annotations.mzlb.txt index 74029d3..80b01f4 100644 --- a/implementations/python/tests/test_data/bad_peak_annotations.mzlb.txt +++ b/implementations/python/tests/test_data/bad_peak_annotations.mzlb.txt @@ -4,7 +4,7 @@ MS:1003188|library name=tests/test_data/chinese_hamster_hcd_selected_head.msp -MS:1003061|spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV +MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 MS:1000744|selected ion m/z=855.4538 @@ -137,7 +137,7 @@ MS:1003169|proforma peptidoform sequence=AAAAC[Carbamidomethyl]ALTPGPLADLAAR 1496.7792 11918.3 q15/-6.5ppm -MS:1003061|spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV +MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 MS:1000744|selected ion m/z=855.4538 diff --git a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json index f329cc0..33d4ff7 100644 --- a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json +++ b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json @@ -104,7 +104,7 @@ }, { "accession": "MS:1003061", - "name": "spectrum name", + "name": "library 
spectrum name", "value": "AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV" }, { @@ -611,7 +611,7 @@ }, { "accession": "MS:1003061", - "name": "spectrum name", + "name": "library spectrum name", "value": "AAAACALTPGPLADLAAR/2_1(4,C,CAM)_53eV" }, { @@ -1469,7 +1469,7 @@ }, { "accession": "MS:1003061", - "name": "spectrum name", + "name": "library spectrum name", "value": "AAAAGQTGTVPPGAPGALPLPGMAIVK/2_0_76eV" }, { @@ -2081,7 +2081,7 @@ }, { "accession": "MS:1003061", - "name": "spectrum name", + "name": "library spectrum name", "value": "AAAAGSTSVKPIFSR/2_0_44eV" }, { @@ -2660,7 +2660,7 @@ }, { "accession": "MS:1003061", - "name": "spectrum name", + "name": "library spectrum name", "value": "AAAAGSTSVKPIFSR/3_0_28eV" }, { @@ -3389,7 +3389,7 @@ }, { "accession": "MS:1003061", - "name": "spectrum name", + "name": "library spectrum name", "value": "AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_50eV" }, { @@ -3839,7 +3839,7 @@ }, { "accession": "MS:1003061", - "name": "spectrum name", + "name": "library spectrum name", "value": "AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_52eV" }, { diff --git a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt index 43cc2f9..1599ab0 100644 --- a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt +++ b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt @@ -4,7 +4,7 @@ MS:1003188|library name=tests/test_data/chinese_hamster_hcd_selected_head.msp -MS:1003061|spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV +MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 MS:1000744|selected ion m/z=855.4538 @@ -135,7 +135,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=4 1496.7792 11918.3 y15/-6.5ppm -MS:1003061|spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_53eV +MS:1003061|library 
spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_53eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 MS:1000744|selected ion m/z=855.4538 @@ -383,7 +383,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=5 1628.3004 719.6 ? -MS:1003061|spectrum name=AAAAGQTGTVPPGAPGALPLPGMAIVK/2_0_76eV +MS:1003061|library spectrum name=AAAAGQTGTVPPGAPGALPLPGMAIVK/2_0_76eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 MS:1000744|selected ion m/z=1207.1672 @@ -549,7 +549,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0 1980.9857 3567.9 ? -MS:1003061|spectrum name=AAAAGSTSVKPIFSR/2_0_44eV +MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/2_0_44eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 MS:1000744|selected ion m/z=731.9043 @@ -704,7 +704,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=1 1465.9423 113.63 ? -MS:1003061|spectrum name=AAAAGSTSVKPIFSR/3_0_28eV +MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/3_0_28eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=3 MS:1000744|selected ion m/z=488.2719 @@ -909,7 +909,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0 1469.9915 925.5 ? -MS:1003061|spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_50eV +MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_50eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 MS:1000744|selected ion m/z=830.8834 @@ -1021,7 +1021,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=6 1670.2889 140.136 ? 
-MS:1003061|spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_52eV +MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_52eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 MS:1000744|selected ion m/z=830.8834 diff --git a/implementations/python/tests/test_data/clusters_example.mzlb b/implementations/python/tests/test_data/clusters_example.mzlb new file mode 100644 index 0000000..6b94bd6 --- /dev/null +++ b/implementations/python/tests/test_data/clusters_example.mzlb @@ -0,0 +1,142 @@ + + +MS:1003320|spectrum cluster size=6 +MS:1003268|spectrum cluster member spectrum keys=1,6,23,63,89 +MS:1003269|spectrum cluster member USI=mzspec:PXD000561:Adult_Frontalcortex_bRP_Elite_85_f09:scan:17555 +[1]MS:1003321|summary statistics of clustered spectra=MS:1003304|spectral dot product +[1]MS:1003176|attribute mean=0.7 +[2]MS:1003321|summary statistics of clustered spectra=MS:1003208|experimental precursor monoisotopic m/z +[2]MS:1003176|attribute mean=1029.05 +[2]MS:1003177|attribute standard deviation=0.41 +MS:1003322|spectrum cluster best representative=63 + + +MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV +MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum +MS:1000041|charge state=2 +MS:1000744|selected ion m/z=855.4538 +MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation +[1]MS:1000045|collision energy=46 +[1]UO:0000000|unit=UO:0000266|electronvolt +MS:1003057|scan number=5538 +MS:1003203|constituent spectrum file="CHO-K1_bRPLC_C1.RAW.FT.hcd.ch.MGF" +MS:1003070|number of replicate spectra used=1 +MS:1003069|number of replicate spectra available=2 +MS:1000002|sample name="jhu_cho_brplc_cam" +MS:1000028|detector resolution=7500 +[2]MS:1000828|isolation window lower offset=0.95 +[2]UO:0000000|unit=MS:1000040|m/z +[3]MS:1000829|isolation window upper offset=0.95 +[3]UO:0000000|unit=MS:1000040|m/z +MS:1003085|previous MS1 scan precursor 
intensity=8799173.32 +MS:1003086|precursor apex intensity=25273307.5 +MS:1000512|filter string="FTMS + p NSI d Full ms2 855.96@hcd35.00 [140.00-1725.00]" +MS:1003059|number of peaks=87 +[4]MS:1003275|other attribute name=Se +[4]MS:1003276|other attribute value=1(^G1:sc=8.13346e-015) + +MS:1000224|molecular mass=1710.9076 +MS:1000888|stripped peptide sequence=AAAACALTPGPLADLAAR +[1]MS:1001975|delta m/z=1.4 +[1]UO:0000000|unit=UO:0000169|parts per million +MS:1003208|experimental precursor monoisotopic m/z=855.455 +MS:1003169|proforma peptidoform sequence=AAAAC[Carbamidomethyl]ALTPGPLADLAAR +MS:1001117|theoretical mass=1708.89303961159 +[2]MS:1003048|number of enzymatic termini=2 +[2]MS:1001045|cleavage agent name=MS:1001251|Trypsin +[2]MS:1001112|n-terminal flanking residue=R +[2]MS:1001113|c-terminal flanking residue=L +[2]MS:1000885|protein accession="tr|G3IJB9|G3IJB9_CRIGR UDP-N-acetylhexosamine pyrophosphorylase-like protein 1 OS=Cricetulus griseus GN=I79_023952 PE=4 SV=1" + +MS:1003079|total unassigned intensity fraction=0.2848 +MS:1003080|top 20 peak unassigned intensity fraction=0.1879 +MS:1003289|intensity of highest unassigned peak=0.45 +MS:1003290|number of unassigned peaks among top 20 peaks=4 + +143.0823 14791.5 b2/5.6ppm +153.2575 5008.6 ? +159.0917 11531.8 ? +162.5977 5804.6 ? +169.0972 12931.0 ? +175.1193 18211.1 y1/2.0ppm,IR/2.0ppm +194.0699 6011.0 ? +199.1074 6737.4 ? +201.7527 5576.3 ? +212.1051 13786.1 ? +214.1195 82269.8 b3/4.1ppm +229.1552 7852.0 ? +232.0764 28555.9 ? +249.1071 4670.7 ? +257.1633 15001.9 a4/9.7ppm +276.4497 5288.0 ? +283.1411 24965.9 ? +283.3943 5090.5 ? +285.1567 48285.5 b4/3.4ppm +302.1834 10558.9 ? +303.114 52736.6 ? +317.1891 6585.3 y3/-12.9ppm +345.1593 18050.6 ? +354.1793 31400.1 b5-NH2-CO-CH2SH/5.9ppm +356.1916 7124.1 ? +370.3776 6139.7 ? +371.6662 11983.3 ? +374.1505 34930.7 ? +416.2029 8953.6 ? +417.1931 16940.5 a5/3.9ppm +425.2171 15449.8 b6-NH2-CO-CH2SH/6.6ppm +428.2024 8874.5 m5:8-H2O/14.4ppm +428.249 6831.1 ? 
+430.278 30118.6 y4/1.8ppm +445.1886 57883.2 b5/5.0ppm +446.2049 8868.2 m5:8/-4.2ppm +457.1753 5403.5 ? +467.2682 6117.4 ? +469.2735 6906.4 ? +471.2018 9435.3 ? +487.2299 15480.3 ? +488.2367 13813.7 a6/16.6ppm +490.7795 12739.9 y10^2/-0.6ppm +491.2839 6651.0 y10+i^2/5.4ppm +495.9153 6370.5 ? +499.2364 5273.9 m4:8-H2O/6.2ppm +516.2236 49862.4 b6/0.2ppm +528.2323 9881.3 ? +530.2752 10333.1 ? +541.3164 7041.3 ? +545.308 6833.8 y5/7.0ppm +558.2726 23958.7 ? +570.3232 6006.3 ? +601.3148 11919.6 a7/3.6ppm +617.2834 5841.2 ? +617.3372 4342.1 ? +629.3052 22419.3 b7/-3.7ppm +641.3058 11147.2 m2:8-H2O/-2.7ppm +659.3259 9164.2 m2:8/11.8ppm +685.3383 6861.0 ? +712.3472 13798.2 b8-H2O/3.6ppm +730.3578 16426.3 b8/3.5ppm +826.4765 18892.5 y8/-2.0ppm +855.017 7062.7 ? +855.5167 249849.0 ? +856.523 57946.3 ? +883.4993 29383.7 y9/-0.3ppm +922.4447 6413.2 ? +934.4906 6605.4 ? +963.5431 13545.1 y10-NH3/17.9ppm +980.556 559065.0 y10/3.7ppm +981.5587 216762.0 y10+i/3.6ppm +1037.5859 14218.2 ? +1038.5812 9574.2 ? +1067.8983 6611.6 ? +1081.6041 179858.0 y11/3.8ppm +1082.6077 79540.9 y11+i/4.5ppm +1143.0283 6099.8 ? +1194.6899 106907.0 y12/4.9ppm +1195.6869 50339.1 y12+i/-0.1ppm +1265.7273 83029.7 y13/4.8ppm +1266.7281 42164.7 y13+i/3.2ppm +1390.1328 5531.5 ? +1395.6904 8549.2 ? 
+1425.7538 51400.1 y14/1.4ppm +1426.7601 40643.8 y14+i/3.8ppm +1496.7792 11918.3 y15/-6.5ppm diff --git a/implementations/python/tests/test_data/complex_interpretations.mzlb.txt b/implementations/python/tests/test_data/complex_interpretations.mzlb.txt index 1617e91..bc547e5 100644 --- a/implementations/python/tests/test_data/complex_interpretations.mzlb.txt +++ b/implementations/python/tests/test_data/complex_interpretations.mzlb.txt @@ -1,5 +1,5 @@ -MS:1003061|spectrum name=Test +MS:1003061|library spectrum name=Test MS:1000744|selected ion m/z=880.8902 MS:1000888|peptidoform=DSDDVPM[Oxidation]VLVGNKCDLAAR diff --git a/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.json b/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.json index 4c77e4d..8db18cf 100644 --- a/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.json +++ b/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.json @@ -49,7 +49,7 @@ }, { "accession": "MS:1003061", - "name": "spectrum name", + "name": "library spectrum name", "value": "Test" }, { diff --git a/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.txt b/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.txt index 3775eb3..5d648a9 100644 --- a/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.txt +++ b/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.txt @@ -1,5 +1,5 @@ -MS:1003061|spectrum name=Test +MS:1003061|library spectrum name=Test MS:1000744|selected ion m/z=880.8902 MS:1000888|peptidoform=DSDDVPM[Oxidation]VLVGNKCDLAAR diff --git a/implementations/python/tests/test_index.py b/implementations/python/tests/test_index.py index 01bb2a6..a10459b 100644 --- a/implementations/python/tests/test_index.py +++ b/implementations/python/tests/test_index.py @@ -28,7 +28,7 @@ def 
test_sequence_behavior(self): index = self._make_index(lib) assert len(index) == 7 record = index[3] - assert record.number == 3 + assert record.number == 4 assert record.name == "AAAAGSTSVKPIFSR/2_0_44eV" diff --git a/implementations/python/tests/test_library_backend.py b/implementations/python/tests/test_library_backend.py index a29f2a9..456c7b7 100644 --- a/implementations/python/tests/test_library_backend.py +++ b/implementations/python/tests/test_library_backend.py @@ -22,7 +22,7 @@ def test_sequence_behavior(self): assert len(lib) == 7 spec = lib[3] assert spec.get_attribute( - "MS:1003061|spectrum name") == "AAAAGSTSVKPIFSR/2_0_44eV" + "MS:1003061|library spectrum name") == "AAAAGSTSVKPIFSR/2_0_44eV" # TODO: Fix clipping in _buffer_from_stream first # def test_iteration(self): diff --git a/implementations/python/tests/test_spectrum.py b/implementations/python/tests/test_spectrum.py index 7e7288e..534486a 100644 --- a/implementations/python/tests/test_spectrum.py +++ b/implementations/python/tests/test_spectrum.py @@ -18,15 +18,15 @@ def get_spectrum(self, index): return library.get_spectrum(index) def test_write(self): - spectrum = self.get_spectrum(0) + spectrum = self.get_spectrum(1) buffer = spectrum.write('text') lines = buffer.splitlines() n_lines = len(lines) assert n_lines == 131 - assert buffer.startswith("\nMS:1003061|spectrum name") + assert buffer.startswith("\nMS:1003061|library spectrum name") def test_equality(self): - spectrum = self.get_spectrum(0) - spectrum2 = self.get_spectrum(0) + spectrum = self.get_spectrum(1) + spectrum2 = self.get_spectrum(1) assert spectrum == spectrum2 From f1cb7cb3daa3998a90cfc70bcc8e642845cae7e2 Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Thu, 11 May 2023 22:51:31 -0400 Subject: [PATCH 05/24] Fix up slicing --- implementations/python/mzlib/backends/bibliospec.py | 2 +- implementations/python/mzlib/index/memory.py | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git 
a/implementations/python/mzlib/backends/bibliospec.py b/implementations/python/mzlib/backends/bibliospec.py index e2c5580..dfba98e 100644 --- a/implementations/python/mzlib/backends/bibliospec.py +++ b/implementations/python/mzlib/backends/bibliospec.py @@ -58,7 +58,7 @@ def __getitem__(self, i): if isinstance(i, int): return self.search(i + 1) elif isinstance(i, slice): - return [self.search(j + 1) for j in range(i.start, i.stop, i.step)] + return [self.search(j + 1) for j in range(i.start or 0, i.stop or len(self), i.step or 1)] else: raise TypeError(f"Cannot index {self.__class__.__name__} with {i}") diff --git a/implementations/python/mzlib/index/memory.py b/implementations/python/mzlib/index/memory.py index 6262875..e401a62 100644 --- a/implementations/python/mzlib/index/memory.py +++ b/implementations/python/mzlib/index/memory.py @@ -198,7 +198,13 @@ def search(self, i=None, **kwargs): except IndexError as err: raise KeyError(i) from err elif isinstance(i, slice): - return [self._by_key[i] for i in range(i.start, i.stop) if i in self._by_key] + start = i.start + stop = i.stop + if start is None: + start = min(self._by_key) if self._by_key else 0 + if stop is None: + stop = max(self._by_key) if self._by_key else 0 + return [self._by_key[i] for i in range(start, stop) if i in self._by_key] if i in self._by_name: records = self._by_name[i] if len(records) == 1: From 778cb32a03c4aa2fe63314b0352e964bb12bcc3e Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Fri, 12 May 2023 09:02:31 -0400 Subject: [PATCH 06/24] Fix cluster retrieval in JSON backend --- implementations/python/mzlib/backends/json.py | 40 ++++++++++++++----- implementations/python/mzlib/backends/text.py | 8 ++-- .../python/mzlib/spectrum_library.py | 12 ++++++ 3 files changed, 46 insertions(+), 14 deletions(-) diff --git a/implementations/python/mzlib/backends/json.py b/implementations/python/mzlib/backends/json.py index 336dc86..15fd462 100644 --- a/implementations/python/mzlib/backends/json.py +++ 
b/implementations/python/mzlib/backends/json.py @@ -1,5 +1,4 @@ import io -import enum import json import logging import warnings @@ -7,7 +6,6 @@ from typing import Any, Iterable, List, Dict, Mapping, Union from pathlib import Path -from xml.dom.minidom import Attr from mzlib.cluster import SpectrumCluster from mzlib.index import MemoryIndex @@ -17,6 +15,7 @@ from mzlib.spectrum import Spectrum from .base import SpectralLibraryBackendBase, SpectralLibraryWriterBase, FORMAT_VERSION_TERM, AttributeSetTypes +from .utils import open_stream logger = logging.getLogger(__name__) @@ -57,10 +56,11 @@ def __init__(self, filename, index_type=None, read_metadata=True): self.buffer = {} self._load_buffer(self.filename) self.attributes = AttributeManager() - self._fill_attributes(self.buffer.get(LIBRARY_METADATA_KEY), self.attributes) self.index, was_initialized = index_type.from_filename(self.filename) if not was_initialized: self.create_index() + if read_metadata: + self.read_header() @classmethod def guess_from_filename(cls, filename: Union[str, Path, io.FileIO, Mapping]) -> bool: @@ -68,24 +68,25 @@ def guess_from_filename(cls, filename: Union[str, Path, io.FileIO, Mapping]) -> return SPECTRA_KEY in filename and LIBRARY_METADATA_KEY in filename return super(JSONSpectralLibrary, cls).guess_from_filename(filename) - def _load_buffer(self, filename_or_stream): - if isinstance(filename_or_stream, dict): + def _load_buffer(self, filename_or_stream: Union[str, Path, io.FileIO, Mapping]): + if isinstance(filename_or_stream, Mapping): self.buffer = filename_or_stream else: if hasattr(filename_or_stream, 'read'): self.handle = filename_or_stream else: - self.handle = open(filename_or_stream, 'rt') + self.handle = open_stream(filename_or_stream, 'rt') self.buffer = json.load(self.handle) self.handle.close() def read_header(self) -> bool: if self.buffer: - pass + self._fill_attributes(self.buffer.get(LIBRARY_METADATA_KEY), self.attributes) + return True return False def 
create_index(self): - for i, record in enumerate(self.buffer[SPECTRA_KEY]): + for i, record in enumerate(self.buffer.get(SPECTRA_KEY, [])): name = None key = None for attrib in record['attributes']: @@ -101,6 +102,16 @@ def create_index(self): if not name and not key: raise ValueError(f"Unidentified spectrum at index {i}") self.index.add(key, i, name, None, None) + for i, record in enumerate(self.buffer.get(CLUSTERS_KEY, [])): + key = None + for attrib in record[ELEMENT_ATTRIBUTES_KEY]: + if attrib["accession"] == "MS:1003267": + key = attrib['value'] + break + else: + if not name and not key: + raise ValueError(f"Unidentified spectrum cluster at index {i}") + self.index.add_cluster(key, i, None) def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Spectrum: """ @@ -109,7 +120,7 @@ def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Sp Parameters ---------- spectrum_number : int, optional - The index of the specturm in the library + The index of the spectrum in the library spectrum_name : str, optional The name of the spectrum in the library @@ -128,7 +139,14 @@ def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Sp spectrum = self._make_spectrum_from_payload(data) return spectrum - def _fill_attributes(self, attributes: List, store: Attributed, context_type: AttributeSetTypes=None) -> Attributed: + def get_cluster(self, cluster_number: int) -> SpectrumCluster: + offset = self.index.offset_for_cluster(cluster_number) + data = self.buffer[CLUSTERS_KEY][offset] + cluster = self._make_cluster_from_payload(data) + return cluster + + def _fill_attributes(self, attributes: List[Dict[str, Any]], store: Attributed, + context_type: AttributeSetTypes=None) -> Attributed: for attrib in attributes: if attrib['accession'] == "MS:1003212": if context_type == AttributeSetTypes.analyte: @@ -137,6 +155,8 @@ def _fill_attributes(self, attributes: List, store: Attributed, context_type: At 
self.entry_attribute_sets[attrib['value']].apply(store) elif context_type == AttributeSetTypes.interpretation: self.interpretation_attribute_sets[attrib['value']].apply(store) + elif context_type == AttributeSetTypes.cluster: + self.cluster_attribute_sets[attrib['value']].apply(store) else: raise ValueError(f"Could not infer which attribute set type to use for {context_type}") else: diff --git a/implementations/python/mzlib/backends/text.py b/implementations/python/mzlib/backends/text.py index f093c60..a197210 100644 --- a/implementations/python/mzlib/backends/text.py +++ b/implementations/python/mzlib/backends/text.py @@ -1,4 +1,3 @@ -from collections import deque import re import os import io @@ -6,6 +5,7 @@ import warnings import enum +from collections import deque from typing import ClassVar, List, Tuple, Union, Iterable from mzlib.annotation import parse_annotation @@ -193,11 +193,11 @@ def _parse_header_from_stream(self, stream: io.TextIOBase) -> Tuple[bool, int]: return True, nbytes return False, 0 - def read_header(self) -> Tuple[bool, int]: + def read_header(self) -> bool: if isinstance(self.filename, io.IOBase): - return self._parse_header_from_stream(self.filename) + return self._parse_header_from_stream(self.filename)[0] with open_stream(self.filename, 'rt', encoding='utf8') as stream: - return self._parse_header_from_stream(stream) + return self._parse_header_from_stream(stream)[0] def create_index(self) -> int: """ diff --git a/implementations/python/mzlib/spectrum_library.py b/implementations/python/mzlib/spectrum_library.py index 7e82470..564c54b 100644 --- a/implementations/python/mzlib/spectrum_library.py +++ b/implementations/python/mzlib/spectrum_library.py @@ -207,6 +207,18 @@ def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Sp return self.backend.get_spectrum(spectrum_number, spectrum_name) def get_cluster(self, cluster_number: int) -> SpectrumCluster: + """ + Retrieve a single spectrum cluster from the library. 
+ + Parameters + ---------- + cluster_number : int, optional + The index of the cluster in the library + + Returns + ------- + :class:`~.SpectrumCluster` + """ self._requires_backend() return self.backend.get_cluster(cluster_number) From a1a965e847a2b4f9a6c1372a08df7be6015b1479 Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Mon, 15 May 2023 22:31:52 -0400 Subject: [PATCH 07/24] Update machinery for testing --- ...chinese_hamster_hcd_selected_head.mzlb.txt | 14 ++-- implementations/python/mzlib/backends/base.py | 28 +++++--- .../python/mzlib/backends/bibliospec.py | 18 ++--- .../python/mzlib/backends/diann.py | 11 +++- implementations/python/mzlib/backends/json.py | 8 +-- implementations/python/mzlib/backends/msp.py | 18 ++--- .../python/mzlib/backends/spectronaut.py | 8 ++- implementations/python/mzlib/backends/text.py | 3 + .../python/mzlib/backends/utils.py | 19 ++++-- implementations/python/mzlib/index/sql.py | 15 +++-- implementations/python/mzlib/spectrum.py | 3 +- .../python/mzlib/spectrum_library.py | 20 ++++-- implementations/python/mzlib/tools/cli.py | 9 +++ implementations/python/mzlib/tools/utils.py | 65 +++++++++++++++++++ ...hinese_hamster_hcd_selected_head.mzlb.json | 29 +++++---- ...chinese_hamster_hcd_selected_head.mzlb.txt | 14 ++-- ...lex_interpretations_with_members.mzlb.json | 1 + 17 files changed, 207 insertions(+), 76 deletions(-) create mode 100644 implementations/python/mzlib/tools/utils.py diff --git a/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt b/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt index 79cd964..5cc3e74 100644 --- a/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt +++ b/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt @@ -7,7 +7,7 @@ MS:1003188|library name=examples/chinese_hamster_hcd_selected_head.msp MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV MS:1003065|spectrum aggregation 
type=MS:1003066|singleton spectrum MS:1000041|charge state=2 -MS:1000744|selected ion m/z=855.4538 +MS:1003208|experimental precursor monoisotopic m/z=855.4538 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=46 [1]UO:0000000|unit=UO:0000266|electronvolt @@ -138,7 +138,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=4 MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_53eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 -MS:1000744|selected ion m/z=855.4538 +MS:1003208|experimental precursor monoisotopic m/z=855.4538 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=53 [1]UO:0000000|unit=UO:0000266|electronvolt @@ -386,7 +386,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=5 MS:1003061|library spectrum name=AAAAGQTGTVPPGAPGALPLPGMAIVK/2_0_76eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 -MS:1000744|selected ion m/z=1207.1672 +MS:1003208|experimental precursor monoisotopic m/z=1207.1672 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=76 [1]UO:0000000|unit=UO:0000266|electronvolt @@ -552,7 +552,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0 MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/2_0_44eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 -MS:1000744|selected ion m/z=731.9043 +MS:1003208|experimental precursor monoisotopic m/z=731.9043 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=44 [1]UO:0000000|unit=UO:0000266|electronvolt @@ -707,7 +707,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=1 MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/3_0_28eV MS:1003065|spectrum aggregation 
type=MS:1003066|singleton spectrum MS:1000041|charge state=3 -MS:1000744|selected ion m/z=488.2719 +MS:1003208|experimental precursor monoisotopic m/z=488.2719 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=28 [1]UO:0000000|unit=UO:0000266|electronvolt @@ -912,7 +912,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0 MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_50eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 -MS:1000744|selected ion m/z=830.8834 +MS:1003208|experimental precursor monoisotopic m/z=830.8834 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=50 [1]UO:0000000|unit=UO:0000266|electronvolt @@ -1024,7 +1024,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=6 MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_52eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 -MS:1000744|selected ion m/z=830.8834 +MS:1003208|experimental precursor monoisotopic m/z=830.8834 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=52 [1]UO:0000000|unit=UO:0000266|electronvolt diff --git a/implementations/python/mzlib/backends/base.py b/implementations/python/mzlib/backends/base.py index b718414..8b6ad0f 100644 --- a/implementations/python/mzlib/backends/base.py +++ b/implementations/python/mzlib/backends/base.py @@ -19,7 +19,7 @@ from mzlib.attributes import Attributed, AttributedEntity, AttributeSet, AttributeManagedProperty from mzlib.ontology import _VocabularyResolverMixin -from .utils import open_stream, LineBuffer +from .utils import open_stream, _LineBuffer logger = logging.getLogger(__name__.rsplit(".", 1)[0]) logger.addHandler(logging.NullHandler()) @@ -129,7 +129,7 @@ def guess_from_filename(cls, filename: Union[str, Path, 
io.FileIO]) -> bool: return filename.endswith(cls.file_format) @classmethod - def guess_from_header(cls, filename) -> bool: + def guess_from_header(cls, filename: Union[str, Path, io.FileIO]) -> bool: """ Guess if the file is of this type by inspecting the file's header section @@ -146,7 +146,7 @@ def guess_from_header(cls, filename) -> bool: return False @classmethod - def guess_implementation(cls, filename, index_type=None, + def guess_implementation(cls, filename: Union[str, Path, io.FileIO], index_type=None, **kwargs) -> 'SpectralLibraryBackendBase': """ Guess the backend implementation to use with this file format. @@ -179,7 +179,7 @@ def guess_implementation(cls, filename, index_type=None, pass raise FormatInferenceFailure(f"Could not guess backend implementation for {filename}") - def __init__(self, filename): + def __init__(self, filename: Union[str, Path, io.FileIO]): self.filename = filename self.index = MemoryIndex() @@ -332,7 +332,7 @@ def __getitem__(self, i) -> Union[Spectrum, List[Spectrum]]: return result @classmethod - def has_index_preference(cls, filename: str) -> Type[IndexBase]: + def has_index_preference(cls, filename: Union[str, Path, io.FileIO]) -> Type[IndexBase]: """ Does this backend prefer a particular index for this file? @@ -358,7 +358,14 @@ def has_index_preference(cls, filename: str) -> Type[IndexBase]: except Exception: return MemoryIndex - def read(self): + def read(self) -> Iterator[Union[Spectrum, SpectrumCluster]]: + """ + Create a sequential iterator over the spectrum library. 
+ + Yields + ------ + entry : Union[:class:`~.Spectrum`, :class:`~.SpectrumCluster`] + """ raise NotImplementedError() def _add_attribute_set(self, attribute_set: AttributeSet, @@ -383,8 +390,8 @@ def summarize_parsing_errors(self) -> Dict: class _PlainTextSpectralLibraryBackendBase(SpectralLibraryBackendBase): - def __init__(self, filename, index_type=None, read_metadata=True, - create_index: bool=True): + def __init__(self, filename: Union[str, Path, io.FileIO], index_type=None, + read_metadata: bool=True, create_index: bool=True): if index_type is None and create_index: index_type = self.has_index_preference(filename) @@ -428,7 +435,7 @@ def read(self) -> Iterator[Spectrum]: raise ValueError("Could not locate valid header") else: stream.seek(offset) - buffering_stream = LineBuffer(stream) + buffering_stream = _LineBuffer(stream) while True: # Will clip the first line of the next spectrum. Needs work buffer = self._buffer_from_stream(buffering_stream) @@ -490,7 +497,8 @@ def guess_from_header(cls, filename) -> bool: return False return False - def __init__(self, filename: str, index_type=None, delimiter='\t', read_metadata=True, create_index: bool = True, ** kwargs): + def __init__(self, filename: Union[str, Path, io.FileIO], index_type=None, delimiter='\t', + read_metadata: bool=True, create_index: bool = True, ** kwargs): if index_type is None: index_type = self.has_index_preference(filename) self._delimiter = delimiter diff --git a/implementations/python/mzlib/backends/bibliospec.py b/implementations/python/mzlib/backends/bibliospec.py index dfba98e..4a0a322 100644 --- a/implementations/python/mzlib/backends/bibliospec.py +++ b/implementations/python/mzlib/backends/bibliospec.py @@ -5,7 +5,7 @@ import sqlite3 import zlib -from typing import List, Mapping, Tuple, Iterable, Type +from typing import Iterator, List, Mapping, Tuple, Iterable, Type import numpy as np @@ -100,7 +100,6 @@ def __init__(self, filename, **kwargs): self.read_header() def read_header(self) 
-> bool: - '''Stub implementation, awaiting better understanding of Bibliospec to divine other metadata''' attribs = AttributeManager() attribs.add_attribute(FORMAT_VERSION_TERM, DEFAULT_VERSION) attribs.add_attribute("MS:1003207|library creation software", "Bibliospec") @@ -116,24 +115,25 @@ def read_header(self) -> bool: return True def _populate_analyte(self, analyte: Analyte, row: Mapping): - '''Fill an analyte with details describing a peptide sequence and inferring + """ + Fill an analyte with details describing a peptide sequence and inferring from context its traits based upon the assumptions Bibliospec makes. Bibliospec only stores modifications as delta masses. - ''' + """ peptide = self._correct_modifications_in_sequence(row) analyte.add_attribute("MS:1003169|proforma peptidoform sequence", str(peptide)) analyte.add_attribute("MS:1001117|theoretical mass", peptide.mass) analyte.add_attribute("MS:1000888|stripped peptide sequence", row['peptideSeq']) analyte.add_attribute(CHARGE_STATE, row['precursorCharge']) - def get_spectrum(self, spectrum_number: int = None, spectrum_name: str = None): - '''Read a spectrum from the spectrum library. + """ + Read a spectrum from the spectrum library. Bibliospec does not support alternative labeling of spectra with a plain text name so looking up by `spectrum_name` is not supported. - ''' + """ if spectrum_number is None: raise ValueError("Only spectrum number queries are supported. 
spectrum_number must have an integer value") @@ -201,5 +201,7 @@ def get_spectrum(self, spectrum_number: int = None, spectrum_name: str = None): spectrum.peak_list = peak_list return spectrum - + def read(self) -> Iterator[Spectrum]: + for rec in self.index: + yield self.get_spectrum(rec.number) diff --git a/implementations/python/mzlib/backends/diann.py b/implementations/python/mzlib/backends/diann.py index a119310..ea7a7aa 100644 --- a/implementations/python/mzlib/backends/diann.py +++ b/implementations/python/mzlib/backends/diann.py @@ -1,11 +1,13 @@ import json +import os from typing import List, Tuple, Dict, Iterator, Any, Union from pyteomics import proforma from mzlib import annotation -from mzlib.backends.base import DEFAULT_VERSION, FORMAT_VERSION_TERM, _CSVSpectralLibraryBackendBase +from mzlib.backends.base import DEFAULT_VERSION, FORMAT_VERSION_TERM, LIBRARY_NAME_TERM, _CSVSpectralLibraryBackendBase +from mzlib.backends.utils import open_stream from mzlib.spectrum import Spectrum, SPECTRUM_NAME @@ -66,11 +68,16 @@ def _spectrum_type(self): def read_header(self) -> bool: result = super().read_header() self.add_attribute(FORMAT_VERSION_TERM, DEFAULT_VERSION) + if hasattr(self.filename, 'name'): + name = self.filename.name.replace(".gz", '').rsplit('.', 1)[0].split(os.sep)[-1] + else: + name = self.filename.replace(".gz", '').rsplit(".", 1)[0].split(os.sep)[-1] + self.add_attribute(LIBRARY_NAME_TERM, name) self.add_attribute("MS:1003207|library creation software", "MS:1003253|DIA-NN") return result def create_index(self): - with open(self.filename, 'rb') as stream: + with open_stream(self.filename, 'rb') as stream: header = stream.readline() header_cols = header.split(b'\t') column_key = header_cols.index(b'transition_group_id') diff --git a/implementations/python/mzlib/backends/json.py b/implementations/python/mzlib/backends/json.py index 15fd462..d26a165 100644 --- a/implementations/python/mzlib/backends/json.py +++ 
b/implementations/python/mzlib/backends/json.py @@ -322,10 +322,10 @@ def _format_attributes(self, attributes_manager: Iterable) -> List: attributes = [] for attribute in attributes_manager: reformed_attribute = {} - if attribute.group_id is None: - key, value = attribute - else: - key, value, cv_param_group = attribute + key = attribute.key + value = attribute.value + if attribute.group_id is not None: + cv_param_group = attribute.group_id reformed_attribute['cv_param_group'] = cv_param_group term = None diff --git a/implementations/python/mzlib/backends/msp.py b/implementations/python/mzlib/backends/msp.py index 2900972..0f3704f 100644 --- a/implementations/python/mzlib/backends/msp.py +++ b/implementations/python/mzlib/backends/msp.py @@ -285,13 +285,13 @@ def add(self, handler: AttributeHandler): "precursor_charge": "MS:1000041|charge state", "precursorcharge": "MS:1000041|charge state", - "Parent": "MS:1000744|selected ion m/z", - "ObservedPrecursorMZ": "MS:1000744|selected ion m/z", - "PrecursorMZ": "MS:1000744|selected ion m/z", - "PRECURSORMZ": "MS:1000744|selected ion m/z", - "precursor": "MS:1000744|selected ion m/z", - "precursor_mass": "MS:1000744|selected ion m/z", - "precursormass": "MS:1000744|selected ion m/z", + "Parent": "MS:1003208|experimental precursor monoisotopic m/z", + "ObservedPrecursorMZ": "MS:1003208|experimental precursor monoisotopic m/z", + "PrecursorMZ": "MS:1003208|experimental precursor monoisotopic m/z", + "PRECURSORMZ": "MS:1003208|experimental precursor monoisotopic m/z", + "precursor": "MS:1003208|experimental precursor monoisotopic m/z", + "precursor_mass": "MS:1003208|experimental precursor monoisotopic m/z", + "precursormass": "MS:1003208|experimental precursor monoisotopic m/z", "Single": ["MS:1003065|spectrum aggregation type", "MS:1003066|singleton spectrum"], "Consensus": ["MS:1003065|spectrum aggregation type", "MS:1003067|consensus spectrum"], @@ -865,9 +865,9 @@ def _parse_header_from_stream(self, stream: io.IOBase) 
-> Tuple[bool, int]: attributes = AttributeManager() attributes.add_attribute(FORMAT_VERSION_TERM, DEFAULT_VERSION) if isinstance(self.filename, (str, os.PathLike)): - attributes.add_attribute(LIBRARY_NAME_TERM, self.filename) + attributes.add_attribute(LIBRARY_NAME_TERM, self.filename.rsplit('.msp', 1)[0].split(os.sep)[-1]) elif hasattr(stream, 'name'): - attributes.add_attribute(LIBRARY_NAME_TERM, stream.name) + attributes.add_attribute(LIBRARY_NAME_TERM, stream.name.rsplit('.msp', 1)[0].split(os.sep)[-1]) self.attributes.clear() self.attributes._from_iterable(attributes) if leader_terms_pattern.match(first_line): diff --git a/implementations/python/mzlib/backends/spectronaut.py b/implementations/python/mzlib/backends/spectronaut.py index 77551ab..c797bf5 100644 --- a/implementations/python/mzlib/backends/spectronaut.py +++ b/implementations/python/mzlib/backends/spectronaut.py @@ -1,4 +1,5 @@ import json +import os from typing import List, Tuple, Dict, Iterator, Any, Deque, Union @@ -6,7 +7,7 @@ from mzlib import annotation from mzlib.analyte import Analyte -from mzlib.backends.base import _CSVSpectralLibraryBackendBase, FORMAT_VERSION_TERM, DEFAULT_VERSION +from mzlib.backends.base import LIBRARY_NAME_TERM, _CSVSpectralLibraryBackendBase, FORMAT_VERSION_TERM, DEFAULT_VERSION from mzlib.backends.utils import open_stream from mzlib.spectrum import Spectrum, SPECTRUM_NAME @@ -95,6 +96,11 @@ def _spectrum_type(self): def read_header(self) -> bool: result = super().read_header() self.add_attribute(FORMAT_VERSION_TERM, DEFAULT_VERSION) + if hasattr(self.filename, 'name'): + name = self.filename.name.replace(".gz", '').rsplit('.', 1)[0].split(os.sep)[-1] + else: + name = self.filename.replace(".gz", '').rsplit(".", 1)[0].split(os.sep)[-1] + self.add_attribute(LIBRARY_NAME_TERM, name) self.add_attribute("MS:1003207|library creation software", "MS:1001327|Spectronaut") return result diff --git a/implementations/python/mzlib/backends/text.py 
b/implementations/python/mzlib/backends/text.py index a197210..04bad89 100644 --- a/implementations/python/mzlib/backends/text.py +++ b/implementations/python/mzlib/backends/text.py @@ -427,6 +427,9 @@ def real_line_number_or_nothing(): line = line.strip() if not line: break + # Skip comments for now, no round-trip + if line.startswith("#"): + continue if state == STATES.header: if START_OF_SPECTRUM_MARKER.match(line): match = START_OF_SPECTRUM_MARKER.match(line) diff --git a/implementations/python/mzlib/backends/utils.py b/implementations/python/mzlib/backends/utils.py index dd2f127..3711684 100644 --- a/implementations/python/mzlib/backends/utils.py +++ b/implementations/python/mzlib/backends/utils.py @@ -20,10 +20,16 @@ pass -class LineBuffer(object): +class _LineBuffer(object): + """ + An implementation detail that treats a stream/iterator over line strings as LIFO + queue that can have lines pushed back onto it. + """ + lines: deque stream: io.IOBase last_line: str + _stream_is_file_like: bool def __init__(self, stream: io.IOBase, lines: Iterable=None, last_line: str=None): if lines is None: @@ -31,12 +37,13 @@ def __init__(self, stream: io.IOBase, lines: Iterable=None, last_line: str=None) self.lines = deque(lines) self.stream = stream self.last_line = last_line + self._stream_is_file_like = hasattr(self.stream, 'readline') def readline(self) -> Union[bytes, str]: if self.lines: line = self.lines.popleft() else: - line = self.stream.readline() + line = self.stream.readline() if self._stream_is_file_like else next(self.stream) self.last_line = line return line @@ -77,13 +84,15 @@ def try_cast(value: Any) -> Union[str, int, float, Any]: return value -def test_gzipped(f): - """Checks the first two bytes of the +def test_gzipped(f) -> bool: + """ + Checks the first two bytes of the passed file for gzip magic numbers Parameters ---------- f : file-like or path-like + The file to test Returns ------- @@ -149,6 +158,8 @@ def open_stream(f: Union[io.IOBase, 
os.PathLike], mode='rt', buffer_size: Option class CaseInsensitiveDict(Dict[str, Any]): + """A case insensitive version of a dictionary with string keys.""" + def __init__(self, base=None, **kwargs): if base is not None: self.update(base) diff --git a/implementations/python/mzlib/index/sql.py b/implementations/python/mzlib/index/sql.py index c7829e6..24cc10b 100644 --- a/implementations/python/mzlib/index/sql.py +++ b/implementations/python/mzlib/index/sql.py @@ -1,3 +1,4 @@ +import io import os import numbers import pathlib @@ -12,8 +13,9 @@ except ImportError: from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import relationship +from sqlalchemy.engine import Engine from sqlalchemy import create_engine, func -from sqlalchemy.orm import sessionmaker +from sqlalchemy.orm import sessionmaker, scoped_session from .base import IndexBase @@ -61,6 +63,12 @@ def __repr__(self): class SQLIndex(IndexBase): extension = '.splindex' + filename: str + index_filename: str + _cache: SpectrumLibraryIndexRecord + session: scoped_session + engine: Engine + @classmethod def from_filename(cls, filename, library=None): if not isinstance(filename, (str, pathlib.Path)): @@ -77,7 +85,7 @@ def from_filename(cls, filename, library=None): return inst, exists @classmethod - def exists(cls, filename): + def exists(cls, filename: Union[str, pathlib.Path, io.FileIO]): if not isinstance(filename, (str, pathlib.Path)): if not hasattr(filename, "name"): raise TypeError(f"Could not coerce filename from {filename}") @@ -104,8 +112,7 @@ def connect(self, create=None): engine = create_engine("sqlite:///"+filename) Base.metadata.create_all(engine) - DBSession = sessionmaker(bind=engine) - session = DBSession() + session = scoped_session(sessionmaker(bind=engine)) self.session = session self.engine = engine self._cache = None diff --git a/implementations/python/mzlib/spectrum.py b/implementations/python/mzlib/spectrum.py index a554a07..9dd8097 100644 --- 
a/implementations/python/mzlib/spectrum.py +++ b/implementations/python/mzlib/spectrum.py @@ -69,7 +69,8 @@ def __init__(self, attributes=None, peak_list=None, analytes=None, key = AttributeManagedProperty[int](LIBRARY_SPECTRUM_KEY) index = AttributeManagedProperty[int](LIBRARY_SPECTRUM_INDEX) - precursor_mz = AttributeListManagedProperty[float]([PRECURSOR_MZ, "MS:1000744|selected ion m/z"]) + precursor_mz = AttributeListManagedProperty[float]( + [PRECURSOR_MZ, "MS:1003208|experimental precursor monoisotopic m/z"]) precursor_charge = AttributeManagedProperty[int](CHARGE_STATE) spectrum_aggregation = AttributeFacet[SpectrumAggregation](SpectrumAggregation) diff --git a/implementations/python/mzlib/spectrum_library.py b/implementations/python/mzlib/spectrum_library.py index 564c54b..2a9878f 100644 --- a/implementations/python/mzlib/spectrum_library.py +++ b/implementations/python/mzlib/spectrum_library.py @@ -2,12 +2,14 @@ import os import pathlib -from typing import Type, List, Union +from typing import Optional, Type, List, Union +from mzlib.attributes import AttributeManagedProperty +from mzlib.backends.base import LIBRARY_DESCRIPTION_TERM, LIBRARY_NAME_TERM, LIBRARY_URI_TERM, LIBRARY_VERSION_TERM from mzlib.cluster import SpectrumCluster from mzlib.spectrum_library_index import SpectrumLibraryIndex from mzlib.spectrum import Spectrum -from mzlib.index import MemoryIndex, SQLIndex, IndexBase +from mzlib.index import IndexBase from mzlib.backends import guess_implementation, SpectralLibraryBackendBase, SpectralLibraryWriterBase @@ -47,6 +49,10 @@ class SpectrumLibrary: format: str index_type: Type[IndexBase] + name = AttributeManagedProperty[str](LIBRARY_NAME_TERM) + description = AttributeManagedProperty[str](LIBRARY_DESCRIPTION_TERM) + uri = AttributeManagedProperty[str](LIBRARY_URI_TERM) + library_version = AttributeManagedProperty[str](LIBRARY_VERSION_TERM) def __init__(self, identifier=None, filename=None, format=None, index_type=None): """ @@ -86,6 +92,7 @@ 
def _init_from_filename(self, index_type: Type[IndexBase]=None): self.backend = backend_type( self.filename, index_type=index_type) self._format = self.backend.format_name + self._identifier = self.backend.identifier def _backend_initialized(self): return self.backend is not None @@ -97,15 +104,17 @@ def _requires_backend(self): #### Define getter/setter for attribute identifier @property - def identifier(self): + def identifier(self) -> Optional[str]: if self._identifier is None: if self._backend_initialized(): return self.backend.identifier return self._identifier @identifier.setter - def identifier(self, identifier): + def identifier(self, identifier: Optional[str]): self._identifier = identifier + if self.backend is not None: + self.backend.identifier = identifier #### Define getter/setter for attribute filename @property @@ -136,7 +145,8 @@ def attributes(self): return None def read_header(self) -> bool: - """Read just the header of the whole library + """ + Read just the header of the whole library Returns ------- diff --git a/implementations/python/mzlib/tools/cli.py b/implementations/python/mzlib/tools/cli.py index 31840d4..04ded01 100644 --- a/implementations/python/mzlib/tools/cli.py +++ b/implementations/python/mzlib/tools/cli.py @@ -14,6 +14,8 @@ from mzlib.validate.level import RequirementLevel from mzlib.ontology import ControlledVocabularyResolver +from mzlib.tools.utils import ColoringFormatter + CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help']) logger = logging.getLogger(__name__) @@ -46,6 +48,13 @@ def main(): format=format_string, datefmt="%H:%M:%S") + fmtr = ColoringFormatter(format_string, datefmt='%H:%M:%S') + + for handler in logging.getLogger().handlers: + handler.setFormatter( + fmtr + ) + @main.command("describe", short_help=("Produce a minimal textual description" " of a spectral library")) diff --git a/implementations/python/mzlib/tools/utils.py b/implementations/python/mzlib/tools/utils.py new file mode 100644 index 
0000000..ff8d2d8 --- /dev/null +++ b/implementations/python/mzlib/tools/utils.py @@ -0,0 +1,65 @@ +import logging +import re +from typing import Dict + + +class LevelAwareColoredLogFormatter(logging.Formatter): + try: + from colorama import Fore, Style + # GREY = Fore.WHITE + GREY = '' + BLUE = Fore.BLUE + GREEN = Fore.GREEN + YELLOW = Fore.YELLOW + RED = Fore.RED + BRIGHT = Style.BRIGHT + DIM = Style.DIM + BOLD_RED = Fore.RED + Style.BRIGHT + RESET = Style.RESET_ALL + except ImportError: + GREY = '' + BLUE = '' + GREEN = '' + YELLOW = '' + RED = '' + BRIGHT = '' + DIM = '' + BOLD_RED = '' + RESET = '' + + def _colorize_field(self, fmt: str, field: str, color: str) -> str: + return re.sub("(" + field + ")", color + r"\1" + self.RESET, fmt) + + def _patch_fmt(self, fmt: str, level_color: str) -> str: + fmt = self._colorize_field(fmt, r"%\(asctime\)s", self.GREEN) + fmt = self._colorize_field(fmt, r"%\(name\).*?s", self.BLUE) + fmt = self._colorize_field(fmt, r"%\(message\).*?s", self.GREY) + if level_color: + fmt = self._colorize_field(fmt, r"%\(levelname\).*?s", level_color) + return fmt + + def __init__(self, fmt, level_color=None, **kwargs): + fmt = self._patch_fmt(fmt, level_color=level_color) + super().__init__(fmt, **kwargs) + + +class ColoringFormatter(logging.Formatter): + level_to_color = { + logging.INFO: LevelAwareColoredLogFormatter.GREEN, + logging.DEBUG: LevelAwareColoredLogFormatter.GREY + LevelAwareColoredLogFormatter.DIM, + logging.WARN: LevelAwareColoredLogFormatter.YELLOW + LevelAwareColoredLogFormatter.BRIGHT, + logging.ERROR: LevelAwareColoredLogFormatter.BOLD_RED, + logging.CRITICAL: LevelAwareColoredLogFormatter.BOLD_RED, + logging.FATAL: LevelAwareColoredLogFormatter.RED + LevelAwareColoredLogFormatter.DIM, + } + + _formatters: Dict[int, LevelAwareColoredLogFormatter] + + def __init__(self, fmt: str, **kwargs): + self._formatters = {} + for level, style in self.level_to_color.items(): + self._formatters[level] = 
LevelAwareColoredLogFormatter(fmt, level_color=style, **kwargs) + + def format(self, record: logging.LogRecord) -> str: + fmtr = self._formatters[record.levelno] + return fmtr.format(record) diff --git a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json index 33d4ff7..06888c4 100644 --- a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json +++ b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json @@ -14,6 +14,7 @@ "value": "tests/test_data/chinese_hamster_hcd_selected_head.msp" } ], + "clusters": [], "format_version": "1.0", "interpretation_attribute_sets": { "all": [] @@ -119,8 +120,8 @@ "value": 2 }, { - "accession": "MS:1000744", - "name": "selected ion m/z", + "accession": "MS:1003208", + "name": "experimental precursor monoisotopic m/z", "value": 855.4538 }, { @@ -626,8 +627,8 @@ "value": 2 }, { - "accession": "MS:1000744", - "name": "selected ion m/z", + "accession": "MS:1003208", + "name": "experimental precursor monoisotopic m/z", "value": 855.4538 }, { @@ -1484,8 +1485,8 @@ "value": 2 }, { - "accession": "MS:1000744", - "name": "selected ion m/z", + "accession": "MS:1003208", + "name": "experimental precursor monoisotopic m/z", "value": 1207.1672 }, { @@ -2096,8 +2097,8 @@ "value": 2 }, { - "accession": "MS:1000744", - "name": "selected ion m/z", + "accession": "MS:1003208", + "name": "experimental precursor monoisotopic m/z", "value": 731.9043 }, { @@ -2675,8 +2676,8 @@ "value": 3 }, { - "accession": "MS:1000744", - "name": "selected ion m/z", + "accession": "MS:1003208", + "name": "experimental precursor monoisotopic m/z", "value": 488.2719 }, { @@ -3404,8 +3405,8 @@ "value": 2 }, { - "accession": "MS:1000744", - "name": "selected ion m/z", + "accession": "MS:1003208", + "name": "experimental precursor monoisotopic m/z", "value": 830.8834 }, { @@ -3854,8 +3855,8 @@ "value": 2 
}, { - "accession": "MS:1000744", - "name": "selected ion m/z", + "accession": "MS:1003208", + "name": "experimental precursor monoisotopic m/z", "value": 830.8834 }, { diff --git a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt index 1599ab0..aaca838 100644 --- a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt +++ b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt @@ -7,7 +7,7 @@ MS:1003188|library name=tests/test_data/chinese_hamster_hcd_selected_head.msp MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 -MS:1000744|selected ion m/z=855.4538 +MS:1003208|experimental precursor monoisotopic m/z=855.4538 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=46 [1]UO:0000000|unit=UO:0000266|electronvolt @@ -138,7 +138,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=4 MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_53eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 -MS:1000744|selected ion m/z=855.4538 +MS:1003208|experimental precursor monoisotopic m/z=855.4538 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=53 [1]UO:0000000|unit=UO:0000266|electronvolt @@ -386,7 +386,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=5 MS:1003061|library spectrum name=AAAAGQTGTVPPGAPGALPLPGMAIVK/2_0_76eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 -MS:1000744|selected ion m/z=1207.1672 +MS:1003208|experimental precursor monoisotopic m/z=1207.1672 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation 
[1]MS:1000045|collision energy=76 [1]UO:0000000|unit=UO:0000266|electronvolt @@ -552,7 +552,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0 MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/2_0_44eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 -MS:1000744|selected ion m/z=731.9043 +MS:1003208|experimental precursor monoisotopic m/z=731.9043 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=44 [1]UO:0000000|unit=UO:0000266|electronvolt @@ -707,7 +707,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=1 MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/3_0_28eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=3 -MS:1000744|selected ion m/z=488.2719 +MS:1003208|experimental precursor monoisotopic m/z=488.2719 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=28 [1]UO:0000000|unit=UO:0000266|electronvolt @@ -912,7 +912,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0 MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_50eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 -MS:1000744|selected ion m/z=830.8834 +MS:1003208|experimental precursor monoisotopic m/z=830.8834 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=50 [1]UO:0000000|unit=UO:0000266|electronvolt @@ -1024,7 +1024,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=6 MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_52eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum MS:1000041|charge state=2 -MS:1000744|selected ion m/z=830.8834 +MS:1003208|experimental precursor monoisotopic m/z=830.8834 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation 
[1]MS:1000045|collision energy=52 [1]UO:0000000|unit=UO:0000266|electronvolt diff --git a/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.json b/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.json index 8db18cf..8fe0aee 100644 --- a/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.json +++ b/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.json @@ -3,6 +3,7 @@ "all": [] }, "attributes": [], + "clusters": [], "format_version": "1.0", "interpretation_attribute_sets": { "all": [] From ecadc0a20cb0e568d47ef7bd2475bf614480cfba Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Mon, 15 May 2023 23:08:20 -0400 Subject: [PATCH 08/24] Add tests for Spectronaut and DIA-NN libraries --- .../python/mzlib/backends/diann.py | 2 +- implementations/python/mzlib/ontology_term.py | 6 +- .../mzlib/spectrum_library_collection.py | 1 + .../python/mzlib/spectrum_library_index.py | 2 + .../mzlib/universal_spectrum_identifier.py | 3 +- .../human_serum.head.spectronaut.tsv | 145 +++++++++++++++++ ...04_canonical_sall_pv_plasma.head.diann.tsv | 147 ++++++++++++++++++ .../python/tests/test_library_backend.py | 40 ++++- 8 files changed, 338 insertions(+), 8 deletions(-) create mode 100644 implementations/python/tests/test_data/human_serum.head.spectronaut.tsv create mode 100644 implementations/python/tests/test_data/phl004_canonical_sall_pv_plasma.head.diann.tsv diff --git a/implementations/python/mzlib/backends/diann.py b/implementations/python/mzlib/backends/diann.py index ea7a7aa..f5b6755 100644 --- a/implementations/python/mzlib/backends/diann.py +++ b/implementations/python/mzlib/backends/diann.py @@ -106,7 +106,7 @@ def create_index(self): self.index.add( number=n, offset=offset, - name=key.decode("utf8"), + name=key.decode("utf8") , analyte=None ) n += 1 diff --git a/implementations/python/mzlib/ontology_term.py 
b/implementations/python/mzlib/ontology_term.py index 37a5384..5a96750 100644 --- a/implementations/python/mzlib/ontology_term.py +++ b/implementations/python/mzlib/ontology_term.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 - +# pragma: no cover #from __future__ import print_function #import sys #def eprint(*args, **kwargs): @@ -58,7 +58,7 @@ def __init__(self, line_list=None, verbose=0): #### If we have been given an input line_list on construction, parse it right away if line_list is not None: self.parse(line_list=line_list) - + ######################################################################### #### parse the line_list @@ -400,7 +400,7 @@ def parse(self, line_list=None, verbose=0): else: self.is_valid = False logging.critical("Number of errors while parsing term '%s': %i", self.name, self.n_errors) - + if self.n_errors > 0 or len(self.unparsable_line_list) > 0: print("=====================") self.show() diff --git a/implementations/python/mzlib/spectrum_library_collection.py b/implementations/python/mzlib/spectrum_library_collection.py index 08f4da6..d6541aa 100644 --- a/implementations/python/mzlib/spectrum_library_collection.py +++ b/implementations/python/mzlib/spectrum_library_collection.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# pragma: no cover from __future__ import print_function import sys def eprint(*args, **kwargs): diff --git a/implementations/python/mzlib/spectrum_library_index.py b/implementations/python/mzlib/spectrum_library_index.py index 07cbe5d..665b95a 100644 --- a/implementations/python/mzlib/spectrum_library_index.py +++ b/implementations/python/mzlib/spectrum_library_index.py @@ -1,4 +1,6 @@ #!/usr/bin/env python3 + +# pragma: no cover from __future__ import print_function import sys def eprint(*args, **kwargs): diff --git a/implementations/python/mzlib/universal_spectrum_identifier.py b/implementations/python/mzlib/universal_spectrum_identifier.py index e146f56..bfddb44 100644 --- 
a/implementations/python/mzlib/universal_spectrum_identifier.py +++ b/implementations/python/mzlib/universal_spectrum_identifier.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# pragma: no cover from __future__ import print_function import sys def eprint(*args, **kwargs): @@ -32,7 +33,7 @@ def __init__(self, usi=None): if usi: self.parse(verbose=None) - + # Attributes: # usi diff --git a/implementations/python/tests/test_data/human_serum.head.spectronaut.tsv b/implementations/python/tests/test_data/human_serum.head.spectronaut.tsv new file mode 100644 index 0000000..55db2fb --- /dev/null +++ b/implementations/python/tests/test_data/human_serum.head.spectronaut.tsv @@ -0,0 +1,145 @@ +ReferenceRun PrecursorCharge Workflow IntModifiedPeptide CV AllowForNormalization ModifiedPeptide StrippedPeptide iRT IonMobility iRTSourceSpecific BGSInferenceId IsProteotypic IntLabeledPeptide LabeledPeptide PrecursorMz ReferenceRunQvalue ReferenceRunMS1Response FragmentLossType FragmentNumber FragmentType FragmentCharge FragmentMz RelativeIntensity ExcludeFromAssay Database ProteinGroups UniProtIds Protein Name ProteinDescription Organisms OrganismId Genes Protein Existence Sequence Version FASTAName +IK_221028_C19_lib2_01 2 _AQIPILR_ -60 TRUE _AQIPILR_ AQIPILR 28.658491 0.7629655 28.450514|28.82925 P04114 TRUE _AQIPILR_ _AQIPILR_ 405.7634379 0.006434474 11824944 noloss 3 b 1 313.1870317 3.564491 FALSE sp P04114 P04114 APOB_HUMAN Apolipoprotein B-100 Homo sapiens 9606 APOB 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _AQIPILR_ -60 TRUE _AQIPILR_ AQIPILR 28.658491 0.7629655 28.450514|28.82925 P04114 TRUE _AQIPILR_ _AQIPILR_ 405.7634379 0.006434474 11824944 noloss 3 y 1 401.2870801 3.3441753 FALSE sp P04114 P04114 APOB_HUMAN Apolipoprotein B-100 Homo sapiens 9606 APOB 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _AQIPILR_ -60 TRUE _AQIPILR_ AQIPILR 28.658491 0.7629655 
28.450514|28.82925 P04114 TRUE _AQIPILR_ _AQIPILR_ 405.7634379 0.006434474 11824944 noloss 4 y 1 498.339844 100 FALSE sp P04114 P04114 APOB_HUMAN Apolipoprotein B-100 Homo sapiens 9606 APOB 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _AQIPILR_ -60 TRUE _AQIPILR_ AQIPILR 28.658491 0.7629655 28.450514|28.82925 P04114 TRUE _AQIPILR_ _AQIPILR_ 405.7634379 0.006434474 11824944 noloss 4 y 2 249.6735602 11.528888 FALSE sp P04114 P04114 APOB_HUMAN Apolipoprotein B-100 Homo sapiens 9606 APOB 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _AQIPILR_ -60 TRUE _AQIPILR_ AQIPILR 28.658491 0.7629655 28.450514|28.82925 P04114 TRUE _AQIPILR_ _AQIPILR_ 405.7634379 0.006434474 11824944 noloss 5 y 1 611.423908 36.853043 FALSE sp P04114 P04114 APOB_HUMAN Apolipoprotein B-100 Homo sapiens 9606 APOB 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _AQIPILR_ -60 TRUE _AQIPILR_ AQIPILR 28.658491 0.7629655 28.450514|28.82925 P04114 TRUE _AQIPILR_ _AQIPILR_ 405.7634379 0.006434474 11824944 NH3 4 y 1 481.3132956 5.708535 FALSE sp P04114 P04114 APOB_HUMAN Apolipoprotein B-100 Homo sapiens 9606 APOB 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _AGVLFGMSDR_ -40 TRUE _AGVLFGMSDR_ AGVLFGMSDR 46.12812 0.8644136 46.12812|44.301308 P09172 TRUE _AGVLFGMSDR_ _AGVLFGMSDR_ 526.763309 0.000140481 1200641 noloss 3 b 1 228.1342679 48.582985 FALSE sp P09172 P09172 DOPO_HUMAN Dopamine beta-hydroxylase Homo sapiens 9606 DBH 1 3 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _AGVLFGMSDR_ -40 TRUE _AGVLFGMSDR_ AGVLFGMSDR 46.12812 0.8644136 46.12812|44.301308 P09172 TRUE _AGVLFGMSDR_ _AGVLFGMSDR_ 526.763309 0.000140481 1200641 noloss 3 y 1 377.1779236 4.8015785 FALSE sp P09172 P09172 DOPO_HUMAN Dopamine beta-hydroxylase Homo 
sapiens 9606 DBH 1 3 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _AGVLFGMSDR_ -40 TRUE _AGVLFGMSDR_ AGVLFGMSDR 46.12812 0.8644136 46.12812|44.301308 P09172 TRUE _AGVLFGMSDR_ _AGVLFGMSDR_ 526.763309 0.000140481 1200641 noloss 4 b 1 341.2183319 4.1744895 TRUE sp P09172 P09172 DOPO_HUMAN Dopamine beta-hydroxylase Homo sapiens 9606 DBH 1 3 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _AGVLFGMSDR_ -40 TRUE _AGVLFGMSDR_ AGVLFGMSDR 46.12812 0.8644136 46.12812|44.301308 P09172 TRUE _AGVLFGMSDR_ _AGVLFGMSDR_ 526.763309 0.000140481 1200641 noloss 4 y 1 508.2184085 5.056169 FALSE sp P09172 P09172 DOPO_HUMAN Dopamine beta-hydroxylase Homo sapiens 9606 DBH 1 3 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _AGVLFGMSDR_ -40 TRUE _AGVLFGMSDR_ AGVLFGMSDR 46.12812 0.8644136 46.12812|44.301308 P09172 TRUE _AGVLFGMSDR_ _AGVLFGMSDR_ 526.763309 0.000140481 1200641 noloss 5 y 1 565.2398722 69.96272 FALSE sp P09172 P09172 DOPO_HUMAN Dopamine beta-hydroxylase Homo sapiens 9606 DBH 1 3 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _AGVLFGMSDR_ -40 TRUE _AGVLFGMSDR_ AGVLFGMSDR 46.12812 0.8644136 46.12812|44.301308 P09172 TRUE _AGVLFGMSDR_ _AGVLFGMSDR_ 526.763309 0.000140481 1200641 noloss 6 y 1 712.3082861 100 FALSE sp P09172 P09172 DOPO_HUMAN Dopamine beta-hydroxylase Homo sapiens 9606 DBH 1 3 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _AGVLFGMSDR_ -40 TRUE _AGVLFGMSDR_ AGVLFGMSDR 46.12812 0.8644136 46.12812|44.301308 P09172 TRUE _AGVLFGMSDR_ _AGVLFGMSDR_ 526.763309 0.000140481 1200641 noloss 7 y 1 825.3923501 51.118713 FALSE sp P09172 P09172 DOPO_HUMAN Dopamine beta-hydroxylase Homo sapiens 9606 DBH 1 3 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 
_AGVLFGMSDR_ -40 TRUE _AGVLFGMSDR_ AGVLFGMSDR 46.12812 0.8644136 46.12812|44.301308 P09172 TRUE _AGVLFGMSDR_ _AGVLFGMSDR_ 526.763309 0.000140481 1200641 noloss 8 y 1 924.460764 1.5956572 TRUE sp P09172 P09172 DOPO_HUMAN Dopamine beta-hydroxylase Homo sapiens 9606 DBH 1 3 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _AGVLFGMSDR_ -40 TRUE _AGVLFGMSDR_ AGVLFGMSDR 46.12812 0.8644136 46.12812|44.301308 P09172 TRUE _AGVLFGMSDR_ _AGVLFGMSDR_ 526.763309 0.000140481 1200641 NH3 3 y 1 360.1513752 3.1122544 TRUE sp P09172 P09172 DOPO_HUMAN Dopamine beta-hydroxylase Homo sapiens 9606 DBH 1 3 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _AGVLFGMSDR_ -40 TRUE _AGVLFGMSDR_ AGVLFGMSDR 46.12812 0.8644136 46.12812|44.301308 P09172 TRUE _AGVLFGMSDR_ _AGVLFGMSDR_ 526.763309 0.000140481 1200641 NH3 4 y 1 491.1918601 1.0637656 TRUE sp P09172 P09172 DOPO_HUMAN Dopamine beta-hydroxylase Homo sapiens 9606 DBH 1 3 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _AGVLFGMSDR_ -40 TRUE _AGVLFGMSDR_ AGVLFGMSDR 46.12812 0.8644136 46.12812|44.301308 P09172 TRUE _AGVLFGMSDR_ _AGVLFGMSDR_ 526.763309 0.000140481 1200641 H2O 6 y 1 694.2977213 1.3498487 TRUE sp P09172 P09172 DOPO_HUMAN Dopamine beta-hydroxylase Homo sapiens 9606 DBH 1 3 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _AGVLFGMSDR_ -40 TRUE _AGVLFGMSDR_ AGVLFGMSDR 46.12812 0.8644136 46.12812|44.301308 P09172 TRUE _AGVLFGMSDR_ _AGVLFGMSDR_ 526.763309 0.000140481 1200641 NH3 6 y 1 695.2817378 2.9444132 TRUE sp P09172 P09172 DOPO_HUMAN Dopamine beta-hydroxylase Homo sapiens 9606 DBH 1 3 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _AGVLFGMSDR_ -40 TRUE _AGVLFGMSDR_ AGVLFGMSDR 46.12812 0.8644136 46.12812|44.301308 P09172 TRUE _AGVLFGMSDR_ _AGVLFGMSDR_ 526.763309 
0.000140481 1200641 H2O 3 y 1 359.1673588 2.1920948 TRUE sp P09172 P09172 DOPO_HUMAN Dopamine beta-hydroxylase Homo sapiens 9606 DBH 1 3 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _DEDNNLLTEK_ -40 TRUE _DEDNNLLTEK_ DEDNNLLTEK 13.850179 0.90230334 26.061718|13.850179 P09486 TRUE _DEDNNLLTEK_ _DEDNNLLTEK_ 595.7804071 0.000191886 313203.16 noloss 3 b 1 360.1037556 14.370086 TRUE sp P09486 P09486 SPRC_HUMAN SPARC Homo sapiens 9606 SPARC 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _DEDNNLLTEK_ -40 TRUE _DEDNNLLTEK_ DEDNNLLTEK 13.850179 0.90230334 26.061718|13.850179 P09486 TRUE _DEDNNLLTEK_ _DEDNNLLTEK_ 595.7804071 0.000191886 313203.16 noloss 3 y 1 377.2030757 69.91263 FALSE sp P09486 P09486 SPRC_HUMAN SPARC Homo sapiens 9606 SPARC 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _DEDNNLLTEK_ -40 TRUE _DEDNNLLTEK_ DEDNNLLTEK 13.850179 0.90230334 26.061718|13.850179 P09486 TRUE _DEDNNLLTEK_ _DEDNNLLTEK_ 595.7804071 0.000191886 313203.16 noloss 4 y 1 490.2871397 47.727154 FALSE sp P09486 P09486 SPRC_HUMAN SPARC Homo sapiens 9606 SPARC 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _DEDNNLLTEK_ -40 TRUE _DEDNNLLTEK_ DEDNNLLTEK 13.850179 0.90230334 26.061718|13.850179 P09486 TRUE _DEDNNLLTEK_ _DEDNNLLTEK_ 595.7804071 0.000191886 313203.16 noloss 5 b 1 588.1896105 3.50105 TRUE sp P09486 P09486 SPRC_HUMAN SPARC Homo sapiens 9606 SPARC 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _DEDNNLLTEK_ -40 TRUE _DEDNNLLTEK_ DEDNNLLTEK 13.850179 0.90230334 26.061718|13.850179 P09486 TRUE _DEDNNLLTEK_ _DEDNNLLTEK_ 595.7804071 0.000191886 313203.16 noloss 5 y 1 603.3712037 20.282309 FALSE sp P09486 P09486 SPRC_HUMAN SPARC Homo sapiens 9606 SPARC 1 1 
H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _DEDNNLLTEK_ -40 TRUE _DEDNNLLTEK_ DEDNNLLTEK 13.850179 0.90230334 26.061718|13.850179 P09486 TRUE _DEDNNLLTEK_ _DEDNNLLTEK_ 595.7804071 0.000191886 313203.16 noloss 6 b 1 701.2736745 4.035067 TRUE sp P09486 P09486 SPRC_HUMAN SPARC Homo sapiens 9606 SPARC 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _DEDNNLLTEK_ -40 TRUE _DEDNNLLTEK_ DEDNNLLTEK 13.850179 0.90230334 26.061718|13.850179 P09486 TRUE _DEDNNLLTEK_ _DEDNNLLTEK_ 595.7804071 0.000191886 313203.16 noloss 6 y 1 717.4141311 24.80533 FALSE sp P09486 P09486 SPRC_HUMAN SPARC Homo sapiens 9606 SPARC 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _DEDNNLLTEK_ -40 TRUE _DEDNNLLTEK_ DEDNNLLTEK 13.850179 0.90230334 26.061718|13.850179 P09486 TRUE _DEDNNLLTEK_ _DEDNNLLTEK_ 595.7804071 0.000191886 313203.16 noloss 7 y 1 831.4570586 21.473965 FALSE sp P09486 P09486 SPRC_HUMAN SPARC Homo sapiens 9606 SPARC 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _DEDNNLLTEK_ -40 TRUE _DEDNNLLTEK_ DEDNNLLTEK 13.850179 0.90230334 26.061718|13.850179 P09486 TRUE _DEDNNLLTEK_ _DEDNNLLTEK_ 595.7804071 0.000191886 313203.16 noloss 8 y 1 946.4840016 100 FALSE sp P09486 P09486 SPRC_HUMAN SPARC Homo sapiens 9606 SPARC 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _DEDNNLLTEK_ -40 TRUE _DEDNNLLTEK_ DEDNNLLTEK 13.850179 0.90230334 26.061718|13.850179 P09486 TRUE _DEDNNLLTEK_ _DEDNNLLTEK_ 595.7804071 0.000191886 313203.16 H2O 3 b 1 342.0931908 5.760026 TRUE sp P09486 P09486 SPRC_HUMAN SPARC Homo sapiens 9606 SPARC 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _DEDNNLLTEK_ -40 TRUE _DEDNNLLTEK_ DEDNNLLTEK 13.850179 0.90230334 26.061718|13.850179 
P09486 TRUE _DEDNNLLTEK_ _DEDNNLLTEK_ 595.7804071 0.000191886 313203.16 H2O 3 y 1 359.1925109 5.090061 TRUE sp P09486 P09486 SPRC_HUMAN SPARC Homo sapiens 9606 SPARC 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _DEDNNLLTEK_ -40 TRUE _DEDNNLLTEK_ DEDNNLLTEK 13.850179 0.90230334 26.061718|13.850179 P09486 TRUE _DEDNNLLTEK_ _DEDNNLLTEK_ 595.7804071 0.000191886 313203.16 H2O 4 y 1 472.2765749 4.874231 TRUE sp P09486 P09486 SPRC_HUMAN SPARC Homo sapiens 9606 SPARC 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _DEDNNLLTEK_ -40 TRUE _DEDNNLLTEK_ DEDNNLLTEK 13.850179 0.90230334 26.061718|13.850179 P09486 TRUE _DEDNNLLTEK_ _DEDNNLLTEK_ 595.7804071 0.000191886 313203.16 NH3 5 b 1 571.1630621 4.1113725 TRUE sp P09486 P09486 SPRC_HUMAN SPARC Homo sapiens 9606 SPARC 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _DEDNNLLTEK_ -40 TRUE _DEDNNLLTEK_ DEDNNLLTEK 13.850179 0.90230334 26.061718|13.850179 P09486 TRUE _DEDNNLLTEK_ _DEDNNLLTEK_ 595.7804071 0.000191886 313203.16 NH3 6 b 1 684.2471261 4.05031 TRUE sp P09486 P09486 SPRC_HUMAN SPARC Homo sapiens 9606 SPARC 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _DEDNNLLTEK_ -40 TRUE _DEDNNLLTEK_ DEDNNLLTEK 13.850179 0.90230334 26.061718|13.850179 P09486 TRUE _DEDNNLLTEK_ _DEDNNLLTEK_ 595.7804071 0.000191886 313203.16 NH3 6 y 1 700.3875827 3.518 TRUE sp P09486 P09486 SPRC_HUMAN SPARC Homo sapiens 9606 SPARC 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _DEDNNLLTEK_ -40 TRUE _DEDNNLLTEK_ DEDNNLLTEK 13.850179 0.90230334 26.061718|13.850179 P09486 TRUE _DEDNNLLTEK_ _DEDNNLLTEK_ 595.7804071 0.000191886 313203.16 NH3 7 y 1 814.4305102 7.3916 TRUE sp P09486 P09486 SPRC_HUMAN SPARC Homo sapiens 9606 SPARC 1 1 
H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _DEDNNLLTEK_ -40 TRUE _DEDNNLLTEK_ DEDNNLLTEK 13.850179 0.90230334 26.061718|13.850179 P09486 TRUE _DEDNNLLTEK_ _DEDNNLLTEK_ 595.7804071 0.000191886 313203.16 H2O 8 y 1 928.4734368 5.1134515 TRUE sp P09486 P09486 SPRC_HUMAN SPARC Homo sapiens 9606 SPARC 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _DEDNNLLTEK_ -40 TRUE _DEDNNLLTEK_ DEDNNLLTEK 13.850179 0.90230334 26.061718|13.850179 P09486 TRUE _DEDNNLLTEK_ _DEDNNLLTEK_ 595.7804071 0.000191886 313203.16 NH3 8 y 1 929.4574532 10.1338825 TRUE sp P09486 P09486 SPRC_HUMAN SPARC Homo sapiens 9606 SPARC 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _DEDNNLLTEK_ -40 TRUE _DEDNNLLTEK_ DEDNNLLTEK 13.850179 0.90230334 26.061718|13.850179 P09486 TRUE _DEDNNLLTEK_ _DEDNNLLTEK_ 595.7804071 0.000191886 313203.16 noloss 9 y 1 1075.526595 14.727533 TRUE sp P09486 P09486 SPRC_HUMAN SPARC Homo sapiens 9606 SPARC 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _GWVTDGFSSLK_ -40 TRUE _GWVTDGFSSLK_ GWVTDGFSSLK 62.688942 0.92365193 62.87711|62.59508 P02656 TRUE _GWVTDGFSSLK_ _GWVTDGFSSLK_ 598.8009456 0.000448056 9233573 noloss 3 b 1 343.1764671 3.8687427 TRUE sp P02656 P02656 APOC3_HUMAN Apolipoprotein C-III Homo sapiens 9606 APOC3 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _GWVTDGFSSLK_ -40 TRUE _GWVTDGFSSLK_ GWVTDGFSSLK 62.688942 0.92365193 62.87711|62.59508 P02656 TRUE _GWVTDGFSSLK_ _GWVTDGFSSLK_ 598.8009456 0.000448056 9233573 noloss 3 y 1 347.2288965 6.1732 FALSE sp P02656 P02656 APOC3_HUMAN Apolipoprotein C-III Homo sapiens 9606 APOC3 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _GWVTDGFSSLK_ -40 TRUE _GWVTDGFSSLK_ GWVTDGFSSLK 
62.688942 0.92365193 62.87711|62.59508 P02656 TRUE _GWVTDGFSSLK_ _GWVTDGFSSLK_ 598.8009456 0.000448056 9233573 noloss 4 b 1 444.2241455 1.421 TRUE sp P02656 P02656 APOC3_HUMAN Apolipoprotein C-III Homo sapiens 9606 APOC3 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _GWVTDGFSSLK_ -40 TRUE _GWVTDGFSSLK_ GWVTDGFSSLK 62.688942 0.92365193 62.87711|62.59508 P02656 TRUE _GWVTDGFSSLK_ _GWVTDGFSSLK_ 598.8009456 0.000448056 9233573 noloss 4 y 1 434.260925 9.774844 FALSE sp P02656 P02656 APOC3_HUMAN Apolipoprotein C-III Homo sapiens 9606 APOC3 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _GWVTDGFSSLK_ -40 TRUE _GWVTDGFSSLK_ GWVTDGFSSLK 62.688942 0.92365193 62.87711|62.59508 P02656 TRUE _GWVTDGFSSLK_ _GWVTDGFSSLK_ 598.8009456 0.000448056 9233573 noloss 5 y 1 581.3293389 2.66143 TRUE sp P02656 P02656 APOC3_HUMAN Apolipoprotein C-III Homo sapiens 9606 APOC3 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _GWVTDGFSSLK_ -40 TRUE _GWVTDGFSSLK_ GWVTDGFSSLK 62.688942 0.92365193 62.87711|62.59508 P02656 TRUE _GWVTDGFSSLK_ _GWVTDGFSSLK_ 598.8009456 0.000448056 9233573 noloss 6 y 1 638.3508026 19.109653 FALSE sp P02656 P02656 APOC3_HUMAN Apolipoprotein C-III Homo sapiens 9606 APOC3 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _GWVTDGFSSLK_ -40 TRUE _GWVTDGFSSLK_ GWVTDGFSSLK 62.688942 0.92365193 62.87711|62.59508 P02656 TRUE _GWVTDGFSSLK_ _GWVTDGFSSLK_ 598.8009456 0.000448056 9233573 noloss 7 y 1 753.3777456 20.841564 FALSE sp P02656 P02656 APOC3_HUMAN Apolipoprotein C-III Homo sapiens 9606 APOC3 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _GWVTDGFSSLK_ -40 TRUE _GWVTDGFSSLK_ GWVTDGFSSLK 62.688942 0.92365193 62.87711|62.59508 P02656 TRUE _GWVTDGFSSLK_ _GWVTDGFSSLK_ 598.8009456 0.000448056 
9233573 noloss 8 y 1 854.4254241 60.931602 FALSE sp P02656 P02656 APOC3_HUMAN Apolipoprotein C-III Homo sapiens 9606 APOC3 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _GWVTDGFSSLK_ -40 TRUE _GWVTDGFSSLK_ GWVTDGFSSLK 62.688942 0.92365193 62.87711|62.59508 P02656 TRUE _GWVTDGFSSLK_ _GWVTDGFSSLK_ 598.8009456 0.000448056 9233573 noloss 9 y 1 953.493838 36.67316 FALSE sp P02656 P02656 APOC3_HUMAN Apolipoprotein C-III Homo sapiens 9606 APOC3 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _GWVTDGFSSLK_ -40 TRUE _GWVTDGFSSLK_ GWVTDGFSSLK 62.688942 0.92365193 62.87711|62.59508 P02656 TRUE _GWVTDGFSSLK_ _GWVTDGFSSLK_ 598.8009456 0.000448056 9233573 H2O 4 y 1 416.2503601 2.702636 TRUE sp P02656 P02656 APOC3_HUMAN Apolipoprotein C-III Homo sapiens 9606 APOC3 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _GWVTDGFSSLK_ -40 TRUE _GWVTDGFSSLK_ GWVTDGFSSLK 62.688942 0.92365193 62.87711|62.59508 P02656 TRUE _GWVTDGFSSLK_ _GWVTDGFSSLK_ 598.8009456 0.000448056 9233573 H2O 7 y 1 735.3671808 2.1958628 TRUE sp P02656 P02656 APOC3_HUMAN Apolipoprotein C-III Homo sapiens 9606 APOC3 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _GWVTDGFSSLK_ -40 TRUE _GWVTDGFSSLK_ GWVTDGFSSLK 62.688942 0.92365193 62.87711|62.59508 P02656 TRUE _GWVTDGFSSLK_ _GWVTDGFSSLK_ 598.8009456 0.000448056 9233573 H2O 8 y 1 836.4148593 4.374068 TRUE sp P02656 P02656 APOC3_HUMAN Apolipoprotein C-III Homo sapiens 9606 APOC3 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _GWVTDGFSSLK_ -40 TRUE _GWVTDGFSSLK_ GWVTDGFSSLK 62.688942 0.92365193 62.87711|62.59508 P02656 TRUE _GWVTDGFSSLK_ _GWVTDGFSSLK_ 598.8009456 0.000448056 9233573 H2O 9 y 1 935.4832732 4.7364564 TRUE sp P02656 P02656 APOC3_HUMAN Apolipoprotein C-III Homo sapiens 
9606 APOC3 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _GWVTDGFSSLK_ -40 TRUE _GWVTDGFSSLK_ GWVTDGFSSLK 62.688942 0.92365193 62.87711|62.59508 P02656 TRUE _GWVTDGFSSLK_ _GWVTDGFSSLK_ 598.8009456 0.000448056 9233573 H2O 9 y 2 468.2452748 1.662614 TRUE sp P02656 P02656 APOC3_HUMAN Apolipoprotein C-III Homo sapiens 9606 APOC3 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _VTSIQDWVQK_ -40 TRUE _VTSIQDWVQK_ VTSIQDWVQK 39.73143 0.9102522 39.839462|39.662376 P00738 TRUE _VTSIQDWVQK_ _VTSIQDWVQK_ 602.3220451 0.000163706 907266.44 noloss 3 b 1 288.1553973 13.402388 TRUE sp P00738 P00738 HPT_HUMAN Haptoglobin Homo sapiens 9606 HP 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _VTSIQDWVQK_ -40 TRUE _VTSIQDWVQK_ VTSIQDWVQK 39.73143 0.9102522 39.839462|39.662376 P00738 TRUE _VTSIQDWVQK_ _VTSIQDWVQK_ 602.3220451 0.000163706 907266.44 noloss 3 y 1 374.2397956 35.40592 TRUE sp P00738 P00738 HPT_HUMAN Haptoglobin Homo sapiens 9606 HP 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _VTSIQDWVQK_ -40 TRUE _VTSIQDWVQK_ VTSIQDWVQK 39.73143 0.9102522 39.839462|39.662376 P00738 TRUE _VTSIQDWVQK_ _VTSIQDWVQK_ 602.3220451 0.000163706 907266.44 noloss 4 y 1 560.3191085 46.133404 TRUE sp P00738 P00738 HPT_HUMAN Haptoglobin Homo sapiens 9606 HP 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _VTSIQDWVQK_ -40 TRUE _VTSIQDWVQK_ VTSIQDWVQK 39.73143 0.9102522 39.839462|39.662376 P00738 TRUE _VTSIQDWVQK_ _VTSIQDWVQK_ 602.3220451 0.000163706 907266.44 noloss 5 y 1 675.3460516 49.92076 FALSE sp P00738 P00738 HPT_HUMAN Haptoglobin Homo sapiens 9606 HP 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _VTSIQDWVQK_ -40 TRUE _VTSIQDWVQK_ VTSIQDWVQK 39.73143 
0.9102522 39.839462|39.662376 P00738 TRUE _VTSIQDWVQK_ _VTSIQDWVQK_ 602.3220451 0.000163706 907266.44 noloss 6 y 1 803.4046291 88.30317 FALSE sp P00738 P00738 HPT_HUMAN Haptoglobin Homo sapiens 9606 HP 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _VTSIQDWVQK_ -40 TRUE _VTSIQDWVQK_ VTSIQDWVQK 39.73143 0.9102522 39.839462|39.662376 P00738 TRUE _VTSIQDWVQK_ _VTSIQDWVQK_ 602.3220451 0.000163706 907266.44 noloss 7 y 1 916.488693 35.01724 TRUE sp P00738 P00738 HPT_HUMAN Haptoglobin Homo sapiens 9606 HP 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _VTSIQDWVQK_ -40 TRUE _VTSIQDWVQK_ VTSIQDWVQK 39.73143 0.9102522 39.839462|39.662376 P00738 TRUE _VTSIQDWVQK_ _VTSIQDWVQK_ 602.3220451 0.000163706 907266.44 noloss 7 y 2 458.7479848 8.279677 TRUE sp P00738 P00738 HPT_HUMAN Haptoglobin Homo sapiens 9606 HP 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _VTSIQDWVQK_ -40 TRUE _VTSIQDWVQK_ VTSIQDWVQK 39.73143 0.9102522 39.839462|39.662376 P00738 TRUE _VTSIQDWVQK_ _VTSIQDWVQK_ 602.3220451 0.000163706 907266.44 noloss 8 y 1 1003.520721 100 FALSE sp P00738 P00738 HPT_HUMAN Haptoglobin Homo sapiens 9606 HP 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _VTSIQDWVQK_ -40 TRUE _VTSIQDWVQK_ VTSIQDWVQK 39.73143 0.9102522 39.839462|39.662376 P00738 TRUE _VTSIQDWVQK_ _VTSIQDWVQK_ 602.3220451 0.000163706 907266.44 noloss 9 y 1 1104.5684 49.551434 TRUE sp P00738 P00738 HPT_HUMAN Haptoglobin Homo sapiens 9606 HP 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _VTSIQDWVQK_ -40 TRUE _VTSIQDWVQK_ VTSIQDWVQK 39.73143 0.9102522 39.839462|39.662376 P00738 TRUE _VTSIQDWVQK_ _VTSIQDWVQK_ 602.3220451 0.000163706 907266.44 H2O 3 b 1 270.1448324 95.16555 FALSE sp P00738 P00738 HPT_HUMAN Haptoglobin Homo sapiens 
9606 HP 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _VTSIQDWVQK_ -40 TRUE _VTSIQDWVQK_ VTSIQDWVQK 39.73143 0.9102522 39.839462|39.662376 P00738 TRUE _VTSIQDWVQK_ _VTSIQDWVQK_ 602.3220451 0.000163706 907266.44 H2O 6 y 1 785.3940643 24.796286 TRUE sp P00738 P00738 HPT_HUMAN Haptoglobin Homo sapiens 9606 HP 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _VTSIQDWVQK_ -40 TRUE _VTSIQDWVQK_ VTSIQDWVQK 39.73143 0.9102522 39.839462|39.662376 P00738 TRUE _VTSIQDWVQK_ _VTSIQDWVQK_ 602.3220451 0.000163706 907266.44 H2O 6 y 2 393.2006704 13.1079035 TRUE sp P00738 P00738 HPT_HUMAN Haptoglobin Homo sapiens 9606 HP 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _VTSIQDWVQK_ -40 TRUE _VTSIQDWVQK_ VTSIQDWVQK 39.73143 0.9102522 39.839462|39.662376 P00738 TRUE _VTSIQDWVQK_ _VTSIQDWVQK_ 602.3220451 0.000163706 907266.44 NH3 6 y 1 786.3780807 46.45572 TRUE sp P00738 P00738 HPT_HUMAN Haptoglobin Homo sapiens 9606 HP 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _VTSIQDWVQK_ -40 TRUE _VTSIQDWVQK_ VTSIQDWVQK 39.73143 0.9102522 39.839462|39.662376 P00738 TRUE _VTSIQDWVQK_ _VTSIQDWVQK_ 602.3220451 0.000163706 907266.44 H2O 8 y 1 985.5101566 7.3205233 TRUE sp P00738 P00738 HPT_HUMAN Haptoglobin Homo sapiens 9606 HP 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _VTSIQDWVQK_ -40 TRUE _VTSIQDWVQK_ VTSIQDWVQK 39.73143 0.9102522 39.839462|39.662376 P00738 TRUE _VTSIQDWVQK_ _VTSIQDWVQK_ 602.3220451 0.000163706 907266.44 H2O 8 y 2 493.2587166 4.255348 TRUE sp P00738 P00738 HPT_HUMAN Haptoglobin Homo sapiens 9606 HP 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _VTSIQDWVQK_ -40 TRUE _VTSIQDWVQK_ VTSIQDWVQK 39.73143 0.9102522 39.839462|39.662376 
P00738 TRUE _VTSIQDWVQK_ _VTSIQDWVQK_ 602.3220451 0.000163706 907266.44 H2O 9 y 1 1086.557835 7.42398 TRUE sp P00738 P00738 HPT_HUMAN Haptoglobin Homo sapiens 9606 HP 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _VTSIQDWVQK_ -40 TRUE _VTSIQDWVQK_ VTSIQDWVQK 39.73143 0.9102522 39.839462|39.662376 P00738 TRUE _VTSIQDWVQK_ _VTSIQDWVQK_ 602.3220451 0.000163706 907266.44 noloss 4 b 2 201.1233688 88.65463 FALSE sp P00738 P00738 HPT_HUMAN Haptoglobin Homo sapiens 9606 HP 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _VTSIQDWVQK_ -40 TRUE _VTSIQDWVQK_ VTSIQDWVQK 39.73143 0.9102522 39.839462|39.662376 P00738 TRUE _VTSIQDWVQK_ _VTSIQDWVQK_ 602.3220451 0.000163706 907266.44 noloss 4 b 1 401.2394612 4.14027 TRUE sp P00738 P00738 HPT_HUMAN Haptoglobin Homo sapiens 9606 HP 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _VTSIQDWVQK_ -40 TRUE _VTSIQDWVQK_ VTSIQDWVQK 39.73143 0.9102522 39.839462|39.662376 P00738 TRUE _VTSIQDWVQK_ _VTSIQDWVQK_ 602.3220451 0.000163706 907266.44 noloss 9 y 2 552.7878382 4.422511 TRUE sp P00738 P00738 HPT_HUMAN Haptoglobin Homo sapiens 9606 HP 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _VTSIQDWVQK_ -40 TRUE _VTSIQDWVQK_ VTSIQDWVQK 39.73143 0.9102522 39.839462|39.662376 P00738 TRUE _VTSIQDWVQK_ _VTSIQDWVQK_ 602.3220451 0.000163706 907266.44 H2O 4 b 1 383.2288964 12.326735 TRUE sp P00738 P00738 HPT_HUMAN Haptoglobin Homo sapiens 9606 HP 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _VTSIQDWVQK_ -40 TRUE _VTSIQDWVQK_ VTSIQDWVQK 39.73143 0.9102522 39.839462|39.662376 P00738 TRUE _VTSIQDWVQK_ _VTSIQDWVQK_ 602.3220451 0.000163706 907266.44 NH3 8 y 1 986.4941731 24.063145 TRUE sp P00738 P00738 HPT_HUMAN Haptoglobin Homo sapiens 9606 HP 1 1 
H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _VTSIQDWVQK_ -40 TRUE _VTSIQDWVQK_ VTSIQDWVQK 39.73143 0.9102522 39.839462|39.662376 P00738 TRUE _VTSIQDWVQK_ _VTSIQDWVQK_ 602.3220451 0.000163706 907266.44 NH3 3 y 1 357.2132472 60.34427 FALSE sp P00738 P00738 HPT_HUMAN Haptoglobin Homo sapiens 9606 HP 1 1 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _QELSEAEQATR_ -40 TRUE _QELSEAEQATR_ QELSEAEQATR -14.638051 0.92576075 -14.638051|-17.592121 P01024 TRUE _QELSEAEQATR_ _QELSEAEQATR_ 631.3045807 0 1612831.6 noloss 3 b 1 371.192511 3.1688333 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _QELSEAEQATR_ -40 TRUE _QELSEAEQATR_ QELSEAEQATR -14.638051 0.92576075 -14.638051|-17.592121 P01024 TRUE _QELSEAEQATR_ _QELSEAEQATR_ 631.3045807 0 1612831.6 noloss 3 y 1 347.2037444 10.465045 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _QELSEAEQATR_ -40 TRUE _QELSEAEQATR_ QELSEAEQATR -14.638051 0.92576075 -14.638051|-17.592121 P01024 TRUE _QELSEAEQATR_ _QELSEAEQATR_ 631.3045807 0 1612831.6 noloss 4 y 1 475.2623219 23.276728 FALSE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _QELSEAEQATR_ -40 TRUE _QELSEAEQATR_ QELSEAEQATR -14.638051 0.92576075 -14.638051|-17.592121 P01024 TRUE _QELSEAEQATR_ _QELSEAEQATR_ 631.3045807 0 1612831.6 noloss 5 y 1 604.304915 37.09764 FALSE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _QELSEAEQATR_ -40 TRUE _QELSEAEQATR_ QELSEAEQATR -14.638051 0.92576075 
-14.638051|-17.592121 P01024 TRUE _QELSEAEQATR_ _QELSEAEQATR_ 631.3045807 0 1612831.6 noloss 6 y 1 675.3420288 64.2233 FALSE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _QELSEAEQATR_ -40 TRUE _QELSEAEQATR_ QELSEAEQATR -14.638051 0.92576075 -14.638051|-17.592121 P01024 TRUE _QELSEAEQATR_ _QELSEAEQATR_ 631.3045807 0 1612831.6 noloss 7 y 1 804.3846219 37.812637 FALSE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _QELSEAEQATR_ -40 TRUE _QELSEAEQATR_ QELSEAEQATR -14.638051 0.92576075 -14.638051|-17.592121 P01024 TRUE _QELSEAEQATR_ _QELSEAEQATR_ 631.3045807 0 1612831.6 noloss 8 y 1 891.4166503 96.74726 FALSE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _QELSEAEQATR_ -40 TRUE _QELSEAEQATR_ QELSEAEQATR -14.638051 0.92576075 -14.638051|-17.592121 P01024 TRUE _QELSEAEQATR_ _QELSEAEQATR_ 631.3045807 0 1612831.6 noloss 9 y 1 1004.500714 100 FALSE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _QELSEAEQATR_ -40 TRUE _QELSEAEQATR_ QELSEAEQATR -14.638051 0.92576075 -14.638051|-17.592121 P01024 TRUE _QELSEAEQATR_ _QELSEAEQATR_ 631.3045807 0 1612831.6 H2O 3 b 1 353.1819462 4.8028316 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _QELSEAEQATR_ -40 TRUE _QELSEAEQATR_ QELSEAEQATR -14.638051 0.92576075 -14.638051|-17.592121 P01024 TRUE _QELSEAEQATR_ _QELSEAEQATR_ 631.3045807 0 1612831.6 NH3 3 b 1 354.1659627 5.46505 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 
1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _QELSEAEQATR_ -40 TRUE _QELSEAEQATR_ QELSEAEQATR -14.638051 0.92576075 -14.638051|-17.592121 P01024 TRUE _QELSEAEQATR_ _QELSEAEQATR_ 631.3045807 0 1612831.6 NH3 4 y 1 458.2357735 1.8879279 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _QELSEAEQATR_ -40 TRUE _QELSEAEQATR_ QELSEAEQATR -14.638051 0.92576075 -14.638051|-17.592121 P01024 TRUE _QELSEAEQATR_ _QELSEAEQATR_ 631.3045807 0 1612831.6 NH3 6 y 1 658.3154804 1.120537 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _QELSEAEQATR_ -40 TRUE _QELSEAEQATR_ QELSEAEQATR -14.638051 0.92576075 -14.638051|-17.592121 P01024 TRUE _QELSEAEQATR_ _QELSEAEQATR_ 631.3045807 0 1612831.6 H2O 7 y 1 786.3740571 7.649042 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _QELSEAEQATR_ -40 TRUE _QELSEAEQATR_ QELSEAEQATR -14.638051 0.92576075 -14.638051|-17.592121 P01024 TRUE _QELSEAEQATR_ _QELSEAEQATR_ 631.3045807 0 1612831.6 H2O 8 y 1 873.4060855 4.228747 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _QELSEAEQATR_ -40 TRUE _QELSEAEQATR_ QELSEAEQATR -14.638051 0.92576075 -14.638051|-17.592121 P01024 TRUE _QELSEAEQATR_ _QELSEAEQATR_ 631.3045807 0 1612831.6 H2O 9 y 1 986.4901495 1.3095266 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _EEGTDLEVTANR_ -40 TRUE _EEGTDLEVTANR_ EEGTDLEVTANR 5.3526073 0.94296074 
5.3526073|3.901706 P20742 TRUE _EEGTDLEVTANR_ _EEGTDLEVTANR_ 667.3151454 0.000139519 230759.89 noloss 3 y 1 360.1989934 20.52395 TRUE sp P20742 P20742 PZP_HUMAN Pregnancy zone protein Homo sapiens 9606 PZP 1 4 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _EEGTDLEVTANR_ -40 TRUE _EEGTDLEVTANR_ EEGTDLEVTANR 5.3526073 0.94296074 5.3526073|3.901706 P20742 TRUE _EEGTDLEVTANR_ _EEGTDLEVTANR_ 667.3151454 0.000139519 230759.89 noloss 4 y 1 461.2466719 85.759026 FALSE sp P20742 P20742 PZP_HUMAN Pregnancy zone protein Homo sapiens 9606 PZP 1 4 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _EEGTDLEVTANR_ -40 TRUE _EEGTDLEVTANR_ EEGTDLEVTANR 5.3526073 0.94296074 5.3526073|3.901706 P20742 TRUE _EEGTDLEVTANR_ _EEGTDLEVTANR_ 667.3151454 0.000139519 230759.89 noloss 5 y 1 560.3150858 82.32931 FALSE sp P20742 P20742 PZP_HUMAN Pregnancy zone protein Homo sapiens 9606 PZP 1 4 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _EEGTDLEVTANR_ -40 TRUE _EEGTDLEVTANR_ EEGTDLEVTANR 5.3526073 0.94296074 5.3526073|3.901706 P20742 TRUE _EEGTDLEVTANR_ _EEGTDLEVTANR_ 667.3151454 0.000139519 230759.89 noloss 6 y 1 689.3576789 100 FALSE sp P20742 P20742 PZP_HUMAN Pregnancy zone protein Homo sapiens 9606 PZP 1 4 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _EEGTDLEVTANR_ -40 TRUE _EEGTDLEVTANR_ EEGTDLEVTANR 5.3526073 0.94296074 5.3526073|3.901706 P20742 TRUE _EEGTDLEVTANR_ _EEGTDLEVTANR_ 667.3151454 0.000139519 230759.89 noloss 7 y 1 802.4417428 79.06382 FALSE sp P20742 P20742 PZP_HUMAN Pregnancy zone protein Homo sapiens 9606 PZP 1 4 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _EEGTDLEVTANR_ -40 TRUE _EEGTDLEVTANR_ EEGTDLEVTANR 5.3526073 0.94296074 5.3526073|3.901706 P20742 TRUE _EEGTDLEVTANR_ _EEGTDLEVTANR_ 667.3151454 
0.000139519 230759.89 noloss 8 y 1 917.4686859 72.09271 FALSE sp P20742 P20742 PZP_HUMAN Pregnancy zone protein Homo sapiens 9606 PZP 1 4 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _EEGTDLEVTANR_ -40 TRUE _EEGTDLEVTANR_ EEGTDLEVTANR 5.3526073 0.94296074 5.3526073|3.901706 P20742 TRUE _EEGTDLEVTANR_ _EEGTDLEVTANR_ 667.3151454 0.000139519 230759.89 noloss 9 y 1 1018.516364 21.396385 TRUE sp P20742 P20742 PZP_HUMAN Pregnancy zone protein Homo sapiens 9606 PZP 1 4 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _EEGTDLEVTANR_ -40 TRUE _EEGTDLEVTANR_ EEGTDLEVTANR 5.3526073 0.94296074 5.3526073|3.901706 P20742 TRUE _EEGTDLEVTANR_ _EEGTDLEVTANR_ 667.3151454 0.000139519 230759.89 noloss 10 y 1 1075.537828 83.90465 FALSE sp P20742 P20742 PZP_HUMAN Pregnancy zone protein Homo sapiens 9606 PZP 1 4 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _EEGTDLEVTANR_ -40 TRUE _EEGTDLEVTANR_ EEGTDLEVTANR 5.3526073 0.94296074 5.3526073|3.901706 P20742 TRUE _EEGTDLEVTANR_ _EEGTDLEVTANR_ 667.3151454 0.000139519 230759.89 H2O 6 y 1 671.3471141 8.983159 TRUE sp P20742 P20742 PZP_HUMAN Pregnancy zone protein Homo sapiens 9606 PZP 1 4 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _C[+57]EEDEEFTC[+57]R_ -40 TRUE _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ CEEDEEFTCR -1.2823446 0.9011946 -1.2823446|-4.0177083 P00747 TRUE _C[+57]EEDEEFTC[+57]R_ _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ 687.7504667 0.00036315 385101.16 noloss 3 b 1 419.1231111 14.944622 TRUE sp P00747 P00747 PLMN_HUMAN Plasminogen Homo sapiens 9606 PLG 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _C[+57]EEDEEFTC[+57]R_ -40 TRUE _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ CEEDEEFTCR -1.2823446 0.9011946 
-1.2823446|-4.0177083 P00747 TRUE _C[+57]EEDEEFTC[+57]R_ _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ 687.7504667 0.00036315 385101.16 noloss 3 y 1 436.1972791 30.738253 FALSE sp P00747 P00747 PLMN_HUMAN Plasminogen Homo sapiens 9606 PLG 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _C[+57]EEDEEFTC[+57]R_ -40 TRUE _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ CEEDEEFTCR -1.2823446 0.9011946 -1.2823446|-4.0177083 P00747 TRUE _C[+57]EEDEEFTC[+57]R_ _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ 687.7504667 0.00036315 385101.16 noloss 4 b 1 534.1500542 9.713619 TRUE sp P00747 P00747 PLMN_HUMAN Plasminogen Homo sapiens 9606 PLG 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _C[+57]EEDEEFTC[+57]R_ -40 TRUE _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ CEEDEEFTCR -1.2823446 0.9011946 -1.2823446|-4.0177083 P00747 TRUE _C[+57]EEDEEFTC[+57]R_ _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ 687.7504667 0.00036315 385101.16 noloss 4 y 1 583.2656931 51.723152 FALSE sp P00747 P00747 PLMN_HUMAN Plasminogen Homo sapiens 9606 PLG 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _C[+57]EEDEEFTC[+57]R_ -40 TRUE _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ CEEDEEFTCR -1.2823446 0.9011946 -1.2823446|-4.0177083 P00747 TRUE _C[+57]EEDEEFTC[+57]R_ _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ 687.7504667 0.00036315 385101.16 noloss 5 y 1 712.3082861 50.78903 FALSE sp P00747 P00747 PLMN_HUMAN Plasminogen Homo sapiens 9606 PLG 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _C[+57]EEDEEFTC[+57]R_ -40 TRUE _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ CEEDEEFTCR -1.2823446 0.9011946 -1.2823446|-4.0177083 P00747 TRUE _C[+57]EEDEEFTC[+57]R_ _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl 
(C)]R_ 687.7504667 0.00036315 385101.16 noloss 6 y 1 841.3508792 59.771255 FALSE sp P00747 P00747 PLMN_HUMAN Plasminogen Homo sapiens 9606 PLG 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _C[+57]EEDEEFTC[+57]R_ -40 TRUE _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ CEEDEEFTCR -1.2823446 0.9011946 -1.2823446|-4.0177083 P00747 TRUE _C[+57]EEDEEFTC[+57]R_ _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ 687.7504667 0.00036315 385101.16 noloss 7 y 1 956.3778223 75.695114 FALSE sp P00747 P00747 PLMN_HUMAN Plasminogen Homo sapiens 9606 PLG 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _C[+57]EEDEEFTC[+57]R_ -40 TRUE _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ CEEDEEFTCR -1.2823446 0.9011946 -1.2823446|-4.0177083 P00747 TRUE _C[+57]EEDEEFTC[+57]R_ _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ 687.7504667 0.00036315 385101.16 noloss 8 y 1 1085.420415 100 FALSE sp P00747 P00747 PLMN_HUMAN Plasminogen Homo sapiens 9606 PLG 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _C[+57]EEDEEFTC[+57]R_ -40 TRUE _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ CEEDEEFTCR -1.2823446 0.9011946 -1.2823446|-4.0177083 P00747 TRUE _C[+57]EEDEEFTC[+57]R_ _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ 687.7504667 0.00036315 385101.16 noloss 9 y 1 1214.463008 15.559877 TRUE sp P00747 P00747 PLMN_HUMAN Plasminogen Homo sapiens 9606 PLG 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _C[+57]EEDEEFTC[+57]R_ -40 TRUE _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ CEEDEEFTCR -1.2823446 0.9011946 -1.2823446|-4.0177083 P00747 TRUE _C[+57]EEDEEFTC[+57]R_ _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ 687.7504667 0.00036315 385101.16 H2O 3 b 1 401.1125463 9.39616 TRUE sp P00747 P00747 PLMN_HUMAN Plasminogen 
Homo sapiens 9606 PLG 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _C[+57]EEDEEFTC[+57]R_ -40 TRUE _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ CEEDEEFTCR -1.2823446 0.9011946 -1.2823446|-4.0177083 P00747 TRUE _C[+57]EEDEEFTC[+57]R_ _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ 687.7504667 0.00036315 385101.16 H2O 4 b 1 516.1394894 5.2136855 TRUE sp P00747 P00747 PLMN_HUMAN Plasminogen Homo sapiens 9606 PLG 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _C[+57]EEDEEFTC[+57]R_ -40 TRUE _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ CEEDEEFTCR -1.2823446 0.9011946 -1.2823446|-4.0177083 P00747 TRUE _C[+57]EEDEEFTC[+57]R_ _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ 687.7504667 0.00036315 385101.16 NH3 4 y 1 566.2391447 6.1200476 TRUE sp P00747 P00747 PLMN_HUMAN Plasminogen Homo sapiens 9606 PLG 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _C[+57]EEDEEFTC[+57]R_ -40 TRUE _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ CEEDEEFTCR -1.2823446 0.9011946 -1.2823446|-4.0177083 P00747 TRUE _C[+57]EEDEEFTC[+57]R_ _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ 687.7504667 0.00036315 385101.16 H2O 5 y 1 694.2977213 6.602107 TRUE sp P00747 P00747 PLMN_HUMAN Plasminogen Homo sapiens 9606 PLG 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _C[+57]EEDEEFTC[+57]R_ -40 TRUE _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ CEEDEEFTCR -1.2823446 0.9011946 -1.2823446|-4.0177083 P00747 TRUE _C[+57]EEDEEFTC[+57]R_ _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ 687.7504667 0.00036315 385101.16 H2O 6 y 1 823.3403144 10.601803 TRUE sp P00747 P00747 PLMN_HUMAN Plasminogen Homo sapiens 9606 PLG 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 
+IK_221028_C19_lib2_01 2 _C[+57]EEDEEFTC[+57]R_ -40 TRUE _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ CEEDEEFTCR -1.2823446 0.9011946 -1.2823446|-4.0177083 P00747 TRUE _C[+57]EEDEEFTC[+57]R_ _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ 687.7504667 0.00036315 385101.16 H2O 7 y 1 938.3672575 6.748549 TRUE sp P00747 P00747 PLMN_HUMAN Plasminogen Homo sapiens 9606 PLG 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _C[+57]EEDEEFTC[+57]R_ -40 TRUE _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ CEEDEEFTCR -1.2823446 0.9011946 -1.2823446|-4.0177083 P00747 TRUE _C[+57]EEDEEFTC[+57]R_ _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ 687.7504667 0.00036315 385101.16 H2O 8 y 1 1067.409851 26.568663 TRUE sp P00747 P00747 PLMN_HUMAN Plasminogen Homo sapiens 9606 PLG 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _C[+57]EEDEEFTC[+57]R_ -40 TRUE _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ CEEDEEFTCR -1.2823446 0.9011946 -1.2823446|-4.0177083 P00747 TRUE _C[+57]EEDEEFTC[+57]R_ _C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_ 687.7504667 0.00036315 385101.16 H2O 9 y 1 1196.452444 10.719879 TRUE sp P00747 P00747 PLMN_HUMAN Plasminogen Homo sapiens 9606 PLG 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 noloss 3 b 1 386.2034101 7.7662497 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 noloss 3 y 1 347.2037444 
3.2926106 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 noloss 4 b 1 499.2874741 6.734219 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 noloss 4 y 1 475.2623219 14.221682 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 noloss 5 b 1 586.3195025 1.5547751 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 noloss 5 y 1 604.304915 12.158777 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 noloss 6 b 1 715.3620955 3.230758 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 
H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 noloss 6 y 1 675.3420288 25.758886 FALSE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 noloss 7 b 1 786.3992093 1.9753934 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 noloss 7 y 1 804.3846219 22.087849 FALSE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 noloss 8 b 1 915.4418024 1.8401791 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 noloss 8 b 2 458.2245394 1.6086894 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ 
-40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 noloss 8 y 1 891.4166503 74.71736 FALSE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 noloss 9 y 1 1004.500714 54.28254 FALSE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 noloss 10 b 1 1114.537494 1.0495659 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 noloss 10 y 1 1133.543307 100 FALSE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 noloss 11 y 1 1261.601885 24.588951 FALSE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 
695.3520622 0.000291637 1674753 noloss 11 y 2 631.3045807 2.1570513 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 H2O 3 b 1 368.1928453 3.4925315 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 NH3 3 b 1 369.1768617 2.9452903 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 H2O 4 b 1 481.2769092 1.0290943 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 NH3 4 y 1 458.2357735 1.8126523 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 H2O 8 y 1 873.4060855 2.0101676 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 
1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 H2O 10 y 1 1115.532743 6.641937 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 H2O 11 y 2 622.2992983 6.5424037 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 H2O 11 y 1 1243.59132 3.3419182 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 NH3 11 y 1 1244.575336 10.56645 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE _KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 NH3 4 b 1 482.2609257 1.1637812 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 +IK_221028_C19_lib2_01 2 _KQELSEAEQATR_ -40 TRUE 
_KQELSEAEQATR_ KQELSEAEQATR -26.474043 0.9663669 -26.300653|-26.58661 P01024 TRUE _KQELSEAEQATR_ _KQELSEAEQATR_ 695.3520622 0.000291637 1674753 NH3 3 y 1 330.177196 1.6216956 TRUE sp P01024 P01024 CO3_HUMAN Complement C3 Homo sapiens 9606 C3 1 2 H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77 diff --git a/implementations/python/tests/test_data/phl004_canonical_sall_pv_plasma.head.diann.tsv b/implementations/python/tests/test_data/phl004_canonical_sall_pv_plasma.head.diann.tsv new file mode 100644 index 0000000..ea42718 --- /dev/null +++ b/implementations/python/tests/test_data/phl004_canonical_sall_pv_plasma.head.diann.tsv @@ -0,0 +1,147 @@ +FileName PrecursorMz ProductMz Tr_recalibrated IonMobility transition_name LibraryIntensity transition_group_id decoy PeptideSequence Proteotypic QValue PGQValue Ms1ProfileCorr ProteinGroup ProteinName Genes FullUniModPeptideName ModifiedPeptide PrecursorCharge PeptideGroupLabel UniprotID NTerm CTerm FragmentType FragmentCharge FragmentSeriesNumber FragmentLossType ExcludeFromAssay +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 778.41296 427.22995 117.9 0 AAAAAAAAAAAAAAAASAGGK2_98_1_0_6 1 AAAAAAAAAAAAAAAASAGGK2 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 2 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 b 1 6 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 778.41296 498.26706 117.9 0 AAAAAAAAAAAAAAAASAGGK2_98_1_0_7 0.88039231 AAAAAAAAAAAAAAAASAGGK2 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 2 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 b 1 7 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 778.41296 569.3042 117.9 0 AAAAAAAAAAAAAAAASAGGK2_98_1_0_8 0.66275221 AAAAAAAAAAAAAAAASAGGK2 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 2 AAAAAAAAAAAAAAAASAGGK P0CG40 0 
0 b 1 8 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 778.41296 356.19284 117.9 0 AAAAAAAAAAAAAAAASAGGK2_98_1_0_5 0.58039355 AAAAAAAAAAAAAAAASAGGK2 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 2 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 b 1 5 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 778.41296 640.34131 117.9 0 AAAAAAAAAAAAAAAASAGGK2_98_1_0_9 0.44118175 AAAAAAAAAAAAAAAASAGGK2 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 2 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 b 1 9 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 778.41296 703.37335 117.9 0 AAAAAAAAAAAAAAAASAGGK2_121_1_0_12 0.40000239 AAAAAAAAAAAAAAAASAGGK2 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 2 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 y 1 9 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 778.41296 916.48468 117.9 0 AAAAAAAAAAAAAAAASAGGK2_121_1_0_9 0.40000239 AAAAAAAAAAAAAAAASAGGK2 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 2 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 y 1 12 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 778.41296 1129.5961 117.9 0 AAAAAAAAAAAAAAAASAGGK2_121_1_0_6 0.36078224 AAAAAAAAAAAAAAAASAGGK2 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 2 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 y 1 15 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 778.41296 774.41046 117.9 0 AAAAAAAAAAAAAAAASAGGK2_121_1_0_11 0.34117815 AAAAAAAAAAAAAAAASAGGK2 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 2 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 y 1 10 noloss False 
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 778.41296 845.44757 117.9 0 AAAAAAAAAAAAAAAASAGGK2_121_1_0_10 0.30195799 AAAAAAAAAAAAAAAASAGGK2 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 2 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 y 1 11 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 778.41296 490.26199 117.9 0 AAAAAAAAAAAAAAAASAGGK2_121_1_0_15 0.301357 AAAAAAAAAAAAAAAASAGGK2 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 2 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 y 1 6 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 778.41296 419.22488 117.9 0 AAAAAAAAAAAAAAAASAGGK2_121_1_0_16 0.29999879 AAAAAAAAAAAAAAAASAGGK2 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 2 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 y 1 5 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 778.41296 1058.5588 117.9 0 AAAAAAAAAAAAAAAASAGGK2_121_1_0_7 0.25881943 AAAAAAAAAAAAAAAASAGGK2 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 2 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 y 1 14 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 778.41296 632.33624 117.9 0 AAAAAAAAAAAAAAAASAGGK2_121_1_0_13 0.2572208 AAAAAAAAAAAAAAAASAGGK2 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 2 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 y 1 8 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 778.41296 853.45264 117.9 0 AAAAAAAAAAAAAAAASAGGK2_98_1_0_12 0.18174933 AAAAAAAAAAAAAAAASAGGK2 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 2 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 b 1 12 noloss False 
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 778.41296 987.52179 117.9 0 AAAAAAAAAAAAAAAASAGGK2_121_1_0_8 0.17647269 AAAAAAAAAAAAAAAASAGGK2 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 2 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 y 1 13 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 778.41296 782.41553 117.9 0 AAAAAAAAAAAAAAAASAGGK2_98_1_0_11 0.13921174 AAAAAAAAAAAAAAAASAGGK2 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 2 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 b 1 11 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 778.41296 924.48975 117.9 0 AAAAAAAAAAAAAAAASAGGK2_98_1_0_13 0.13725254 AAAAAAAAAAAAAAAASAGGK2 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 2 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 b 1 13 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 778.41296 711.37842 117.9 0 AAAAAAAAAAAAAAAASAGGK2_98_1_0_10 0.11764847 AAAAAAAAAAAAAAAASAGGK2 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 2 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 b 1 10 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 778.41296 1200.6332 117.9 0 AAAAAAAAAAAAAAAASAGGK2_121_1_0_5 0.10526822 AAAAAAAAAAAAAAAASAGGK2 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 2 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 y 1 16 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 519.27777 703.37335 117.4 0 AAAAAAAAAAAAAAAASAGGK3_121_1_0_12 1 AAAAAAAAAAAAAAAASAGGK3 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 3 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 y 1 9 noloss False 
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 519.27777 419.22488 117.4 0 AAAAAAAAAAAAAAAASAGGK3_121_1_0_16 0.75598669 AAAAAAAAAAAAAAAASAGGK3 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 3 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 y 1 5 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 519.27777 498.26706 117.4 0 AAAAAAAAAAAAAAAASAGGK3_98_1_0_7 0.67160469 AAAAAAAAAAAAAAAASAGGK3 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 3 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 b 1 7 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 519.27777 640.34131 117.4 0 AAAAAAAAAAAAAAAASAGGK3_98_1_0_9 0.61734796 AAAAAAAAAAAAAAAASAGGK3 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 3 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 b 1 9 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 519.27777 569.3042 117.4 0 AAAAAAAAAAAAAAAASAGGK3_98_1_0_8 0.56162828 AAAAAAAAAAAAAAAASAGGK3 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 3 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 b 1 8 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 519.27777 356.19284 117.4 0 AAAAAAAAAAAAAAAASAGGK3_98_1_0_5 0.51522595 AAAAAAAAAAAAAAAASAGGK3 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 3 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 b 1 5 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 519.27777 427.22995 117.4 0 AAAAAAAAAAAAAAAASAGGK3_98_1_0_6 0.51522595 AAAAAAAAAAAAAAAASAGGK3 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 3 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 b 1 6 noloss False 
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 519.27777 774.41046 117.4 0 AAAAAAAAAAAAAAAASAGGK3_121_1_0_11 0.48234525 AAAAAAAAAAAAAAAASAGGK3 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 3 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 y 1 10 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 519.27777 561.29907 117.4 0 AAAAAAAAAAAAAAAASAGGK3_121_1_0_14 0.41774848 AAAAAAAAAAAAAAAASAGGK3 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 3 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 y 1 7 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 519.27777 490.26199 117.4 0 AAAAAAAAAAAAAAAASAGGK3_121_1_0_15 0.37849048 AAAAAAAAAAAAAAAASAGGK3 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 3 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 y 1 6 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 519.27777 648.33875 117.4 0 AAAAAAAAAAAAAAAASAGGK3_98_2_0_18 0.37133196 AAAAAAAAAAAAAAAASAGGK3 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 3 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 b 2 18 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 519.27777 632.33624 117.4 0 AAAAAAAAAAAAAAAASAGGK3_121_1_0_13 0.32955998 AAAAAAAAAAAAAAAASAGGK3 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 3 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 y 1 8 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 519.27777 711.37842 117.4 0 AAAAAAAAAAAAAAAASAGGK3_98_1_0_10 0.27849901 AAAAAAAAAAAAAAAASAGGK3 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 3 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 b 1 10 noloss False 
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 519.27777 782.41553 117.4 0 AAAAAAAAAAAAAAAASAGGK3_98_1_0_11 0.23208249 AAAAAAAAAAAAAAAASAGGK3 0 AAAAAAAAAAAAAAAASAGGK 1 0 0 0 P0CG40 SP9_HUMAN SP9 AAAAAAAAAAAAAAAASAGGK AAAAAAAAAAAAAAAASAGGK 3 AAAAAAAAAAAAAAAASAGGK P0CG40 0 0 b 1 11 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 427.22995 110 0 AAAAAAAAAAAAAAAGAGAGAK2_98_1_0_6 1 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 b 1 6 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 356.19284 110 0 AAAAAAAAAAAAAAAGAGAGAK2_98_1_0_5 0.91602999 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 b 1 5 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 498.26706 110 0 AAAAAAAAAAAAAAAGAGAGAK2_98_1_0_7 0.83227003 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 b 1 7 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 569.3042 110 0 AAAAAAAAAAAAAAAGAGAGAK2_98_1_0_8 0.79017997 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 b 1 8 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 640.34131 110 0 AAAAAAAAAAAAAAAGAGAGAK2_98_1_0_9 0.59228998 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 b 1 9 noloss False 
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 815.43701 110 0 AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_11 0.54980999 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 y 1 11 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 886.47412 110 0 AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_10 0.51920998 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 y 1 12 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 531.28851 110 0 AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_15 0.50638998 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 y 1 7 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 957.51123 110 0 AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_9 0.49983999 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 y 1 13 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 673.36279 110 0 AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_13 0.46117997 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 y 1 9 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 1028.5483 110 0 AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_8 0.44073999 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 y 1 14 noloss 
False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 711.37842 110 0 AAAAAAAAAAAAAAAGAGAGAK2_98_1_0_10 0.42937002 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 b 1 10 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 744.3999 110 0 AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_12 0.41159999 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 y 1 10 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 602.32562 110 0 AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_14 0.40491998 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 y 1 8 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 782.41553 110 0 AAAAAAAAAAAAAAAGAGAGAK2_98_1_0_11 0.38191 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 b 1 11 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 403.22995 110 0 AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_17 0.36974999 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 y 1 5 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 1099.5854 110 0 AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_7 0.34740001 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 y 1 15 
noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 1170.6226 110 0 AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_6 0.31896001 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 y 1 16 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 853.45264 110 0 AAAAAAAAAAAAAAAGAGAGAK2_98_1_0_12 0.27395001 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 b 1 12 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 474.26706 110 0 AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_16 0.26207 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 y 1 6 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 995.52686 110 0 AAAAAAAAAAAAAAAGAGAGAK2_98_1_0_14 0.20731001 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 b 1 14 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 1241.6597 110 0 AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_5 0.2052 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 y 1 17 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 924.48975 110 0 AAAAAAAAAAAAAAAGAGAGAK2_98_1_0_13 0.18191999 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 b 1 13 
noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 798.92627 1312.6968 110 0 AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_4 0.114 AAAAAAAAAAAAAAAGAGAGAK2 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 2 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 y 1 18 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 532.95325 531.28851 109.8 0 AAAAAAAAAAAAAAAGAGAGAK3_121_1_0_15 1 AAAAAAAAAAAAAAAGAGAGAK3 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 3 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 y 1 7 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 532.95325 498.26706 109.8 0 AAAAAAAAAAAAAAAGAGAGAK3_98_1_0_7 0.82642001 AAAAAAAAAAAAAAAGAGAGAK3 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 3 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 b 1 7 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 532.95325 427.22995 109.8 0 AAAAAAAAAAAAAAAGAGAGAK3_98_1_0_6 0.75252002 AAAAAAAAAAAAAAAGAGAGAK3 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 3 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 b 1 6 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 532.95325 356.19284 109.8 0 AAAAAAAAAAAAAAAGAGAGAK3_98_1_0_5 0.71425998 AAAAAAAAAAAAAAAGAGAGAK3 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 3 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 b 1 5 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 532.95325 602.32562 109.8 0 AAAAAAAAAAAAAAAGAGAGAK3_121_1_0_14 0.66922998 AAAAAAAAAAAAAAAGAGAGAK3 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 3 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 y 1 8 
noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 532.95325 569.3042 109.8 0 AAAAAAAAAAAAAAAGAGAGAK3_98_1_0_8 0.56159002 AAAAAAAAAAAAAAAGAGAGAK3 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 3 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 b 1 8 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 532.95325 673.36279 109.8 0 AAAAAAAAAAAAAAAGAGAGAK3_121_1_0_13 0.52272004 AAAAAAAAAAAAAAAGAGAGAK3 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 3 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 y 1 9 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 532.95325 640.34131 109.8 0 AAAAAAAAAAAAAAAGAGAGAK3_98_1_0_9 0.47526002 AAAAAAAAAAAAAAAGAGAGAK3 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 3 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 b 1 9 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 532.95325 711.37842 109.8 0 AAAAAAAAAAAAAAAGAGAGAK3_98_1_0_10 0.40158999 AAAAAAAAAAAAAAAGAGAGAK3 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 3 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 b 1 10 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 532.95325 744.3999 109.8 0 AAAAAAAAAAAAAAAGAGAGAK3_121_1_0_12 0.39754999 AAAAAAAAAAAAAAAGAGAGAK3 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 3 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 y 1 10 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 532.95325 403.22995 109.8 0 AAAAAAAAAAAAAAAGAGAGAK3_121_1_0_17 0.31963 AAAAAAAAAAAAAAAGAGAGAK3 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 3 AAAAAAAAAAAAAAAGAGAGAK P55011 0 
0 y 1 5 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 532.95325 462.7485 109.8 0 AAAAAAAAAAAAAAAGAGAGAK3_98_2_0_13 0.27653 AAAAAAAAAAAAAAAGAGAGAK3 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 3 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 b 2 13 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 532.95325 782.41553 109.8 0 AAAAAAAAAAAAAAAGAGAGAK3_98_1_0_11 0.27631998 AAAAAAAAAAAAAAAGAGAGAK3 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 3 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 b 1 11 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 532.95325 815.43701 109.8 0 AAAAAAAAAAAAAAAGAGAGAK3_121_1_0_11 0.24988998 AAAAAAAAAAAAAAAGAGAGAK3 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 3 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 y 1 11 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 532.95325 474.26706 109.8 0 AAAAAAAAAAAAAAAGAGAGAK3_121_1_0_16 0.24581002 AAAAAAAAAAAAAAAGAGAGAK3 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 3 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 y 1 6 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 532.95325 391.7114 109.8 0 AAAAAAAAAAAAAAAGAGAGAK3_98_2_0_11 0.18824001 AAAAAAAAAAAAAAAGAGAGAK3 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 3 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 b 2 11 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 532.95325 853.45264 109.8 0 AAAAAAAAAAAAAAAGAGAGAK3_98_1_0_12 0.17081 AAAAAAAAAAAAAAAGAGAGAK3 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 3 AAAAAAAAAAAAAAAGAGAGAK 
P55011 0 0 b 1 12 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 532.95325 886.47412 109.8 0 AAAAAAAAAAAAAAAGAGAGAK3_121_1_0_10 0.16329999 AAAAAAAAAAAAAAAGAGAGAK3 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 3 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 y 1 12 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 532.95325 924.48975 109.8 0 AAAAAAAAAAAAAAAGAGAGAK3_98_1_0_13 0.10528 AAAAAAAAAAAAAAAGAGAGAK3 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 3 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 b 1 13 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 532.95325 957.51123 109.8 0 AAAAAAAAAAAAAAAGAGAGAK3_121_1_0_9 0.091109999 AAAAAAAAAAAAAAAGAGAGAK3 0 AAAAAAAAAAAAAAAGAGAGAK 1 0 0 0 P55011 S12A2_HUMAN SLC12A2 AAAAAAAAAAAAAAAGAGAGAK AAAAAAAAAAAAAAAGAGAGAK 3 AAAAAAAAAAAAAAAGAGAGAK P55011 0 0 y 1 13 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 746.36969 646.31549 151.3 0 AAAAAAAAAAAAAAASGFAYPGTSER3_121_1_0_20 1 AAAAAAAAAAAAAAASGFAYPGTSER3 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 3 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 y 1 6 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 746.36969 880.41589 151.3 0 AAAAAAAAAAAAAAASGFAYPGTSER3_121_1_0_18 0.72346348 AAAAAAAAAAAAAAASGFAYPGTSER3 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 3 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 y 1 8 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 746.36969 498.26706 151.3 0 AAAAAAAAAAAAAAASGFAYPGTSER3_98_1_0_7 0.66272509 AAAAAAAAAAAAAAASGFAYPGTSER3 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 
AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 3 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 b 1 7 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 746.36969 569.3042 151.3 0 AAAAAAAAAAAAAAASGFAYPGTSER3_98_1_0_8 0.60243261 AAAAAAAAAAAAAAASGFAYPGTSER3 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 3 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 b 1 8 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 746.36969 356.19284 151.3 0 AAAAAAAAAAAAAAASGFAYPGTSER3_98_1_0_5 0.58793229 AAAAAAAAAAAAAAASGFAYPGTSER3 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 3 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 b 1 5 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 746.36969 809.37878 151.3 0 AAAAAAAAAAAAAAASGFAYPGTSER3_121_1_0_19 0.48742947 AAAAAAAAAAAAAAASGFAYPGTSER3 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 3 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 y 1 7 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 746.36969 711.37842 151.3 0 AAAAAAAAAAAAAAASGFAYPGTSER3_98_1_0_10 0.48370829 AAAAAAAAAAAAAAASGFAYPGTSER3 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 3 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 b 1 10 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 746.36969 427.22995 151.3 0 AAAAAAAAAAAAAAASGFAYPGTSER3_98_1_0_6 0.48304707 AAAAAAAAAAAAAAASGFAYPGTSER3 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 3 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 b 1 6 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 746.36969 1027.4844 
151.3 0 AAAAAAAAAAAAAAASGFAYPGTSER3_121_1_0_17 0.38692665 AAAAAAAAAAAAAAASGFAYPGTSER3 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 3 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 y 1 9 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 746.36969 853.45264 151.3 0 AAAAAAAAAAAAAAASGFAYPGTSER3_98_1_0_12 0.32992482 AAAAAAAAAAAAAAASGFAYPGTSER3 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 3 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 b 1 12 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 746.36969 1171.5378 151.3 0 AAAAAAAAAAAAAAASGFAYPGTSER3_121_1_0_15 0.31720817 AAAAAAAAAAAAAAASGFAYPGTSER3 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 3 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 y 1 11 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 746.36969 782.41553 151.3 0 AAAAAAAAAAAAAAASGFAYPGTSER3_98_1_0_11 0.21106207 AAAAAAAAAAAAAAASGFAYPGTSER3 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 3 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 b 1 11 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 746.36969 640.34131 151.3 0 AAAAAAAAAAAAAAASGFAYPGTSER3_98_1_0_9 0.21104671 AAAAAAAAAAAAAAASGFAYPGTSER3 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 3 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 b 1 9 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 560.02911 646.31549 151.10001 0 AAAAAAAAAAAAAAASGFAYPGTSER4_121_1_0_20 1 AAAAAAAAAAAAAAASGFAYPGTSER4 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 4 
AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 y 1 6 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 560.02911 356.19284 151.10001 0 AAAAAAAAAAAAAAASGFAYPGTSER4_98_1_0_5 0.66913706 AAAAAAAAAAAAAAASGFAYPGTSER4 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 4 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 b 1 5 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 560.02911 492.24124 151.10001 0 AAAAAAAAAAAAAAASGFAYPGTSER4_121_1_0_22 0.59304374 AAAAAAAAAAAAAAASGFAYPGTSER4 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 4 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 y 1 4 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 560.02911 498.26706 151.10001 0 AAAAAAAAAAAAAAASGFAYPGTSER4_98_1_0_7 0.4210082 AAAAAAAAAAAAAAASGFAYPGTSER4 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 4 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 b 1 7 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 560.02911 809.37878 151.10001 0 AAAAAAAAAAAAAAASGFAYPGTSER4_121_1_0_19 0.4210082 AAAAAAAAAAAAAAASGFAYPGTSER4 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 4 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 y 1 7 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 560.02911 569.3042 151.10001 0 AAAAAAAAAAAAAAASGFAYPGTSER4_98_1_0_8 0.36934987 AAAAAAAAAAAAAAASGFAYPGTSER4 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 4 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 b 1 8 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 560.02911 711.37842 151.10001 0 
AAAAAAAAAAAAAAASGFAYPGTSER4_98_1_0_10 0.344071 AAAAAAAAAAAAAAASGFAYPGTSER4 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 4 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 b 1 10 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 560.02911 514.24579 151.10001 0 AAAAAAAAAAAAAAASGFAYPGTSER4_121_2_0_17 0.33137658 AAAAAAAAAAAAAAASGFAYPGTSER4 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 4 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 y 2 9 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 560.02911 427.22995 151.10001 0 AAAAAAAAAAAAAAASGFAYPGTSER4_98_1_0_6 0.33042264 AAAAAAAAAAAAAAASGFAYPGTSER4 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 4 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 b 1 6 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 560.02911 640.34131 151.10001 0 AAAAAAAAAAAAAAASGFAYPGTSER4_98_1_0_9 0.31578368 AAAAAAAAAAAAAAASGFAYPGTSER4 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 4 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 b 1 9 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 560.02911 586.27252 151.10001 0 AAAAAAAAAAAAAAASGFAYPGTSER4_121_2_0_15 0.27729672 AAAAAAAAAAAAAAASGFAYPGTSER4 0 AAAAAAAAAAAAAAASGFAYPGTSER 1 0 0 0 P35453 HXD13_HUMAN HOXD13 AAAAAAAAAAAAAAASGFAYPGTSER AAAAAAAAAAAAAAASGFAYPGTSER 4 AAAAAAAAAAAAAAASGFAYPGTSER P35453 0 0 y 2 11 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 429.24561 573.33551 88.5 0 AAAAAAAAAAK2_121_1_0_4 1 AAAAAAAAAAK2 0 AAAAAAAAAAK 1 0 0 0 P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341 AAAAAAAAAAK AAAAAAAAAAK 2 AAAAAAAAAAK 
P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341 0 0 y 1 7 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 429.24561 502.29837 88.5 0 AAAAAAAAAAK2_121_1_0_5 0.84049195 AAAAAAAAAAK2 0 AAAAAAAAAAK 1 0 0 0 P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341 AAAAAAAAAAK AAAAAAAAAAK 2 AAAAAAAAAAK P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341 0 0 y 1 6 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 429.24561 644.37262 88.5 0 AAAAAAAAAAK2_121_1_0_3 0.62802154 AAAAAAAAAAK2 0 AAAAAAAAAAK 1 0 0 0 P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341 AAAAAAAAAAK AAAAAAAAAAK 2 AAAAAAAAAAK P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341 0 0 y 1 8 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 429.24561 715.40973 88.5 0 AAAAAAAAAAK2_121_1_0_2 0.53955686 AAAAAAAAAAK2 0 AAAAAAAAAAK 1 0 0 0 P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341 AAAAAAAAAAK AAAAAAAAAAK 2 AAAAAAAAAAK P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341 0 0 y 1 9 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 429.24561 356.19284 88.5 0 AAAAAAAAAAK2_98_1_0_5 0.52399796 AAAAAAAAAAK2 0 AAAAAAAAAAK 1 0 0 0 P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341 AAAAAAAAAAK AAAAAAAAAAK 2 AAAAAAAAAAK P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341 0 0 b 1 5 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 429.24561 431.26126 88.5 0 AAAAAAAAAAK2_121_1_0_6 0.46901798 AAAAAAAAAAK2 0 AAAAAAAAAAK 1 0 0 0 P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341 AAAAAAAAAAK AAAAAAAAAAK 2 AAAAAAAAAAK P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341 0 0 y 1 5 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 429.24561 427.22995 88.5 0 AAAAAAAAAAK2_98_1_0_6 0.32801768 AAAAAAAAAAK2 0 AAAAAAAAAAK 1 0 0 0 
P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341 AAAAAAAAAAK AAAAAAAAAAK 2 AAAAAAAAAAK P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341 0 0 b 1 6 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 429.24561 360.22415 88.5 0 AAAAAAAAAAK2_121_1_0_7 0.30400032 AAAAAAAAAAK2 0 AAAAAAAAAAK 1 0 0 0 P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341 AAAAAAAAAAK AAAAAAAAAAK 2 AAAAAAAAAAK P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341 0 0 y 1 4 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 429.24561 498.26706 88.5 0 AAAAAAAAAAK2_98_1_0_7 0.22399409 AAAAAAAAAAK2 0 AAAAAAAAAAK 1 0 0 0 P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341 AAAAAAAAAAK AAAAAAAAAAK 2 AAAAAAAAAAK P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341 0 0 b 1 7 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 429.24561 786.44684 88.5 0 AAAAAAAAAAK2_121_1_0_1 0.19198385 AAAAAAAAAAK2 0 AAAAAAAAAAK 1 0 0 0 P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341 AAAAAAAAAAK AAAAAAAAAAK 2 AAAAAAAAAAK P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341 0 0 y 1 10 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 443.24869 601.34161 38.400002 0 AAAAAAAAAAR2_121_1_0_4 1 AAAAAAAAAAR2 0 AAAAAAAAAAR 1 0 0 0 P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40 AAAAAAAAAAR AAAAAAAAAAR 2 AAAAAAAAAAR P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40 0 0 y 1 7 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 443.24869 530.3045 38.400002 0 AAAAAAAAAAR2_121_1_0_5 0.90602791 AAAAAAAAAAR2 0 AAAAAAAAAAR 1 0 0 0 P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40 AAAAAAAAAAR AAAAAAAAAAR 2 AAAAAAAAAAR 
P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40 0 0 y 1 6 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 443.24869 672.37872 38.400002 0 AAAAAAAAAAR2_121_1_0_3 0.83756346 AAAAAAAAAAR2 0 AAAAAAAAAAR 1 0 0 0 P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40 AAAAAAAAAAR AAAAAAAAAAR 2 AAAAAAAAAAR P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40 0 0 y 1 8 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 443.24869 356.19284 38.400002 0 AAAAAAAAAAR2_98_1_0_5 0.69194162 AAAAAAAAAAR2 0 AAAAAAAAAAR 1 0 0 0 P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40 AAAAAAAAAAR AAAAAAAAAAR 2 AAAAAAAAAAR P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40 0 0 b 1 5 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 443.24869 388.23029 38.400002 0 AAAAAAAAAAR2_121_1_0_7 0.52842641 AAAAAAAAAAR2 0 AAAAAAAAAAR 1 0 0 0 P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40 AAAAAAAAAAR AAAAAAAAAAR 2 AAAAAAAAAAR P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40 0 0 y 1 4 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 443.24869 459.2674 38.400002 0 AAAAAAAAAAR2_121_1_0_6 0.47519037 AAAAAAAAAAR2 0 AAAAAAAAAAR 1 0 0 0 P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40 AAAAAAAAAAR AAAAAAAAAAR 2 AAAAAAAAAAR P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40 0 0 y 1 5 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 
443.24869 743.41583 38.400002 0 AAAAAAAAAAR2_121_1_0_2 0.39619291 AAAAAAAAAAR2 0 AAAAAAAAAAR 1 0 0 0 P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40 AAAAAAAAAAR AAAAAAAAAAR 2 AAAAAAAAAAR P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40 0 0 y 1 9 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 443.24869 427.22995 38.400002 0 AAAAAAAAAAR2_98_1_0_6 0.36180204 AAAAAAAAAAR2 0 AAAAAAAAAAR 1 0 0 0 P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40 AAAAAAAAAAR AAAAAAAAAAR 2 AAAAAAAAAAR P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40 0 0 b 1 6 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 443.24869 569.3042 38.400002 0 AAAAAAAAAAR2_98_1_0_8 0.20031726 AAAAAAAAAAR2 0 AAAAAAAAAAR 1 0 0 0 P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40 AAAAAAAAAAR AAAAAAAAAAR 2 AAAAAAAAAAR P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40 0 0 b 1 8 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 789.49811 132.2 0 AAAAAAAAAASGAAIPPLIPPR2_121_1_0_15 1 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 y 1 7 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 369.22449 132.2 0 AAAAAAAAAASGAAIPPLIPPR2_121_1_0_19 0.58921003 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 y 1 3 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 
950.04419 427.22995 132.2 0 AAAAAAAAAASGAAIPPLIPPR2_98_1_0_6 0.31046999 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 b 1 6 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 498.26706 132.2 0 AAAAAAAAAASGAAIPPLIPPR2_98_1_0_7 0.30821002 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 b 1 7 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 569.3042 132.2 0 AAAAAAAAAASGAAIPPLIPPR2_98_1_0_8 0.26800001 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 b 1 8 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 356.19284 132.2 0 AAAAAAAAAASGAAIPPLIPPR2_98_1_0_5 0.25783998 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 b 1 5 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 640.34131 132.2 0 AAAAAAAAAASGAAIPPLIPPR2_98_1_0_9 0.20063001 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 b 1 9 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 1101.6779 132.2 0 AAAAAAAAAASGAAIPPLIPPR2_121_1_0_11 0.15594999 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 y 1 11 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 1188.71 
132.2 0 AAAAAAAAAASGAAIPPLIPPR2_121_1_0_10 0.15020999 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 y 1 12 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 902.58221 132.2 0 AAAAAAAAAASGAAIPPLIPPR2_121_1_0_14 0.13153 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 y 1 8 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 1259.7471 132.2 0 AAAAAAAAAASGAAIPPLIPPR2_121_1_0_9 0.1284 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 y 1 13 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 973.61932 132.2 0 AAAAAAAAAASGAAIPPLIPPR2_121_1_0_13 0.11756 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 y 1 9 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 997.50616 132.2 0 AAAAAAAAAASGAAIPPLIPPR2_98_1_0_14 0.11383001 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 b 1 14 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 711.37842 132.2 0 AAAAAAAAAASGAAIPPLIPPR2_98_1_0_10 0.10726 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 b 1 10 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 1330.7842 132.2 0 
AAAAAAAAAASGAAIPPLIPPR2_121_1_0_8 0.10523 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 y 1 14 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 1110.5902 132.2 0 AAAAAAAAAASGAAIPPLIPPR2_98_1_0_15 0.10514 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 b 1 15 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 926.46899 132.2 0 AAAAAAAAAASGAAIPPLIPPR2_98_1_0_13 0.10461 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 b 1 13 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 1401.8213 132.2 0 AAAAAAAAAASGAAIPPLIPPR2_121_1_0_7 0.080120005 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 y 1 15 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 855.43188 132.2 0 AAAAAAAAAASGAAIPPLIPPR2_98_1_0_12 0.074140005 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 b 1 12 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 1044.6564 132.2 0 AAAAAAAAAASGAAIPPLIPPR2_121_1_0_12 0.072379999 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 y 1 10 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 482.30853 132.2 0 
AAAAAAAAAASGAAIPPLIPPR2_121_1_0_18 0.067829996 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 y 1 4 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 798.41046 132.2 0 AAAAAAAAAASGAAIPPLIPPR2_98_1_0_11 0.05923 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 b 1 11 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 1472.8584 132.2 0 AAAAAAAAAASGAAIPPLIPPR2_121_1_0_6 0.055300001 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 y 1 16 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 692.44537 132.2 0 AAAAAAAAAASGAAIPPLIPPR2_121_1_0_16 0.044360001 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 y 1 6 noloss False +/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv 950.04419 1543.8955 132.2 0 AAAAAAAAAASGAAIPPLIPPR2_121_1_0_5 0.036620002 AAAAAAAAAASGAAIPPLIPPR2 0 AAAAAAAAAASGAAIPPLIPPR 1 0 0 0 O14654 IRS4_HUMAN IRS4 AAAAAAAAAASGAAIPPLIPPR AAAAAAAAAASGAAIPPLIPPR 2 AAAAAAAAAASGAAIPPLIPPR O14654 0 0 y 1 17 noloss False diff --git a/implementations/python/tests/test_library_backend.py b/implementations/python/tests/test_library_backend.py index 456c7b7..1560066 100644 --- a/implementations/python/tests/test_library_backend.py +++ b/implementations/python/tests/test_library_backend.py @@ -1,9 +1,9 @@ -from mzlib.spectrum import Spectrum +import math import os import unittest -import tempfile -from mzlib.backends import MSPSpectralLibrary, 
TextSpectralLibrary, JSONSpectralLibrary +from mzlib.spectrum import Spectrum +from mzlib.backends import (MSPSpectralLibrary, TextSpectralLibrary, JSONSpectralLibrary, SpectronautTSVSpectralLibrary, DIANNTSVSpectralLibrary) from mzlib.analyte import ANALYTE_MIXTURE_TERM from .common import datafile @@ -62,3 +62,37 @@ class TestJSONLibrary(unittest.TestCase, MzSpecLibLibraryBehaviorBase): test_file = datafile("chinese_hamster_hcd_selected_head.mzlb.json") library_cls = JSONSpectralLibrary test_interpretation_file = datafile("complex_interpretations_with_members.mzlb.json") + + +class TestSpectronautLibrary(unittest.TestCase, LibraryBehaviorBase): + test_file = datafile("human_serum.head.spectronaut.tsv") + library_cls = SpectronautTSVSpectralLibrary + + def test_sequence_behavior(self): + lib = self._open_library() + assert len(lib) == 10 + + spec: Spectrum = lib[0] + assert spec.name == 'AQIPILR/2' + assert math.isclose(spec.precursor_mz, 405.7634379) + assert spec.precursor_charge == 2 + + spec = lib[5] + assert spec.name == 'QELSEAEQATR/2' + assert math.isclose(spec.precursor_mz, 631.3045807) + assert spec.get_analyte(1).proteins[0].name == 'CO3_HUMAN' + + +class TestDIANNTSVLibrary(unittest.TestCase, LibraryBehaviorBase): + test_file = datafile("phl004_canonical_sall_pv_plasma.head.diann.tsv") + library_cls = DIANNTSVSpectralLibrary + + def test_sequence_behavior(self): + lib = self._open_library() + assert len(lib) == 10 + + spec: Spectrum = lib[0] + analyte = spec.get_analyte(1) + assert analyte.peptide == 'AAAAAAAAAAAAAAAASAGGK' + assert spec.name == 'AAAAAAAAAAAAAAAASAGGK2' + From a59847a79e81c18744bcfcd816851960246500e1 Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Mon, 15 May 2023 23:19:15 -0400 Subject: [PATCH 09/24] Fix cluster indexing --- implementations/python/mzlib/backends/text.py | 2 +- implementations/python/mzlib/index/memory.py | 25 +++++++++++++++---- implementations/python/tests/test_cluster.py | 21 ++++++++++++++++ 3 files changed, 42 
insertions(+), 6 deletions(-) create mode 100644 implementations/python/tests/test_cluster.py diff --git a/implementations/python/mzlib/backends/text.py b/implementations/python/mzlib/backends/text.py index 04bad89..e0e0317 100644 --- a/implementations/python/mzlib/backends/text.py +++ b/implementations/python/mzlib/backends/text.py @@ -286,7 +286,7 @@ def create_index(self) -> int: logger.info( f"Processed {file_offset} bytes, {n_spectra} spectra read, {n_clusters} read") else: - self.index.add_cluster(number=n_clusters, offset=spectrum_file_offset) + self.index.add_cluster(number=current_key, offset=spectrum_file_offset) if n_clusters % 10000 == 0: self.index.commit() logger.info( diff --git a/implementations/python/mzlib/index/memory.py b/implementations/python/mzlib/index/memory.py index e401a62..a4b0d3f 100644 --- a/implementations/python/mzlib/index/memory.py +++ b/implementations/python/mzlib/index/memory.py @@ -158,6 +158,7 @@ class MemoryIndex(IndexBase): _dirty: bool _by_key: Dict[int, IndexRecord] + _by_key_cluster: Dict[int, ClusterIndexRecord] _by_name: DefaultDict[str, List[IndexRecord]] _by_attr: DefaultDict[str, DefaultDict[Any, List[IndexRecord]]] @@ -171,6 +172,7 @@ def __init__(self, records=None, cluster_records=None, metadata=None): self.cluster_records = list(cluster_records or []) self._by_name = defaultdict(list) self._by_key = {} + self._by_key_cluster = {} self._by_attr = defaultdict(lambda: defaultdict(list)) self.metadata = metadata or {} self._dirty = True @@ -217,16 +219,22 @@ def search(self, i=None, **kwargs): def search_clusters(self, i=None, **kwargs): if self._dirty: self._update_index() + if i is None and kwargs: # Executing attribute query raise NotImplementedError() + if isinstance(i, Integral): - try: - return self.cluster_records[i] - except IndexError as err: - raise KeyError(i) from err + return self._by_key_cluster[i] + elif isinstance(i, slice): - return self.cluster_records[i] + start = i.start + stop = i.stop + if start 
is None: + start = min(self._by_key_cluster) if self._by_key_cluster else 0 + if stop is None: + stop = max(self._by_key_cluster) if self._by_key_cluster else 0 + return [self._by_key_cluster[i] for i in range(start, stop) if i in self._by_key_cluster] def __getitem__(self, i): return self._get_by_index(i) @@ -237,10 +245,17 @@ def _get_by_index(self, i: Union[int, slice]) -> Union[IndexRecord, List[IndexRe def _update_index(self): self.records.sort(key=lambda x: x.number) + self._by_key.clear() self._by_name = defaultdict(list) for record in self: self._by_key[record.number] = record self._by_name[record.name].append(record) + + self.cluster_records.sort(key=lambda x: x.number) + self._by_key_cluster.clear() + for record in self.cluster_records: + self._by_key_cluster[record.number] = record + self._dirty = False def add(self, number: int, offset: int, name: str, analyte: Any, attributes=None): diff --git a/implementations/python/tests/test_cluster.py b/implementations/python/tests/test_cluster.py new file mode 100644 index 0000000..a4a3047 --- /dev/null +++ b/implementations/python/tests/test_cluster.py @@ -0,0 +1,21 @@ +import os +import unittest + +from mzlib.backends import TextSpectralLibrary +from mzlib.cluster import SpectrumCluster + +from .common import datafile + + +class TestSpectrumCluster(unittest.TestCase): + + def get_library(self): + test_file = datafile("clusters_example.mzlb") + return TextSpectralLibrary(test_file) + + def test_text_cluster_parsing(self): + lib = self.get_library() + cluster: SpectrumCluster = lib.get_cluster(1) + + assert cluster.key == 1 + assert cluster.size == 6 \ No newline at end of file From 890d8faf07f394523994e4d0011fd1c5fece4861 Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Thu, 25 May 2023 22:20:06 -0400 Subject: [PATCH 10/24] Make enums internal --- implementations/python/mzlib/backends/text.py | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git 
a/implementations/python/mzlib/backends/text.py b/implementations/python/mzlib/backends/text.py index e0e0317..d9873ff 100644 --- a/implementations/python/mzlib/backends/text.py +++ b/implementations/python/mzlib/backends/text.py @@ -36,7 +36,7 @@ r"^\d+(.\d+)?") -class SpectrumParserStateEnum(enum.Enum): +class _SpectrumParserStateEnum(enum.Enum): unknown = 0 header = 1 analyte = 2 @@ -47,7 +47,7 @@ class SpectrumParserStateEnum(enum.Enum): cluster = 7 -class LibraryParserStateEnum(enum.Enum): +class _LibraryParserStateEnum(enum.Enum): unknown = 0 header = 1 attribute_sets = 2 @@ -71,7 +71,8 @@ class LibraryParserStateEnum(enum.Enum): attribute_set_types = { "spectrum": AttributeSetTypes.spectrum, "analyte": AttributeSetTypes.analyte, - "interpretation": AttributeSetTypes.interpretation + "interpretation": AttributeSetTypes.interpretation, + "cluster": AttributeSetTypes.cluster, } @@ -104,7 +105,7 @@ def _parse_header_from_stream(self, stream: io.TextIOBase) -> Tuple[bool, int]: first_line = stream.readline() nbytes += len(first_line) - state = LibraryParserStateEnum.unknown + state = _LibraryParserStateEnum.unknown current_attribute_set = None current_attribute_set_type = None @@ -112,7 +113,7 @@ def _parse_header_from_stream(self, stream: io.TextIOBase) -> Tuple[bool, int]: if not _is_header_line(first_line): return True, 0 elif START_OF_LIBRARY_MARKER.match(first_line): - state = LibraryParserStateEnum.header + state = _LibraryParserStateEnum.header match = START_OF_LIBRARY_MARKER.match(first_line) version = match.group(1) attributes = AttributeManager() @@ -126,7 +127,7 @@ def _parse_header_from_stream(self, stream: io.TextIOBase) -> Tuple[bool, int]: continue match = START_OF_ATTRIBUTE_SET.match(line) if match: - state = LibraryParserStateEnum.attribute_sets + state = _LibraryParserStateEnum.attribute_sets if current_attribute_set is not None: self._add_attribute_set( current_attribute_set, current_attribute_set_type) @@ -142,7 +143,7 @@ def 
_parse_header_from_stream(self, stream: io.TextIOBase) -> Tuple[bool, int]: if match is not None: d = match.groupdict() # If we're in an attribute set, store it in the attribute set - if state == LibraryParserStateEnum.attribute_sets: + if state == _LibraryParserStateEnum.attribute_sets: current_attribute_set.add_attribute( d['term'], try_cast(d['value'])) else: # Otherwise store it in the library level attributes @@ -160,7 +161,7 @@ def _parse_header_from_stream(self, stream: io.TextIOBase) -> Tuple[bool, int]: d = match.groupdict() # If we're in an attribute set, store it in the attribute # set - if state == LibraryParserStateEnum.attribute_sets: + if state == _LibraryParserStateEnum.attribute_sets: current_attribute_set.add_attribute( d['term'], try_cast(d['value']), d['group_id']) current_attribute_set.group_counter = int(d['group_id']) @@ -177,7 +178,7 @@ def _parse_header_from_stream(self, stream: io.TextIOBase) -> Tuple[bool, int]: f"Malformed grouped attribute {line}") elif "=" in line: name, value = line.split("=", 1) - if state == LibraryParserStateEnum.attribute_sets: + if state == _LibraryParserStateEnum.attribute_sets: current_attribute_set.add_attribute(name, value) else: attributes.add_attribute(name, value) @@ -355,19 +356,19 @@ def _prepare_attribute_dict(self, match): def _parse_attribute_into(self, line: str, store: Attributed, line_number_message=lambda:'', - state: SpectrumParserStateEnum=None) -> bool: + state: _SpectrumParserStateEnum=None) -> bool: match = key_value_term_pattern.match(line) if match is not None: d = match.groupdict() self._prepare_attribute_dict(d) if d['term'] == ATTRIBUTE_SET_NAME: - if SpectrumParserStateEnum.header == state: + if _SpectrumParserStateEnum.header == state: attr_set = self.entry_attribute_sets[d['value']] - elif SpectrumParserStateEnum.analyte == state: + elif _SpectrumParserStateEnum.analyte == state: attr_set = self.analyte_attribute_sets[d['value']] - elif SpectrumParserStateEnum.interpretation == state: 
+ elif _SpectrumParserStateEnum.interpretation == state: attr_set = self.interpretation_attribute_sets[d['value']] - elif SpectrumParserStateEnum.cluster == state: + elif _SpectrumParserStateEnum.cluster == state: attr_set = self.cluster_attribute_sets[d['value']] else: raise ValueError(f"Cannot define attribute sets for {state}") @@ -403,8 +404,8 @@ def _parse(self, buffer: Iterable[str], spectrum_index: int = None, interpretation_member: InterpretationMember = None cluster: SpectrumCluster = None - STATES = SpectrumParserStateEnum - state: SpectrumParserStateEnum = STATES.header + STATES = _SpectrumParserStateEnum + state: _SpectrumParserStateEnum = STATES.header peak_list = [] line_number = -1 From 080faccc3585a137bbe5b4ef30224a465b449c2f Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Fri, 26 May 2023 12:28:39 -0400 Subject: [PATCH 11/24] Violating MAY rules is still valid --- implementations/python/mzlib/validate/validator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/implementations/python/mzlib/validate/validator.py b/implementations/python/mzlib/validate/validator.py index eacfc2e..b5816d5 100644 --- a/implementations/python/mzlib/validate/validator.py +++ b/implementations/python/mzlib/validate/validator.py @@ -221,10 +221,10 @@ def apply_rules(self, obj: Attributed, path: str, identifier_path: Tuple) -> boo for rule in itertools.chain(self.semantic_rules, self.object_rules): if rule.path == path: v = rule(obj, path, identifier_path, self) - result &= v level = logging.DEBUG if not v and rule.requirement_level > RequirementLevel.may: level = logging.WARN + result &= v logger.log(level, f"Applied {rule.id} to {path}:{identifier_path} {v}/{result}") return result From a784c270e8d4d3cded4bb3d2de180618bce79d11 Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Fri, 26 May 2023 12:29:15 -0400 Subject: [PATCH 12/24] Rebuild references --- ...chinese_hamster_hcd_selected_head.mzlb.txt | 16 ++--- implementations/python/mzlib/analyte.py | 29 
+++++++- implementations/python/mzlib/attributes.py | 19 +++-- .../python/mzlib/backends/diann.py | 3 +- implementations/python/mzlib/backends/msp.py | 17 ++--- .../python/mzlib/backends/spectronaut.py | 2 + .../python/mzlib/validate/rules/base.json | 22 +++++- ...hinese_hamster_hcd_selected_head.mzlb.json | 72 +++++++++---------- ...chinese_hamster_hcd_selected_head.mzlb.txt | 16 ++--- 9 files changed, 126 insertions(+), 70 deletions(-) diff --git a/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt b/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt index 5cc3e74..4991bc3 100644 --- a/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt +++ b/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt @@ -1,12 +1,11 @@ -MS:1003188|library name=examples/chinese_hamster_hcd_selected_head.msp +MS:1003188|library name=examples/chinese_hamster_hcd_selected_head MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum -MS:1000041|charge state=2 MS:1003208|experimental precursor monoisotopic m/z=855.4538 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=46 @@ -30,6 +29,7 @@ MS:1003059|number of peaks=87 MS:1000224|molecular mass=1710.9076 MS:1000888|stripped peptide sequence=AAAACALTPGPLADLAAR +MS:1000041|charge state=2 [1]MS:1001975|delta m/z=1.4 [1]UO:0000000|unit=UO:0000169|parts per million MS:1003208|experimental precursor monoisotopic m/z=855.455 @@ -137,7 +137,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=4 MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_53eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum -MS:1000041|charge state=2 MS:1003208|experimental precursor monoisotopic m/z=855.4538 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation 
[1]MS:1000045|collision energy=53 @@ -161,6 +160,7 @@ MS:1003059|number of peaks=204 MS:1000224|molecular mass=1710.9076 MS:1000888|stripped peptide sequence=AAAACALTPGPLADLAAR +MS:1000041|charge state=2 [1]MS:1001975|delta m/z=4.2 [1]UO:0000000|unit=UO:0000169|parts per million MS:1003208|experimental precursor monoisotopic m/z=855.4574 @@ -385,7 +385,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=5 MS:1003061|library spectrum name=AAAAGQTGTVPPGAPGALPLPGMAIVK/2_0_76eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum -MS:1000041|charge state=2 MS:1003208|experimental precursor monoisotopic m/z=1207.1672 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=76 @@ -409,6 +408,7 @@ MS:1003059|number of peaks=122 MS:1000224|molecular mass=2414.3344 MS:1000888|stripped peptide sequence=AAAAGQTGTVPPGAPGALPLPGMAIVK +MS:1000041|charge state=2 [1]MS:1001975|delta m/z=-0.9 [1]UO:0000000|unit=UO:0000169|parts per million MS:1003208|experimental precursor monoisotopic m/z=1207.1661 @@ -551,7 +551,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0 MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/2_0_44eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum -MS:1000041|charge state=2 MS:1003208|experimental precursor monoisotopic m/z=731.9043 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=44 @@ -575,6 +574,7 @@ MS:1003059|number of peaks=111 MS:1000224|molecular mass=1463.8086 MS:1000888|stripped peptide sequence=AAAAGSTSVKPIFSR +MS:1000041|charge state=2 [1]MS:1001975|delta m/z=-2.7 [1]UO:0000000|unit=UO:0000169|parts per million MS:1003208|experimental precursor monoisotopic m/z=731.9023 @@ -706,7 +706,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=1 MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/3_0_28eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum 
-MS:1000041|charge state=3 MS:1003208|experimental precursor monoisotopic m/z=488.2719 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=28 @@ -730,6 +729,7 @@ MS:1003059|number of peaks=161 MS:1000224|molecular mass=1464.8157 MS:1000888|stripped peptide sequence=AAAAGSTSVKPIFSR +MS:1000041|charge state=3 [1]MS:1001975|delta m/z=3.8 [1]UO:0000000|unit=UO:0000169|parts per million MS:1003208|experimental precursor monoisotopic m/z=488.2738 @@ -911,7 +911,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0 MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_50eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum -MS:1000041|charge state=2 MS:1003208|experimental precursor monoisotopic m/z=830.8834 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=50 @@ -935,6 +934,7 @@ MS:1003059|number of peaks=68 MS:1000224|molecular mass=1661.7668 MS:1000888|stripped peptide sequence=AAAALGSHGSCSSEVEK +MS:1000041|charge state=2 [1]MS:1001975|delta m/z=4.1 [1]UO:0000000|unit=UO:0000169|parts per million MS:1003208|experimental precursor monoisotopic m/z=830.8868 @@ -1023,7 +1023,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=6 MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_52eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum -MS:1000041|charge state=2 MS:1003208|experimental precursor monoisotopic m/z=830.8834 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=52 @@ -1047,6 +1046,7 @@ MS:1003059|number of peaks=402 MS:1000224|molecular mass=1661.7668 MS:1000888|stripped peptide sequence=AAAALGSHGSCSSEVEK +MS:1000041|charge state=2 [1]MS:1001975|delta m/z=-2.0 [1]UO:0000000|unit=UO:0000169|parts per million MS:1003208|experimental precursor monoisotopic m/z=830.8817 diff --git 
a/implementations/python/mzlib/analyte.py b/implementations/python/mzlib/analyte.py index e787ea0..dac3cb4 100644 --- a/implementations/python/mzlib/analyte.py +++ b/implementations/python/mzlib/analyte.py @@ -5,8 +5,9 @@ except ImportError: from collections import (MutableMapping, Mapping) -import textwrap -from typing import Iterable, KeysView, ItemsView, ValuesView, Dict +from typing import Iterable, KeysView, ItemsView, Optional, ValuesView, Dict + +from pyteomics import proforma from mzlib.attributes import AttributedEntity, IdentifiedAttributeManager, AttributeManagedProperty, AttributeProxy, AttributeGroupFacet @@ -15,6 +16,9 @@ FIRST_INTERPRETATION_KEY = '1' ANALYTE_MIXTURE_TERM = "MS:1003163|analyte mixture members" +CHARGE_STATE = "MS:1000041|charge state" +PROFORMA_ION = "MS:1003270|proforma peptidoform ion notation" +PROFORMA_SEQ = "MS:1000889|proforma peptidoform sequence" class _AnalyteMappingProxy(Mapping): @@ -205,3 +209,24 @@ class Analyte(IdentifiedAttributeManager): mass = AttributeManagedProperty[float]("MS:1001117|theoretical mass") peptide = AttributeManagedProperty[str]("MS:1003169|proforma peptidoform sequence") proteins = AttributeGroupFacet[ProteinDescription](ProteinDescription) + + @property + def charge(self) -> Optional[int]: + if self.has_attribute(CHARGE_STATE): + return self.get_attribute(CHARGE_STATE) + elif self.has_attribute(PROFORMA_ION): + ion_val = self.get_attribute(PROFORMA_ION) + val = proforma.ProForma.parse(ion_val) + return val.charge_state + else: + return None + + @charge.setter + def charge(self, value): + if value is not None: + if self.has_attribute(CHARGE_STATE): + self.replace_attribute(CHARGE_STATE, value) + else: + self.add_attribute(CHARGE_STATE, value) + else: + self.remove_attribute(CHARGE_STATE) \ No newline at end of file diff --git a/implementations/python/mzlib/attributes.py b/implementations/python/mzlib/attributes.py index fc30180..1b21747 100644 --- a/implementations/python/mzlib/attributes.py +++ 
b/implementations/python/mzlib/attributes.py @@ -132,7 +132,8 @@ def get_next_group_identifier(self) -> str: #### Add an attribute to the list and update the lookup tables def add_attribute(self, key: str, value, group_identifier: Optional[str] = None): - """Add an attribute to the list and update the lookup tables + """ + Add an attribute to the list and update the lookup tables Parameters ---------- @@ -184,7 +185,8 @@ def add_attribute_group(self, attributes: List[Union[Attribute, Tuple[str, Any]] def get_attribute(self, key: str, group_identifier: Optional[str] = None, raw: bool = False) -> Union[Any, List[Any], Attribute, List[Attribute]]: - """Get the value or values associated with a given + """ + Get the value or values associated with a given attribute key. Parameters @@ -193,6 +195,8 @@ def get_attribute(self, key: str, group_identifier: Optional[str] = None, The name of the attribute to retrieve group_identifier : str, optional The specific group identifier to return from. + raw : bool + Whether to return the :class:`Attribute` object or unwrap the value Returns ------- @@ -246,7 +250,8 @@ def replace_attribute(self, key, value, group_identifier=None): raise NotImplementedError() def get_by_name(self, name: str): - '''Search for an attribute by human-readable name. + """ + Search for an attribute by human-readable name. Parameters ---------- @@ -257,7 +262,7 @@ def get_by_name(self, name: str): ------- object: The attribute value if found or :const:`None`. - ''' + """ matches = [] for attr in self: if attr.key.split("|")[-1] == name: @@ -276,7 +281,8 @@ def clear(self): self._clear_attributes() def remove_attribute(self, key, group_identifier=None): - """Remove the value or values associated with a given + """ + Remove the value or values associated with a given attribute key from the store. This rebuilds the entire store, which may be expensive. 
@@ -329,7 +335,8 @@ def _iter_attribute_groups(self): yield None, acc def has_attribute(self, key): - """Test for the presence of a given attribute + """ + Test for the presence of a given attribute Parameters ---------- diff --git a/implementations/python/mzlib/backends/diann.py b/implementations/python/mzlib/backends/diann.py index f5b6755..cef1868 100644 --- a/implementations/python/mzlib/backends/diann.py +++ b/implementations/python/mzlib/backends/diann.py @@ -130,7 +130,7 @@ def _parse_from_buffer(self, buffer: List[Dict[str, Any]], spectrum_index: int = spec.add_attribute(SPECTRUM_NAME, descr['transition_group_id']) spec.add_attribute(SELECTED_ION_MZ, float(descr['PrecursorMz'])) - spec.add_attribute(CHARGE_STATE, int(descr['PrecursorCharge'])) + if 'FileName' in descr: spec.add_attribute(SOURCE_FILE, descr['FileName']) spec.add_attribute(*self._spectrum_type()) @@ -150,6 +150,7 @@ def _parse_from_buffer(self, buffer: List[Dict[str, Any]], spectrum_index: int = analyte.add_attribute(STRIPPED_PEPTIDE_TERM, descr['PeptideSequence']) analyte.add_attribute(PROFORMA_PEPTIDE_TERM, pf_seq) analyte.add_attribute("MS:1001117|theoretical mass", peptide.mass) + analyte.add_attribute(CHARGE_STATE, int(descr['PrecursorCharge'])) protein_group_id = analyte.get_next_group_identifier() if "UniprotID" in descr: diff --git a/implementations/python/mzlib/backends/msp.py b/implementations/python/mzlib/backends/msp.py index 0f3704f..9b289e6 100644 --- a/implementations/python/mzlib/backends/msp.py +++ b/implementations/python/mzlib/backends/msp.py @@ -233,6 +233,10 @@ def add(self, handler: AttributeHandler): analyte_terms = CaseInsensitiveDict({ + "Charge": "MS:1000041|charge state", + "precursor_charge": "MS:1000041|charge state", + "precursorcharge": "MS:1000041|charge state", + "MW": "MS:1000224|molecular mass", "total exact mass": "MS:1000224|molecular mass", "ExactMass": "MS:1000224|molecular mass", @@ -281,10 +285,6 @@ def add(self, handler: AttributeHandler): other_terms 
= CaseInsensitiveDict({ - "Charge": "MS:1000041|charge state", - "precursor_charge": "MS:1000041|charge state", - "precursorcharge": "MS:1000041|charge state", - "Parent": "MS:1003208|experimental precursor monoisotopic m/z", "ObservedPrecursorMZ": "MS:1003208|experimental precursor monoisotopic m/z", "PrecursorMZ": "MS:1003208|experimental precursor monoisotopic m/z", @@ -773,7 +773,7 @@ def protein_handler(key, value, container: Attributed): match.group(1), group_identifier=group_identifier) container.add_attribute("MS:1001113|c-terminal flanking residue", match.group(2), group_identifier=group_identifier) - container.add_attribute(key, re.sub(r"\(pre=(.),post=(.)\)", '', value), + container.add_attribute(key.strip('"').strip("'"), re.sub(r"\(pre=(.),post=(.)\)", '', value), group_identifier=group_identifier) return True @@ -1162,7 +1162,7 @@ def _parse_comment(self, value: str, attributes: Attributed): new_item = new_item + " " new_item = new_item + item n_quotes = new_item.count('"') - if n_quotes/2 == int(n_quotes/2): + if n_quotes % 2 == 0: fixed_comment_items.append(new_item) new_item = "" @@ -1264,7 +1264,7 @@ def _make_spectrum(self, peak_list: List, attributes: Mapping[str, str]): analyte.add_attribute( "MS:1001113|c-terminal flanking residue", match.group(3)) if match.group(4): - spectrum.add_attribute( + analyte.add_attribute( "MS:1000041|charge state", try_cast(match.group(4))) else: spectrum.add_attribute( @@ -1287,7 +1287,7 @@ def _make_spectrum(self, peak_list: List, attributes: Mapping[str, str]): if match: analyte.add_attribute( STRIPPED_PEPTIDE_TERM, match.group(1)) - spectrum.add_attribute( + analyte.add_attribute( "MS:1000041|charge state", try_cast(match.group(2))) #### Handle the uninterpretable terms @@ -1429,6 +1429,7 @@ class MSPSpectralLibraryWriter(SpectralLibraryWriterBase): "MS:1003054|theoretical average m/z": "Mz_av", "MS:1003169|proforma peptidoform sequence": "ProForma", "MS:1000888|stripped peptide sequence": "Peptide", + 
"MS:1000041|charge state": "Charge", } for species_name, keys in species_map.items(): diff --git a/implementations/python/mzlib/backends/spectronaut.py b/implementations/python/mzlib/backends/spectronaut.py index c797bf5..9f2735a 100644 --- a/implementations/python/mzlib/backends/spectronaut.py +++ b/implementations/python/mzlib/backends/spectronaut.py @@ -209,9 +209,11 @@ def _build_analyte(self, description: Dict[str, Any], analyte: Analyte) -> Analy analyte.add_attribute(STRIPPED_PEPTIDE_TERM, description['StrippedPeptide']) analyte.add_attribute(PROFORMA_PEPTIDE_TERM, pf_seq) analyte.add_attribute("MS:1001117|theoretical mass", peptide.mass) + analyte.add_attribute(CHARGE_STATE, int(description['PrecursorCharge'])) protein_group_id = analyte.get_next_group_identifier() + if 'UniProtIds' in description: analyte.add_attribute( "MS:1000885|protein accession", diff --git a/implementations/python/mzlib/validate/rules/base.json b/implementations/python/mzlib/validate/rules/base.json index 223c50c..d22733c 100644 --- a/implementations/python/mzlib/validate/rules/base.json +++ b/implementations/python/mzlib/validate/rules/base.json @@ -92,7 +92,7 @@ ], "combination_logic": "OR", "id": "Spectrum_has_precursor_charge", - "level": "SHOULD", + "level": "MAY", "path": "/Library/Spectrum" }, { @@ -148,6 +148,26 @@ "id": "Analyte_has_any_mass", "level": "SHOULD", "path": "/Library/Spectrum/Analyte" + }, + { + "attr": [ + { + "accession": "MS:1000041", + "allow_children": false, + "name": "charge state", + "repeatable": false + }, + { + "accession": "MS:1000633", + "allow_children": false, + "name": "possible charge state", + "repeatable": false + } + ], + "combination_logic": "OR", + "id": "Analyte_has_charge", + "level": "SHOULD", + "path": "/Library/Spectrum/Analyte" } ] } \ No newline at end of file diff --git a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json 
b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json index 06888c4..bc6d6ba 100644 --- a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json +++ b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json @@ -11,7 +11,7 @@ { "accession": "MS:1003188", "name": "library name", - "value": "tests/test_data/chinese_hamster_hcd_selected_head.msp" + "value": "tests/test_data/chinese_hamster_hcd_selected_head" } ], "clusters": [], @@ -34,6 +34,11 @@ "name": "stripped peptide sequence", "value": "AAAACALTPGPLADLAAR" }, + { + "accession": "MS:1000041", + "name": "charge state", + "value": 2 + }, { "accession": "MS:1001975", "cv_param_group": "1", @@ -114,11 +119,6 @@ "value": "singleton spectrum", "value_accession": "MS:1003066" }, - { - "accession": "MS:1000041", - "name": "charge state", - "value": 2 - }, { "accession": "MS:1003208", "name": "experimental precursor monoisotopic m/z", @@ -541,6 +541,11 @@ "name": "stripped peptide sequence", "value": "AAAACALTPGPLADLAAR" }, + { + "accession": "MS:1000041", + "name": "charge state", + "value": 2 + }, { "accession": "MS:1001975", "cv_param_group": "1", @@ -621,11 +626,6 @@ "value": "singleton spectrum", "value_accession": "MS:1003066" }, - { - "accession": "MS:1000041", - "name": "charge state", - "value": 2 - }, { "accession": "MS:1003208", "name": "experimental precursor monoisotopic m/z", @@ -1399,6 +1399,11 @@ "name": "stripped peptide sequence", "value": "AAAAGQTGTVPPGAPGALPLPGMAIVK" }, + { + "accession": "MS:1000041", + "name": "charge state", + "value": 2 + }, { "accession": "MS:1001975", "cv_param_group": "1", @@ -1479,11 +1484,6 @@ "value": "singleton spectrum", "value_accession": "MS:1003066" }, - { - "accession": "MS:1000041", - "name": "charge state", - "value": 2 - }, { "accession": "MS:1003208", "name": "experimental precursor monoisotopic m/z", @@ -2011,6 +2011,11 @@ "name": "stripped peptide sequence", "value": 
"AAAAGSTSVKPIFSR" }, + { + "accession": "MS:1000041", + "name": "charge state", + "value": 2 + }, { "accession": "MS:1001975", "cv_param_group": "1", @@ -2091,11 +2096,6 @@ "value": "singleton spectrum", "value_accession": "MS:1003066" }, - { - "accession": "MS:1000041", - "name": "charge state", - "value": 2 - }, { "accession": "MS:1003208", "name": "experimental precursor monoisotopic m/z", @@ -2590,6 +2590,11 @@ "name": "stripped peptide sequence", "value": "AAAAGSTSVKPIFSR" }, + { + "accession": "MS:1000041", + "name": "charge state", + "value": 3 + }, { "accession": "MS:1001975", "cv_param_group": "1", @@ -2670,11 +2675,6 @@ "value": "singleton spectrum", "value_accession": "MS:1003066" }, - { - "accession": "MS:1000041", - "name": "charge state", - "value": 3 - }, { "accession": "MS:1003208", "name": "experimental precursor monoisotopic m/z", @@ -3319,6 +3319,11 @@ "name": "stripped peptide sequence", "value": "AAAALGSHGSCSSEVEK" }, + { + "accession": "MS:1000041", + "name": "charge state", + "value": 2 + }, { "accession": "MS:1001975", "cv_param_group": "1", @@ -3399,11 +3404,6 @@ "value": "singleton spectrum", "value_accession": "MS:1003066" }, - { - "accession": "MS:1000041", - "name": "charge state", - "value": 2 - }, { "accession": "MS:1003208", "name": "experimental precursor monoisotopic m/z", @@ -3769,6 +3769,11 @@ "name": "stripped peptide sequence", "value": "AAAALGSHGSCSSEVEK" }, + { + "accession": "MS:1000041", + "name": "charge state", + "value": 2 + }, { "accession": "MS:1001975", "cv_param_group": "1", @@ -3849,11 +3854,6 @@ "value": "singleton spectrum", "value_accession": "MS:1003066" }, - { - "accession": "MS:1000041", - "name": "charge state", - "value": 2 - }, { "accession": "MS:1003208", "name": "experimental precursor monoisotopic m/z", diff --git a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt index aaca838..226cdf8 
100644 --- a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt +++ b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt @@ -1,12 +1,11 @@ -MS:1003188|library name=tests/test_data/chinese_hamster_hcd_selected_head.msp +MS:1003188|library name=tests/test_data/chinese_hamster_hcd_selected_head MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum -MS:1000041|charge state=2 MS:1003208|experimental precursor monoisotopic m/z=855.4538 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=46 @@ -30,6 +29,7 @@ MS:1003059|number of peaks=87 MS:1000224|molecular mass=1710.9076 MS:1000888|stripped peptide sequence=AAAACALTPGPLADLAAR +MS:1000041|charge state=2 [1]MS:1001975|delta m/z=1.4 [1]UO:0000000|unit=UO:0000169|parts per million MS:1003208|experimental precursor monoisotopic m/z=855.455 @@ -137,7 +137,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=4 MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_53eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum -MS:1000041|charge state=2 MS:1003208|experimental precursor monoisotopic m/z=855.4538 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=53 @@ -161,6 +160,7 @@ MS:1003059|number of peaks=204 MS:1000224|molecular mass=1710.9076 MS:1000888|stripped peptide sequence=AAAACALTPGPLADLAAR +MS:1000041|charge state=2 [1]MS:1001975|delta m/z=4.2 [1]UO:0000000|unit=UO:0000169|parts per million MS:1003208|experimental precursor monoisotopic m/z=855.4574 @@ -385,7 +385,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=5 MS:1003061|library spectrum name=AAAAGQTGTVPPGAPGALPLPGMAIVK/2_0_76eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum -MS:1000041|charge state=2 MS:1003208|experimental 
precursor monoisotopic m/z=1207.1672 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=76 @@ -409,6 +408,7 @@ MS:1003059|number of peaks=122 MS:1000224|molecular mass=2414.3344 MS:1000888|stripped peptide sequence=AAAAGQTGTVPPGAPGALPLPGMAIVK +MS:1000041|charge state=2 [1]MS:1001975|delta m/z=-0.9 [1]UO:0000000|unit=UO:0000169|parts per million MS:1003208|experimental precursor monoisotopic m/z=1207.1661 @@ -551,7 +551,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0 MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/2_0_44eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum -MS:1000041|charge state=2 MS:1003208|experimental precursor monoisotopic m/z=731.9043 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=44 @@ -575,6 +574,7 @@ MS:1003059|number of peaks=111 MS:1000224|molecular mass=1463.8086 MS:1000888|stripped peptide sequence=AAAAGSTSVKPIFSR +MS:1000041|charge state=2 [1]MS:1001975|delta m/z=-2.7 [1]UO:0000000|unit=UO:0000169|parts per million MS:1003208|experimental precursor monoisotopic m/z=731.9023 @@ -706,7 +706,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=1 MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/3_0_28eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum -MS:1000041|charge state=3 MS:1003208|experimental precursor monoisotopic m/z=488.2719 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=28 @@ -730,6 +729,7 @@ MS:1003059|number of peaks=161 MS:1000224|molecular mass=1464.8157 MS:1000888|stripped peptide sequence=AAAAGSTSVKPIFSR +MS:1000041|charge state=3 [1]MS:1001975|delta m/z=3.8 [1]UO:0000000|unit=UO:0000169|parts per million MS:1003208|experimental precursor monoisotopic m/z=488.2738 @@ -911,7 +911,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0 MS:1003061|library 
spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_50eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum -MS:1000041|charge state=2 MS:1003208|experimental precursor monoisotopic m/z=830.8834 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=50 @@ -935,6 +934,7 @@ MS:1003059|number of peaks=68 MS:1000224|molecular mass=1661.7668 MS:1000888|stripped peptide sequence=AAAALGSHGSCSSEVEK +MS:1000041|charge state=2 [1]MS:1001975|delta m/z=4.1 [1]UO:0000000|unit=UO:0000169|parts per million MS:1003208|experimental precursor monoisotopic m/z=830.8868 @@ -1023,7 +1023,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=6 MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_52eV MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum -MS:1000041|charge state=2 MS:1003208|experimental precursor monoisotopic m/z=830.8834 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation [1]MS:1000045|collision energy=52 @@ -1047,6 +1046,7 @@ MS:1003059|number of peaks=402 MS:1000224|molecular mass=1661.7668 MS:1000888|stripped peptide sequence=AAAALGSHGSCSSEVEK +MS:1000041|charge state=2 [1]MS:1001975|delta m/z=-2.0 [1]UO:0000000|unit=UO:0000169|parts per million MS:1003208|experimental precursor monoisotopic m/z=830.8817 From 760df7b449cced569f89c3cbbf753bc8818afea3 Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Sat, 17 Jun 2023 21:34:53 -0400 Subject: [PATCH 13/24] Checkpoint --- implementations/python/examples/draw_entry.py | 27 + .../python/examples/first_n_entries.py | 42 ++ implementations/python/mzlib/analyte.py | 5 +- implementations/python/mzlib/attributes.py | 25 +- implementations/python/mzlib/backends/base.py | 13 +- implementations/python/mzlib/backends/json.py | 4 +- implementations/python/mzlib/backends/msp.py | 2 +- implementations/python/mzlib/backends/text.py | 530 ++++++++++-------- implementations/python/mzlib/spectrum.py | 2 +- 
.../python/mzlib/spectrum_library.py | 33 +- 10 files changed, 426 insertions(+), 257 deletions(-) create mode 100644 implementations/python/examples/draw_entry.py create mode 100644 implementations/python/examples/first_n_entries.py diff --git a/implementations/python/examples/draw_entry.py b/implementations/python/examples/draw_entry.py new file mode 100644 index 0000000..8126d7e --- /dev/null +++ b/implementations/python/examples/draw_entry.py @@ -0,0 +1,27 @@ +import sys +import matplotlib +matplotlib.use("agg") + +from matplotlib import pyplot as plt + +from mzlib.spectrum_library import SpectrumLibrary +from mzlib.draw import draw_spectrum + + +def main(path, spectrum_key): + lib = SpectrumLibrary(filename=path) + spec = lib.get_spectrum(spectrum_number=spectrum_key) + draw_spectrum(spec) + plt.savefig(f"{path}.{spectrum_key}.annotated.pdf", bbox_inches='tight') + + +if __name__ == '__main__': + try: + path = sys.argv[1] + index = sys.argv[2] + main(path, int(index)) + sys.exit(0) + except (IndexError, TypeError): + print("USAGE: ") + print("\tWrites the annotated spectrum to ..annotated.pdf") + sys.exit(1) diff --git a/implementations/python/examples/first_n_entries.py b/implementations/python/examples/first_n_entries.py new file mode 100644 index 0000000..0aa4c05 --- /dev/null +++ b/implementations/python/examples/first_n_entries.py @@ -0,0 +1,42 @@ +import click + +from mzlib import SpectrumLibrary +from mzlib.backends import SpectralLibraryBackendBase, FormatInferenceFailure, TextSpectralLibraryWriter +from mzlib.cluster import SpectrumCluster +from mzlib.index import MemoryIndex, SQLIndex +from mzlib.spectrum import Spectrum + +@click.command('first_n_entries') +@click.argument('inpath', type=click.Path(exists=True)) +@click.option("-i", "--input-format", type=click.Choice(sorted(SpectralLibraryBackendBase._file_extension_to_implementation)), + default=None) +@click.option("-n", '--spectra-to-read', type=int, default=20) +def main(inpath, input_format, 
spectra_to_read: int=20): + if SQLIndex.exists(inpath): + index_type = SQLIndex + else: + index_type = MemoryIndex + click.echo(f"Opening {inpath}", err=True) + try: + library = SpectrumLibrary(filename=inpath, index_type=index_type, format=input_format) + except FormatInferenceFailure as err: + click.echo(f"{err}", err=True) + raise click.Abort() + + stream = click.get_text_stream('stdout') + writer = TextSpectralLibraryWriter(stream) + writer.write_header(library) + + for i, entry in enumerate(library, 1): + if i > spectra_to_read: + break + if isinstance(entry, Spectrum): + writer.write_spectrum(entry) + elif isinstance(entry, SpectrumCluster): + writer.write_cluster(entry) + + writer.close() + + +if __name__ == "__main__": + main.main() \ No newline at end of file diff --git a/implementations/python/mzlib/analyte.py b/implementations/python/mzlib/analyte.py index dac3cb4..a53df52 100644 --- a/implementations/python/mzlib/analyte.py +++ b/implementations/python/mzlib/analyte.py @@ -151,12 +151,13 @@ def remove_member_interpretation(self, member_id): del self.member_interpretations[str(member_id)] def validate(self) -> bool: - '''Perform validation on each component to confirm this object is well formed. + """ + Perform validation on each component to confirm this object is well formed. Returns ------- bool - ''' + """ analyte_ids = set(self.analytes) member_ids = set(self.member_interpretations) valid = True diff --git a/implementations/python/mzlib/attributes.py b/implementations/python/mzlib/attributes.py index 1b21747..097ac50 100644 --- a/implementations/python/mzlib/attributes.py +++ b/implementations/python/mzlib/attributes.py @@ -76,7 +76,8 @@ def __hash__(self): class AttributeManager(object): - """A key-value pair store with optional attribute grouping + """ + A key-value pair store with optional attribute grouping Attributes ---------- @@ -91,6 +92,7 @@ class AttributeManager(object): The number of attribute groups assigned. 
""" + attributes: List[Attribute] attribute_dict: Dict group_dict: Dict @@ -118,14 +120,14 @@ def __init__(self, attributes: Iterable = None): self._from_iterable(attributes) def get_next_group_identifier(self) -> str: - """Retrieve the next un-used attribute group identifier + """ + Retrieve the next un-used attribute group identifier and increment the internal counter. Returns ------- str """ - next_value = self.group_counter self.group_counter += 1 return str(next_value) @@ -275,9 +277,7 @@ def get_by_name(self, name: str): return None def clear(self): - """Remove all content from the store. - - """ + """Remove all content from the store.""" self._clear_attributes() def remove_attribute(self, key, group_identifier=None): @@ -600,11 +600,13 @@ def __init__(self, attributes: Iterable=None, **kwargs): class AttributeManagedProperty(Generic[T]): - __slots__ = ("attribute", ) + __slots__ = ("attribute", "multiple") attribute: str + multiple: bool - def __init__(self, attribute: str): + def __init__(self, attribute: str, multiple: bool = False): self.attribute = attribute + self.multiple = multiple def _get_group_id(self, inst: AttributeManager) -> Optional[str]: return getattr(inst, "group_id", None) @@ -613,7 +615,10 @@ def __get__(self, inst: AttributeManager, cls: Type) -> T: if inst is None: return self if inst.has_attribute(self.attribute): - return inst.get_attribute(self.attribute, group_identifier=self._get_group_id(inst)) + value = inst.get_attribute(self.attribute, group_identifier=self._get_group_id(inst)) + if self.multiple and not isinstance(value, list): + value = [value] + return value return None def __set__(self, inst: AttributeManager, value: T): @@ -642,8 +647,6 @@ def __get__(self, inst: AttributeManager, cls: Type) -> T: if inst is None: return self key, val = self._find_key_used(inst) - if key is None: - raise KeyError(self.attributes[0]) return val def _find_key_used(self, inst: AttributeManager) -> Optional[Tuple[str, T]]: diff --git 
a/implementations/python/mzlib/backends/base.py b/implementations/python/mzlib/backends/base.py index 8b6ad0f..d6aa4b3 100644 --- a/implementations/python/mzlib/backends/base.py +++ b/implementations/python/mzlib/backends/base.py @@ -100,7 +100,7 @@ class SpectralLibraryBackendBase(AttributedEntity, _VocabularyResolverMixin, _Li index: IndexBase - entry_attribute_sets: Dict[str, AttributeSet] + spectrum_attribute_sets: Dict[str, AttributeSet] analyte_attribute_sets: Dict[str, AttributeSet] interpretation_attribute_sets: Dict[str, AttributeSet] cluster_attribute_sets: Dict[str, AttributeSet] @@ -183,7 +183,7 @@ def __init__(self, filename: Union[str, Path, io.FileIO]): self.filename = filename self.index = MemoryIndex() - self.entry_attribute_sets = { + self.spectrum_attribute_sets = { "all": AttributeSet("all", []) } self.analyte_attribute_sets = { @@ -210,7 +210,7 @@ def read_header(self) -> bool: def _new_spectrum(self) -> Spectrum: spec = Spectrum() - attr_set = self.entry_attribute_sets.get("all") + attr_set = self.spectrum_attribute_sets.get("all") if attr_set: attr_set.apply(spec) return spec @@ -371,7 +371,7 @@ def read(self) -> Iterator[Union[Spectrum, SpectrumCluster]]: def _add_attribute_set(self, attribute_set: AttributeSet, attribute_set_type: AttributeSetTypes): if attribute_set_type == AttributeSetTypes.spectrum: - self.entry_attribute_sets[attribute_set.name] = attribute_set + self.spectrum_attribute_sets[attribute_set.name] = attribute_set elif attribute_set_type == AttributeSetTypes.analyte: self.analyte_attribute_sets[attribute_set.name] = attribute_set elif attribute_set_type == AttributeSetTypes.interpretation: @@ -667,6 +667,11 @@ def close(self): class LibraryIterator(AttributedEntity, _LibraryViewMixin, Iterator[Spectrum]): + backend: SpectralLibraryBackendBase + attributes: Attributed + iter: Iterator[Spectrum] + _buffer: Spectrum + def __init__(self, backend: SpectralLibraryBackendBase) -> None: self.backend = backend self.attributes = 
backend diff --git a/implementations/python/mzlib/backends/json.py b/implementations/python/mzlib/backends/json.py index d26a165..cd4d164 100644 --- a/implementations/python/mzlib/backends/json.py +++ b/implementations/python/mzlib/backends/json.py @@ -152,7 +152,7 @@ def _fill_attributes(self, attributes: List[Dict[str, Any]], store: Attributed, if context_type == AttributeSetTypes.analyte: self.analyte_attribute_sets[attrib['value']].apply(store) elif context_type == AttributeSetTypes.spectrum: - self.entry_attribute_sets[attrib['value']].apply(store) + self.spectrum_attribute_sets[attrib['value']].apply(store) elif context_type == AttributeSetTypes.interpretation: self.interpretation_attribute_sets[attrib['value']].apply(store) elif context_type == AttributeSetTypes.cluster: @@ -309,7 +309,7 @@ def write_header(self, library: SpectralLibraryBackendBase): attributes = self._format_attributes(library.attributes) self.buffer[LIBRARY_METADATA_KEY] = attributes self.buffer[SPECTRUM_CLASSES] = { - c.name: self._format_attributes(c.attributes) for c in library.entry_attribute_sets.values() + c.name: self._format_attributes(c.attributes) for c in library.spectrum_attribute_sets.values() } self.buffer[ANALYTE_CLASSES] = { c.name: self._format_attributes(c.attributes) for c in library.analyte_attribute_sets.values() diff --git a/implementations/python/mzlib/backends/msp.py b/implementations/python/mzlib/backends/msp.py index 9b289e6..5fb2c39 100644 --- a/implementations/python/mzlib/backends/msp.py +++ b/implementations/python/mzlib/backends/msp.py @@ -773,7 +773,7 @@ def protein_handler(key, value, container: Attributed): match.group(1), group_identifier=group_identifier) container.add_attribute("MS:1001113|c-terminal flanking residue", match.group(2), group_identifier=group_identifier) - container.add_attribute(key.strip('"').strip("'"), re.sub(r"\(pre=(.),post=(.)\)", '', value), + container.add_attribute(key, re.sub(r"\(pre=(.),post=(.)\)", '', 
value.strip('"').strip("'")), group_identifier=group_identifier) return True diff --git a/implementations/python/mzlib/backends/text.py b/implementations/python/mzlib/backends/text.py index d9873ff..a3e4afc 100644 --- a/implementations/python/mzlib/backends/text.py +++ b/implementations/python/mzlib/backends/text.py @@ -6,7 +6,7 @@ import enum from collections import deque -from typing import ClassVar, List, Tuple, Union, Iterable +from typing import ClassVar, List, Optional, Tuple, Union, Iterable from mzlib.annotation import parse_annotation from mzlib.spectrum import Spectrum @@ -55,6 +55,7 @@ class _LibraryParserStateEnum(enum.Enum): ATTRIBUTE_SET_NAME = "MS:1003212|library attribute set name" +PEAK_ATTRIBUTE = "MS:1003254|peak attribute" START_OF_SPECTRUM_MARKER = re.compile(r"^<(?:Spectrum)(?:=(.+))?>") START_OF_INTERPRETATION_MARKER = re.compile(r"^") @@ -76,6 +77,282 @@ class _LibraryParserStateEnum(enum.Enum): } +class _EntryParser: + """ + Moves the complexity and state management involved in parsing + a full entry out of :class:`TextSpectrumLibrary`, allowing it + to be factored into a bunch of helper methods around a single + piece of shared stated too granular for the main parser. 
+ """ + + library: 'TextSpectralLibrary' + state: _SpectrumParserStateEnum + spectrum: Optional[Spectrum] + cluster: Optional[SpectrumCluster] + analyte: Optional[Analyte] + interpretation: Optional[Interpretation] + interpretation_member: Optional[InterpretationMember] + + aggregation_types: List[str] + peak_list: List[Tuple] + + start_line_number: int + line_number: int = -1 + + def __init__(self, library, start_line_number: int, spectrum_index: Optional[int]) -> None: + self.library = library + self.start_line_number = start_line_number + self.spectrum_index = spectrum_index + self.state = _SpectrumParserStateEnum.header + + self.aggregation_types = None + self.peak_list = [] + + self.spectrum = None + self.cluster = None + self.analyte = None + self.interpretation = None + self.interpretation_member = None + + def real_line_number_or_nothing(self): + if self.start_line_number is None: + return '' + message = f" on line {self.line_number + self.start_line_number}" + if self.spectrum_index is not None: + message += f" in spectrum {self.spectrum_index}" + message += f" in state {self.state}" + return message + + def _parse_header(self, line): + if START_OF_SPECTRUM_MARKER.match(line): + self.state = _SpectrumParserStateEnum.header + self.spectrum = self.library._new_spectrum() + self.spectrum.index = self.spectrum_index + match = START_OF_SPECTRUM_MARKER.match(line) + self.spectrum.key = int(match.group(1)) or self.spectrum.index - 1 + return + + elif START_OF_PEAKS_MARKER.match(line): + self.state = _SpectrumParserStateEnum.peaks + return + + elif START_OF_INTERPRETATION_MARKER.match(line): + self.state = _SpectrumParserStateEnum.interpretation + match = START_OF_INTERPRETATION_MARKER.match(line) + if self.interpretation is not None: + self.spectrum.add_interpretation(self.interpretation) + self.interpretation = self.library._new_interpretation(match.group(1)) + self.spectrum.add_interpretation(self.interpretation) + self.analyte = None + return + + elif 
START_OF_ANALYTE_MARKER.match(line): + self.state = _SpectrumParserStateEnum.analyte + match = START_OF_ANALYTE_MARKER.match(line) + self.analyte = self.library._new_analyte(match.group(1)) + self.spectrum.add_analyte(self.analyte) + return + + elif START_OF_CLUSTER.match(line): + self.state = _SpectrumParserStateEnum.cluster + self.cluster = self.library._new_cluster() + match = START_OF_CLUSTER.match(line) + self.cluster.key = int(match.group(1)) or self.cluster.index - 1 + return + + self.library._parse_attribute_into( + line, self.spectrum, self.real_line_number_or_nothing, self.state) + + def _parse_interpretation(self, line): + if START_OF_ANALYTE_MARKER.match(line): + warnings.warn( + f"An analyte found after an interpretation was encountered, {self.real_line_number_or_nothing()}") + self.state = _SpectrumParserStateEnum.analyte + match = START_OF_ANALYTE_MARKER.match(line) + if self.analyte is not None: + self.spectrum.add_analyte(self.analyte) + self.analyte = self.library._new_analyte(match.group(1)) + self.spectrum.add_analyte(self.analyte) + return + elif START_OF_INTERPRETATION_MARKER.match(line): + self.state = _SpectrumParserStateEnum.interpretation + match = START_OF_INTERPRETATION_MARKER.match(line) + if self.interpretation is not None: + self.spectrum.add_interpretation(self.interpretation) + self.interpretation = self.library._new_interpretation(match.group(1)) + self.spectrum.add_interpretation(self.interpretation) + self.analyte = None + return + elif START_OF_PEAKS_MARKER.match(line): + self.state = _SpectrumParserStateEnum.peaks + return + elif START_OF_INTERPRETATION_MEMBER_MARKER.match(line): + self.state = _SpectrumParserStateEnum.interpretation_member + match = START_OF_INTERPRETATION_MEMBER_MARKER.match(line) + + if self.interpretation_member is not None: + self.interpretation.add_member_interpretation(self.interpretation_member) + + self.interpretation_member = InterpretationMember(match.group(1)) + 
self.interpretation.add_member_interpretation(self.interpretation_member) + return + + self.library._parse_attribute_into( + line, self.interpretation.attributes, self.real_line_number_or_nothing) + self.library._analyte_interpretation_link(self.spectrum, self.interpretation) + + def _parse_interpretation_member(self, line): + if START_OF_PEAKS_MARKER.match(line): + self.state = _SpectrumParserStateEnum.peaks + self.interpretation_member = None + self.interpretation = None + return + elif START_OF_INTERPRETATION_MARKER.match(line): + self.state = _SpectrumParserStateEnum.interpretation + match = START_OF_INTERPRETATION_MARKER.match(line) + if self.interpretation is not None: + self.spectrum.add_interpretation(self.interpretation) + self.interpretation = self.library._new_interpretation(match.group(1)) + self.spectrum.add_interpretation(self.interpretation) + self.interpretation_member = None + return + elif START_OF_INTERPRETATION_MEMBER_MARKER.match(line): + self.state = _SpectrumParserStateEnum.interpretation_member + match = START_OF_INTERPRETATION_MEMBER_MARKER.match(line) + if self.interpretation_member is not None: + self.interpretation.add_member_interpretation(self.interpretation_member) + self.interpretation_member = InterpretationMember(match.group(1)) + self.interpretation.add_member_interpretation(self.interpretation_member) + return + + self.library._parse_attribute_into( + line, self.interpretation_member, self.real_line_number_or_nothing) + + def _parse_analyte(self, line): + if START_OF_PEAKS_MARKER.match(line): + self.state = _SpectrumParserStateEnum.peaks + if self.analyte is not None: + self.spectrum.add_analyte(self.analyte) + self.analyte = None + return + + elif START_OF_ANALYTE_MARKER.match(line): + self.state = _SpectrumParserStateEnum.analyte + match = START_OF_ANALYTE_MARKER.match(line) + if self.analyte is not None: + self.spectrum.add_analyte(self.analyte) + self.analyte = self.library._new_analyte(match.group(1)) + return + + elif 
START_OF_INTERPRETATION_MARKER.match(line): + self.state = _SpectrumParserStateEnum.interpretation + match = START_OF_INTERPRETATION_MARKER.match(line) + if self.analyte is not None: + self.spectrum.add_analyte(self.analyte) + self.analyte = None + + # Somehow we have an in-progress Interpretation that hasn't been cleared yet. + # This should probably be an error strictly speaking. + if self.interpretation is not None: + warnings.warn( + f"Interleaved analytes and interpretations detected at {self.real_line_number_or_nothing()}") + self.spectrum.add_interpretation(self.interpretation) + self.interpretation = self.library._new_interpretation(match.group(1)) + self.spectrum.add_interpretation(self.interpretation) + return + + self.library._parse_attribute_into(line, self.analyte, self.real_line_number_or_nothing) + + def _parse_peaks(self, line): + # TODO: When we know more about how different aggregations are formatted, + # look that up here once so we remember it and can use it to process the + # aggregation columns + if self.aggregation_types is None: + self.aggregation_types = self.spectrum.peak_aggregations + match = float_number.match(line) + if match is not None: + tokens = line.split("\t") + n_tokens = len(tokens) + if n_tokens == 2: + mz, intensity = tokens + annotation = parse_annotation("?") + self.peak_list.append([float(mz), float(intensity), annotation, []]) + elif n_tokens == 3: + mz, intensity, annotation = tokens + if not annotation: + annotation = "?" + annotation = parse_annotation(annotation) + self.peak_list.append([float(mz), float(intensity), annotation, []]) + elif n_tokens > 3: + mz, intensity, annotation, *aggregation = tokens + if not annotation: + annotation = "?" 
+ annotation = parse_annotation(annotation) + self.peak_list.append( + [float(mz), float(intensity), annotation, [try_cast(agg) for agg in aggregation]]) + else: + raise ValueError( + f"Malformed peak line {line} with {n_tokens} entries{self.real_line_number_or_nothing()}") + else: + raise ValueError(f"Malformed peak line {line}{self.real_line_number_or_nothing()}") + + def _parse_cluster(self, line): + if START_OF_SPECTRUM_MARKER.match(line): + raise ValueError( + f"Clusters should not include spectrum sections {self.real_line_number_or_nothing()}") + + elif START_OF_PEAKS_MARKER.match(line): + raise ValueError( + f"Clusters should not include peaks {self.real_line_number_or_nothing()}") + + elif START_OF_INTERPRETATION_MARKER.match(line): + raise ValueError( + f"Clusters should not include interpretation sections {self.real_line_number_or_nothing()}") + + elif START_OF_ANALYTE_MARKER.match(line): + raise ValueError( + f"Clusters should not include analyte sections {self.real_line_number_or_nothing()}") + + elif START_OF_INTERPRETATION_MEMBER_MARKER.match(line): + raise ValueError( + f"Clusters should not include interpretation member sections {self.real_line_number_or_nothing()}") + + self.library._parse_attribute_into( + line, self.cluster, self.real_line_number_or_nothing, self.state) + + def parse(self, buffer: Iterable[str]): + line: str + for line_number, line in enumerate(buffer): + self.line_number = line_number + line = line.strip() + if not line: + break + # Skip comments for now, no round-trip + if line.startswith("#"): + continue + elif self.state == _SpectrumParserStateEnum.header: + self._parse_header(line) + elif self.state == _SpectrumParserStateEnum.interpretation: + self._parse_interpretation(line) + elif self.state == _SpectrumParserStateEnum.interpretation_member: + self._parse_interpretation_member(line) + elif self.state == _SpectrumParserStateEnum.analyte: + self._parse_analyte(line) + elif self.state == _SpectrumParserStateEnum.peaks: + 
self._parse_peaks(line) + elif self.state == _SpectrumParserStateEnum.cluster: + self._parse_cluster(line) + else: + raise ValueError( + f"Unknown state {self.state}{self.real_line_number_or_nothing()}") + if self.cluster: + return self.cluster + self.spectrum.peak_list = self.peak_list + # Backfill analytes into interpretations that never explicitly listed them. + self.library._default_interpretation_to_analytes(self.spectrum) + return self.spectrum + + def _is_header_line(line: str) -> bool: if START_OF_SPECTRUM_MARKER.match(line): return False @@ -363,7 +640,7 @@ def _parse_attribute_into(self, line: str, store: Attributed, self._prepare_attribute_dict(d) if d['term'] == ATTRIBUTE_SET_NAME: if _SpectrumParserStateEnum.header == state: - attr_set = self.entry_attribute_sets[d['value']] + attr_set = self.spectrum_attribute_sets[d['value']] elif _SpectrumParserStateEnum.analyte == state: attr_set = self.analyte_attribute_sets[d['value']] elif _SpectrumParserStateEnum.interpretation == state: @@ -397,233 +674,8 @@ def _parse_attribute_into(self, line: str, store: Attributed, def _parse(self, buffer: Iterable[str], spectrum_index: int = None, start_line_number: int=None) -> Union[Spectrum, SpectrumCluster]: - spec: Spectrum = self._new_spectrum() - spec.index = spectrum_index if spectrum_index is not None else -1 - interpretation: Interpretation = None - analyte: Analyte = None - interpretation_member: InterpretationMember = None - cluster: SpectrumCluster = None - - STATES = _SpectrumParserStateEnum - state: _SpectrumParserStateEnum = STATES.header - - peak_list = [] - line_number = -1 - - def real_line_number_or_nothing(): - nonlocal start_line_number - nonlocal line_number - nonlocal spectrum_index - - if start_line_number is None: - return '' - message = f" on line {line_number + start_line_number}" - if spectrum_index is not None: - message += f" in spectrum {spectrum_index}" - message += f" in state {state}" - return message - - line: str - for line_number, 
line in enumerate(buffer): - line = line.strip() - if not line: - break - # Skip comments for now, no round-trip - if line.startswith("#"): - continue - if state == STATES.header: - if START_OF_SPECTRUM_MARKER.match(line): - match = START_OF_SPECTRUM_MARKER.match(line) - spec.key = int(match.group(1)) or spec.index - 1 - continue - - elif START_OF_PEAKS_MARKER.match(line): - state = STATES.peaks - continue - - elif START_OF_INTERPRETATION_MARKER.match(line): - state = STATES.interpretation - match = START_OF_INTERPRETATION_MARKER.match(line) - if interpretation is not None: - spec.add_interpretation(interpretation) - interpretation = self._new_interpretation(match.group(1)) - spec.add_interpretation(interpretation) - analyte = None - continue - - elif START_OF_ANALYTE_MARKER.match(line): - state = STATES.analyte - match = START_OF_ANALYTE_MARKER.match(line) - analyte = self._new_analyte(match.group(1)) - spec.add_analyte(analyte) - continue - - elif START_OF_CLUSTER.match(line): - state = STATES.cluster - cluster = self._new_cluster() - match = START_OF_CLUSTER.match(line) - cluster.key = int(match.group(1)) or cluster.index - 1 - continue - - self._parse_attribute_into( - line, spec, real_line_number_or_nothing, state) - - elif state == STATES.interpretation: - if START_OF_ANALYTE_MARKER.match(line): - warnings.warn( - f"An analyte found after an interpretation was encountered, {real_line_number_or_nothing()}") - state = STATES.analyte - match = START_OF_ANALYTE_MARKER.match(line) - if analyte is not None: - spec.add_analyte(analyte) - analyte = self._new_analyte(match.group(1)) - spec.add_analyte(analyte) - continue - elif START_OF_INTERPRETATION_MARKER.match(line): - state = STATES.interpretation - match = START_OF_INTERPRETATION_MARKER.match(line) - if interpretation is not None: - spec.add_interpretation(interpretation) - interpretation = self._new_interpretation(match.group(1)) - spec.add_interpretation(interpretation) - analyte = None - continue - elif 
START_OF_PEAKS_MARKER.match(line): - state = STATES.peaks - continue - elif START_OF_INTERPRETATION_MEMBER_MARKER.match(line): - state = STATES.interpretation_member - match = START_OF_INTERPRETATION_MEMBER_MARKER.match(line) - - if interpretation_member is not None: - interpretation.add_member_interpretation(interpretation_member) - - interpretation_member = InterpretationMember(match.group(1)) - interpretation.add_member_interpretation(interpretation_member) - continue - - self._parse_attribute_into( - line, interpretation.attributes, real_line_number_or_nothing) - self._analyte_interpretation_link(spec, interpretation) - - elif state == STATES.interpretation_member: - if START_OF_PEAKS_MARKER.match(line): - state = STATES.peaks - interpretation_member = None - interpretation = None - continue - elif START_OF_INTERPRETATION_MARKER.match(line): - state = STATES.interpretation - match = START_OF_INTERPRETATION_MARKER.match(line) - if interpretation is not None: - spec.add_interpretation(interpretation) - interpretation = self._new_interpretation(match.group(1)) - spec.add_interpretation(interpretation) - interpretation_member = None - continue - elif START_OF_INTERPRETATION_MEMBER_MARKER.match(line): - state = STATES.interpretation_member - match = START_OF_INTERPRETATION_MEMBER_MARKER.match(line) - if interpretation_member is not None: - interpretation.add_member_interpretation(interpretation_member) - interpretation_member = InterpretationMember(match.group(1)) - interpretation.add_member_interpretation(interpretation_member) - continue - - self._parse_attribute_into( - line, interpretation_member, real_line_number_or_nothing) - - elif state == STATES.analyte: - if START_OF_PEAKS_MARKER.match(line): - state = STATES.peaks - if analyte is not None: - spec.add_analyte(analyte) - analyte = None - continue - - elif START_OF_ANALYTE_MARKER.match(line): - state = STATES.analyte - match = START_OF_ANALYTE_MARKER.match(line) - if analyte is not None: - 
spec.add_analyte(analyte) - analyte = self._new_analyte(match.group(1)) - continue - - elif START_OF_INTERPRETATION_MARKER.match(line): - state = STATES.interpretation - match = START_OF_INTERPRETATION_MARKER.match(line) - if analyte is not None: - spec.add_analyte(analyte) - analyte = None - - # Somehow we have an in-progress Interpretation that hasn't been cleared yet. - # This should probably be an error strictly speaking. - if interpretation is not None: - warnings.warn( - f"Interleaved analytes and interpretations detected at {real_line_number_or_nothing()}") - spec.add_interpretation(interpretation) - interpretation = self._new_interpretation(match.group(1)) - spec.add_interpretation(interpretation) - continue - - self._parse_attribute_into(line, analyte, real_line_number_or_nothing) - - elif state == STATES.peaks: - match = float_number.match(line) - if match is not None: - tokens = line.split("\t") - n_tokens = len(tokens) - if n_tokens == 3: - mz, intensity, annotation = tokens - if not annotation: - annotation = "?" - annotation = parse_annotation(annotation) - peak_list.append([float(mz), float(intensity), annotation, ""]) - elif n_tokens > 3: - mz, intensity, annotation, *aggregation = tokens - if not annotation: - annotation = "?" 
- annotation = parse_annotation(annotation) - peak_list.append( - [float(mz), float(intensity), annotation, aggregation]) - else: - raise ValueError( - f"Malformed peak line {line} with {n_tokens} entries{real_line_number_or_nothing()}") - else: - raise ValueError(f"Malformed peak line {line}{real_line_number_or_nothing()}") - - elif state == STATES.cluster: - if START_OF_SPECTRUM_MARKER.match(line): - raise ValueError( - f"Clusters should not include spectrum sections {real_line_number_or_nothing()}") - - elif START_OF_PEAKS_MARKER.match(line): - raise ValueError( - f"Clusters should not include peaks {real_line_number_or_nothing()}") - - elif START_OF_INTERPRETATION_MARKER.match(line): - raise ValueError( - f"Clusters should not include interpretation sections {real_line_number_or_nothing()}") - - elif START_OF_ANALYTE_MARKER.match(line): - raise ValueError( - f"Clusters should not include analyte sections {real_line_number_or_nothing()}") - - elif START_OF_INTERPRETATION_MEMBER_MARKER.match(line): - raise ValueError( - f"Clusters should not include interpretation member sections {real_line_number_or_nothing()}") - - self._parse_attribute_into( - line, cluster, real_line_number_or_nothing, state) - else: - raise ValueError( - f"Unknown state {state}{real_line_number_or_nothing()}") - if cluster: - return cluster - spec.peak_list = peak_list - # Backfill analytes into interpretations that never explicitly listed them. 
- self._default_interpretation_to_analytes(spec) - return spec + parser = _EntryParser(self, start_line_number, spectrum_index) + return parser.parse(buffer) def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Spectrum: @@ -656,10 +708,11 @@ class TextSpectralLibraryWriter(SpectralLibraryWriterBase): format_name = "text" default_version = '1.0' - def __init__(self, filename, version=None, **kwargs): + def __init__(self, filename, version=None, compact_interpretations: bool=True, **kwargs): super(TextSpectralLibraryWriter, self).__init__(filename) self.version = version self._coerce_handle(self.filename) + self.compact_interpretations = compact_interpretations def _write_attributes(self, attributes: Attributed): for attribute in attributes: @@ -687,7 +740,7 @@ def write_header(self, library: SpectralLibraryBackendBase): self._write_attributes( self._filter_attributes(library.attributes, lambda x: x.key != FORMAT_VERSION_TERM) ) - for attr_set in library.entry_attribute_sets.values(): + for attr_set in library.spectrum_attribute_sets.values(): self.write_attribute_set(attr_set, AttributeSetTypes.spectrum) for attr_set in library.analyte_attribute_sets.values(): @@ -736,10 +789,17 @@ def write_spectrum(self, spectrum: Spectrum): self.handle.write(f"\n") self._write_attributes(attribs_of) - for member in interpretation.member_interpretations.values(): - member: InterpretationMember - self.handle.write(f"\n") - self._write_attributes(member.attributes) + # When there is only one interpretation and only one interpretation member + # interpretation member attributes are written out as part of the interpretation + # itself. 
+ if _n_interps == 1 and len(interpretation.member_interpretations) == 1 and self.compact_interpretations: + for member in interpretation.member_interpretations.values(): + self._write_attributes(member.attributes) + else: + for member in interpretation.member_interpretations.values(): + member: InterpretationMember + self.handle.write(f"\n") + self._write_attributes(member.attributes) self.handle.write("\n") for peak in spectrum.peak_list: peak_parts = [ diff --git a/implementations/python/mzlib/spectrum.py b/implementations/python/mzlib/spectrum.py index 9dd8097..9b3dce1 100644 --- a/implementations/python/mzlib/spectrum.py +++ b/implementations/python/mzlib/spectrum.py @@ -74,7 +74,7 @@ def __init__(self, attributes=None, peak_list=None, analytes=None, precursor_charge = AttributeManagedProperty[int](CHARGE_STATE) spectrum_aggregation = AttributeFacet[SpectrumAggregation](SpectrumAggregation) - peak_aggregations = AttributeManagedProperty("MS:1003254|peak attribute") + peak_aggregations = AttributeManagedProperty("MS:1003254|peak attribute", multiple=True) def add_analyte(self, analyte: Analyte): self.analytes[str(analyte.id)] = analyte diff --git a/implementations/python/mzlib/spectrum_library.py b/implementations/python/mzlib/spectrum_library.py index 2a9878f..bd1b0ff 100644 --- a/implementations/python/mzlib/spectrum_library.py +++ b/implementations/python/mzlib/spectrum_library.py @@ -102,6 +102,26 @@ def _requires_backend(self): raise ValueError( "Cannot read library data, library parser not yet initialized") + @property + def spectrum_attribute_sets(self): + self._requires_backend() + return self.backend.spectrum_attribute_sets + + @property + def analyte_attribute_sets(self): + self._requires_backend() + return self.backend.analyte_attribute_sets + + @property + def interpretation_attribute_sets(self): + self._requires_backend() + return self.backend.interpretation_attribute_sets + + @property + def cluster_attribute_sets(self): + self._requires_backend() 
+ return self.backend.cluster_attribute_sets + #### Define getter/setter for attribute identifier @property def identifier(self) -> Optional[str]: @@ -157,6 +177,13 @@ def read_header(self) -> bool: return self.backend.read_header() def read(self): + """ + Create a sequential iterator over the spectrum library entries. + + Yields + ------ + Spectrum or SpectrumCluster + """ self._requires_backend() return self.backend.read() @@ -234,7 +261,11 @@ def get_cluster(self, cluster_number: int) -> SpectrumCluster: def find_spectra(self, specification, **query_keys) -> List[Spectrum]: """ - find_spectra - Return a list of spectra given query constraints + Return a list of spectra given query constraints + + Returns + ------- + List[Spectrum] """ self._requires_backend() return self.backend.find_spectra(specification, **query_keys) From 539af14710202967094f41218635630df18a44bf Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Sat, 17 Jun 2023 21:55:35 -0400 Subject: [PATCH 14/24] Fix Spectronaut spectrum origin type --- implementations/python/mzlib/backends/spectronaut.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/implementations/python/mzlib/backends/spectronaut.py b/implementations/python/mzlib/backends/spectronaut.py index 9f2735a..2ecadb5 100644 --- a/implementations/python/mzlib/backends/spectronaut.py +++ b/implementations/python/mzlib/backends/spectronaut.py @@ -90,7 +90,7 @@ def __init__(self, filename: str, index_type=None, **kwargs): def _spectrum_type(self): key = "MS:1003072|spectrum origin type" - value = "MS:1003074|predicted spectrum" + value = "MS:1003073|observed spectrum" return key, value def read_header(self) -> bool: From d95af648f2f7c5862d7082f72a92bd550f3c42e4 Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Sat, 17 Jun 2023 22:03:24 -0400 Subject: [PATCH 15/24] Make method public --- implementations/python/mzlib/ontology.py | 2 +- implementations/python/mzlib/tools/cli.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) 
diff --git a/implementations/python/mzlib/ontology.py b/implementations/python/mzlib/ontology.py index 9b3d623..8213831 100644 --- a/implementations/python/mzlib/ontology.py +++ b/implementations/python/mzlib/ontology.py @@ -51,7 +51,7 @@ def name_to_curie(self, name: str) -> str: term = self.find_term_by_name(name) return term.id - def _make_attribute_syntax(self, name: str) -> str: + def attribute_syntax(self, name: str) -> str: if self.is_curie(name): if "|" in name: return name diff --git a/implementations/python/mzlib/tools/cli.py b/implementations/python/mzlib/tools/cli.py index 04ded01..dee3771 100644 --- a/implementations/python/mzlib/tools/cli.py +++ b/implementations/python/mzlib/tools/cli.py @@ -124,7 +124,7 @@ def convert(inpath, outpath, format=None, header_file=None, library_attributes=( if library_attributes: resolver = ControlledVocabularyResolver() for k, v in library_attributes: - k = resolver._make_attribute_syntax(k) + k = resolver.attribute_syntax(k) library.add_attribute(k, v) click.echo(f"Writing to {outpath}", err=True) fh = click.open_file(outpath, mode='w') From 152c00612326e971388be5ae7a091e87cdfb64ec Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Mon, 19 Jun 2023 13:46:08 -0400 Subject: [PATCH 16/24] Emit warnings when space delimiters are found --- implementations/python/mzlib/backends/text.py | 40 +++++++++----- .../python/mzlib/validate/object_rule.py | 11 ++++ .../python/mzlib/validate/semantic_rule.py | 7 ++- .../python/mzlib/validate/validator.py | 53 +++++++++++++++---- 4 files changed, 85 insertions(+), 26 deletions(-) diff --git a/implementations/python/mzlib/backends/text.py b/implementations/python/mzlib/backends/text.py index a3e4afc..0f3d59b 100644 --- a/implementations/python/mzlib/backends/text.py +++ b/implementations/python/mzlib/backends/text.py @@ -13,6 +13,7 @@ from mzlib.cluster import SpectrumCluster from mzlib.attributes import AttributeManager, Attributed, AttributeSet from mzlib.analyte import Analyte, 
Interpretation, InterpretationMember +from mzlib.validate.object_rule import ValidationWarning from .base import ( SpectralLibraryBackendBase, @@ -29,9 +30,9 @@ term_pattern = re.compile( r"^(?P(?P\S+:(?:\d|X)+)\|(?P[^=]+))") key_value_term_pattern = re.compile( - r"^(?P(?P[A-Za-z0-9:.]+:(?:\d|X)+)\|(?P[^=]+))=(?P.+)") + r"^(?P(?P[A-Za-z0-9:.]+:(?:\d|X)+)\|(?P[^=]+?))\s*=\s*(?P.+)") grouped_key_value_term_pattern = re.compile( - r"^\[(?P\d+)\](?P(?P\S+:(?:\d|X)+)\|(?P[^=]+))=(?P.+)") + r"^\[(?P\d+)\](?P(?P\S+:(?:\d|X)+)\|(?P[^=]+?))\s*=\s*(?P.+)") float_number = re.compile( r"^\d+(.\d+)?") @@ -57,17 +58,20 @@ class _LibraryParserStateEnum(enum.Enum): ATTRIBUTE_SET_NAME = "MS:1003212|library attribute set name" PEAK_ATTRIBUTE = "MS:1003254|peak attribute" -START_OF_SPECTRUM_MARKER = re.compile(r"^<(?:Spectrum)(?:=(.+))?>") -START_OF_INTERPRETATION_MARKER = re.compile(r"^") -START_OF_ANALYTE_MARKER = re.compile(r"^") +START_OF_SPECTRUM_MARKER = re.compile(r"^<(?:Spectrum)(?:\s*=\s*(.+))?>") +START_OF_INTERPRETATION_MARKER = re.compile(r"^") +START_OF_ANALYTE_MARKER = re.compile(r"^") START_OF_PEAKS_MARKER = re.compile(r"^") START_OF_LIBRARY_MARKER = re.compile(r"^") -SPECTRUM_NAME_PRESENT = re.compile(r'MS:1003061\|(?:library )?spectrum name=') -START_OF_INTERPRETATION_MEMBER_MARKER = re.compile(r"") +START_OF_INTERPRETATION_MEMBER_MARKER = re.compile(r"") START_OF_ATTRIBUTE_SET = re.compile( - r"") -START_OF_CLUSTER = re.compile(r"") + r"") +START_OF_CLUSTER = re.compile(r"") +SPECTRUM_NAME_PRESENT = re.compile(r'MS:1003061\|(?:library )?spectrum name\s*=\s*') +SPECTRUM_NAME_MATCH = re.compile(r'MS:1003061\|(?:library )?spectrum name\s*=\s*(.+)') + +FALLBACK_PEAK_LINE_PATTERN = re.compile(r'(?P\d+(?:\.\d+)?)\s+(?P\d+(?:\.\d+)?)(?:\s+(?P.+))?') attribute_set_types = { "spectrum": AttributeSetTypes.spectrum, @@ -101,7 +105,7 @@ class _EntryParser: def __init__(self, library, start_line_number: int, spectrum_index: Optional[int]) -> None: self.library = library - 
self.start_line_number = start_line_number + self.start_line_number = start_line_number or 0 self.spectrum_index = spectrum_index self.state = _SpectrumParserStateEnum.header @@ -115,8 +119,6 @@ def __init__(self, library, start_line_number: int, spectrum_index: Optional[int self.interpretation_member = None def real_line_number_or_nothing(self): - if self.start_line_number is None: - return '' message = f" on line {self.line_number + self.start_line_number}" if self.spectrum_index is not None: message += f" in spectrum {self.spectrum_index}" @@ -273,6 +275,14 @@ def _parse_peaks(self, line): if match is not None: tokens = line.split("\t") n_tokens = len(tokens) + if n_tokens == 1 and ' ' in line: + if match := FALLBACK_PEAK_LINE_PATTERN.match(line): + tokens = match.groups() + n_tokens = len(tokens) + warnings.warn( + f"Space character delimiter found in peak line{self.real_line_number_or_nothing()}", + ValidationWarning + ) if n_tokens == 2: mz, intensity = tokens annotation = parse_annotation("?") @@ -576,8 +586,10 @@ def create_index(self) -> int: entry_is_cluster = bool(is_clus) spectrum_file_offset = line_beginning_file_offset spectrum_name = '' - if re.match(r'MS:1003061\|(?:library )?spectrum name', line): - spectrum_name = re.match(r'MS:1003061\|(?:library )?spectrum name=(.+)', line).group(1) + + if SPECTRUM_NAME_PRESENT.match(line): + if match := SPECTRUM_NAME_MATCH.match(line): + spectrum_name = match.group(1) entry_buffer.append(line) diff --git a/implementations/python/mzlib/validate/object_rule.py b/implementations/python/mzlib/validate/object_rule.py index 1f80d5f..680352c 100644 --- a/implementations/python/mzlib/validate/object_rule.py +++ b/implementations/python/mzlib/validate/object_rule.py @@ -1,5 +1,6 @@ import logging + from typing import TYPE_CHECKING, List, Tuple from mzlib.attributes import Attributed @@ -17,6 +18,16 @@ logger.addHandler(logging.NullHandler()) +class ValidationWarning(UserWarning): + """ + Indicates that something was 
parsed that did not halt the parser but + which violates the expectations of the parser. + + The parser will make a best-effort attempt to interpret the value + correctly but when validating this will count as a violation. + """ + + class ScopedObjectRuleBase: id: str path: str diff --git a/implementations/python/mzlib/validate/semantic_rule.py b/implementations/python/mzlib/validate/semantic_rule.py index a6c6b36..4d6f030 100644 --- a/implementations/python/mzlib/validate/semantic_rule.py +++ b/implementations/python/mzlib/validate/semantic_rule.py @@ -137,8 +137,11 @@ def __init__(self, *args, **kwargs): self.seen = set() def validate(self, attribute: 'AttributeSemanticRule', value: str, validator_context: "ValidatorBase"): - if isinstance(value, list) and attribute.repeatable: - return all(self.validate(attribute, v, validator_context) for v in value) + if isinstance(value, list): + if attribute.repeatable: + return all(self.validate(attribute, v, validator_context) for v in value) + else: + return False if value in self.seen: return False self.seen.add(value) diff --git a/implementations/python/mzlib/validate/validator.py b/implementations/python/mzlib/validate/validator.py index b5816d5..35554c1 100644 --- a/implementations/python/mzlib/validate/validator.py +++ b/implementations/python/mzlib/validate/validator.py @@ -1,8 +1,8 @@ import itertools import logging - -from dataclasses import dataclass, field +import warnings import re +from dataclasses import dataclass, field from typing import Any, Callable, Deque, Dict, Iterator, List, Optional, Sequence, Tuple, Union from psims.controlled_vocabulary.entity import Entity, ListOfType @@ -18,7 +18,7 @@ from mzlib.validate.level import RequirementLevel from mzlib.validate.semantic_rule import ScopedSemanticRule, load_rule_set -from mzlib.validate.object_rule import ScopedObjectRuleBase, SpectrumPeakAnnotationRule +from mzlib.validate.object_rule import ScopedObjectRuleBase, SpectrumPeakAnnotationRule, 
ValidationWarning from mzlib.defaults import DEFAULT_UNITS logger = logging.getLogger(__name__) @@ -61,6 +61,34 @@ def visited_attribute(self, attribute: Union[Tuple[str, str], Attribute]) -> boo +def _warning_iterator(iterator: Iterator[Spectrum]) -> Iterator[Spectrum]: + while True: + try: + with warnings.catch_warnings(record=True) as w: + value = next(iterator) + vw = [a for a in w if issubclass(a.category, ValidationWarning)] + yield value, vw + except StopIteration: + break + except: + raise + + +def _is_of_type(attrib, relation) -> bool: + if isinstance(relation.value_type.type_definition, type): + return isinstance(attrib.value, relation.value_type.type_definition) + else: + return _try_convert(attrib.value, relation.value_type.type_definition) + + +def _try_convert(value, converter): + try: + converter(value) + return True + except (ValueError, TypeError): + return False + + class ValidatorBase(_VocabularyResolverMixin): error_log: List current_context: ValidationContext @@ -71,7 +99,7 @@ def reset_context(self): def add_warning(self, obj: Attributed, path: str, identifier_path: Tuple, attrib: Any, value: Any, requirement_level: RequirementLevel, message: str): raise NotImplementedError() - def validate_spectrum(self, spectrum: Spectrum, path: str, library: SpectrumLibrary): + def validate_spectrum(self, spectrum: Spectrum, path: str, library: SpectrumLibrary, parsing_warnings: Optional[List[warnings.WarningMessage]] = None): raise NotImplementedError() def validate_analyte(self, analyte: Analyte, path: str, spectrum: Spectrum, library: SpectrumLibrary): @@ -127,7 +155,7 @@ def check_attributes(self, obj: Attributed, path: str, identifer_path: Tuple) -> break if hit: break - elif isinstance(attrib.value, rel.value_type.type_definition): + elif _is_of_type(attrib, rel): break else: self.add_warning(obj, path, identifer_path, attrib.key, attrib.value, RequirementLevel.must, @@ -177,8 +205,8 @@ def validate_library(self, library: SpectrumLibrary, 
spectrum_iterator: Optional if spectrum_iterator is None: spectrum_iterator = library - for spectrum in spectrum_iterator: - result &= self.validate_spectrum(spectrum, path, library) + for spectrum, warns in _warning_iterator(spectrum_iterator): + result &= self.validate_spectrum(spectrum, path, library, parsing_warnings=warns) return result def chain(self, validator: 'ValidatorBase') -> 'ValidatorBase': @@ -228,13 +256,18 @@ def apply_rules(self, obj: Attributed, path: str, identifier_path: Tuple) -> boo logger.log(level, f"Applied {rule.id} to {path}:{identifier_path} {v}/{result}") return result - def validate_spectrum(self, spectrum: Spectrum, path: str, library: SpectrumLibrary): + def validate_spectrum(self, spectrum: Spectrum, path: str, library: SpectrumLibrary, parsing_warnings: Optional[List[warnings.WarningMessage]] = None): path = f"{path}/Spectrum" identifier_path = (spectrum.key, ) result = self.apply_rules(spectrum, path, identifier_path) result &= self.check_attributes(spectrum, path, identifier_path) self.reset_context() + if parsing_warnings: + result = False + for parsing_warning in parsing_warnings: + logger.warn(str(parsing_warning.message)) + for _key, analyte in spectrum.analytes.items(): result &= self.validate_analyte(analyte, path, spectrum, library) @@ -287,10 +320,10 @@ def error_log(self): log.extend(validator.error_log) return log - def validate_spectrum(self, spectrum: Spectrum, path: str, library: SpectrumLibrary): + def validate_spectrum(self, spectrum: Spectrum, path: str, library: SpectrumLibrary, parsing_warnings: Optional[List[warnings.WarningMessage]] = None): result = True for validator in self.validators: - result &= validator.validate_spectrum(spectrum, path, library) + result &= validator.validate_spectrum(spectrum, path, library, parsing_warnings) return result def validate_analyte(self, analyte: Analyte, path: str, spectrum: Spectrum, library: SpectrumLibrary): From 2588b41489bc9df856b572d71f7e9f8f6e23e406 Mon Sep 17 
00:00:00 2001 From: Joshua Klein Date: Mon, 19 Jun 2023 16:10:48 -0400 Subject: [PATCH 17/24] Ensure the object is an iterator --- implementations/python/mzlib/validate/validator.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/implementations/python/mzlib/validate/validator.py b/implementations/python/mzlib/validate/validator.py index 35554c1..1ec21cc 100644 --- a/implementations/python/mzlib/validate/validator.py +++ b/implementations/python/mzlib/validate/validator.py @@ -62,6 +62,8 @@ def visited_attribute(self, attribute: Union[Tuple[str, str], Attribute]) -> boo def _warning_iterator(iterator: Iterator[Spectrum]) -> Iterator[Spectrum]: + # coerce to an actual iterator in case we were passed only an iterable + iterator = iter(iterator) while True: try: with warnings.catch_warnings(record=True) as w: From 832ce4b2e74df6ae184c090b55463acf97f06c40 Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Fri, 23 Jun 2023 11:48:28 -0400 Subject: [PATCH 18/24] Handle integers in aggregation --- implementations/python/mzlib/backends/text.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/implementations/python/mzlib/backends/text.py b/implementations/python/mzlib/backends/text.py index 0f3d59b..24d11df 100644 --- a/implementations/python/mzlib/backends/text.py +++ b/implementations/python/mzlib/backends/text.py @@ -4,6 +4,7 @@ import logging import warnings import enum +import numbers from collections import deque from typing import ClassVar, List, Optional, Tuple, Union, Iterable @@ -837,8 +838,8 @@ def close(self): self.handle.close() -def format_aggregation(value: Union[float, str]) -> str: - if isinstance(value, float): +def format_aggregation(value: Union[numbers.Number, str]) -> str: + if isinstance(value, numbers.Number): return "%0.4g" % value else: return value From 9d7e64aa97d421cf9ab7c3ea4d62ca5587214b3e Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Fri, 30 Jun 2023 12:28:37 -0400 Subject: [PATCH 19/24] checkpoint --- 
.../python/mzlib/backends/bibliospec.py | 5 +-- .../python/mzlib/backends/encyclopedia.py | 43 +++++++++++++++++++ 2 files changed, 44 insertions(+), 4 deletions(-) create mode 100644 implementations/python/mzlib/backends/encyclopedia.py diff --git a/implementations/python/mzlib/backends/bibliospec.py b/implementations/python/mzlib/backends/bibliospec.py index 4a0a322..23c1b4d 100644 --- a/implementations/python/mzlib/backends/bibliospec.py +++ b/implementations/python/mzlib/backends/bibliospec.py @@ -1,9 +1,6 @@ -from dataclasses import dataclass -from multiprocessing import connection -import re -import os import sqlite3 import zlib +from dataclasses import dataclass from typing import Iterator, List, Mapping, Tuple, Iterable, Type diff --git a/implementations/python/mzlib/backends/encyclopedia.py b/implementations/python/mzlib/backends/encyclopedia.py new file mode 100644 index 0000000..513a84e --- /dev/null +++ b/implementations/python/mzlib/backends/encyclopedia.py @@ -0,0 +1,43 @@ +import sqlite3 +import zlib +from dataclasses import dataclass + +from typing import Iterator, List, Mapping, Tuple, Iterable, Type + +import numpy as np + +from pyteomics import proforma + +from mzlib import annotation +from mzlib.analyte import FIRST_ANALYTE_KEY, FIRST_INTERPRETATION_KEY, Analyte +from mzlib.spectrum import Spectrum, SPECTRUM_NAME, CHARGE_STATE +from mzlib.attributes import AttributeManager, Attributed + +from mzlib.backends.base import SpectralLibraryBackendBase, FORMAT_VERSION_TERM, DEFAULT_VERSION + +from mzlib.index.base import IndexBase + + +def _compress_array(array: np.ndarray, dtype: str) -> bytes: + """Compress the array to the EncyclopeDIA format.""" + packed = struct.pack(">" + (dtype * len(array)), *array) + compressed = zlib.compress(packed, 9) + return compressed + + +def _extract_array(byte_array: bytes, type_str="d") -> np.ndarray: + dtype = np.dtype(type_str) + decompressed = zlib.decompress(byte_array, 32) + decompressed_length = 
len(decompressed) // dtype.itemsize + unpacked = struct.unpack(">" + (type_str * decompressed_length), decompressed) + return np.array(unpacked, dtype=dtype) + + +@dataclass +class EncyclopediaIndexRecord: + number: int + precursor_mz: float + precursor_charge: int + peptide: str + + From 7142e5404cea69123fb8e66aaa3526ea43a2acdb Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Fri, 30 Jun 2023 21:15:26 -0400 Subject: [PATCH 20/24] Working draft --- .../python/mzlib/backends/bibliospec.py | 5 +- .../python/mzlib/backends/encyclopedia.py | 142 ++++++++++++++++-- 2 files changed, 132 insertions(+), 15 deletions(-) diff --git a/implementations/python/mzlib/backends/bibliospec.py b/implementations/python/mzlib/backends/bibliospec.py index 23c1b4d..88c58ed 100644 --- a/implementations/python/mzlib/backends/bibliospec.py +++ b/implementations/python/mzlib/backends/bibliospec.py @@ -22,10 +22,11 @@ class BibliospecBase: connection: sqlite3.Connection def _correct_modifications_in_sequence(self, row: Mapping) -> proforma.ProForma: - '''Correct the modifications in Bibliospec's modified peptide sequence. + """ + Correct the modifications in Bibliospec's modified peptide sequence. Bibliospec only stores modifications as delta masses. 
- ''' + """ mods = self.connection.execute("SELECT * FROM Modifications WHERE RefSpectraID = ?", (row['id'], )).fetchall() peptide = proforma.ProForma.parse(row["peptideModSeq"]) for mod in mods: diff --git a/implementations/python/mzlib/backends/encyclopedia.py b/implementations/python/mzlib/backends/encyclopedia.py index 513a84e..163de90 100644 --- a/implementations/python/mzlib/backends/encyclopedia.py +++ b/implementations/python/mzlib/backends/encyclopedia.py @@ -18,19 +18,12 @@ from mzlib.index.base import IndexBase -def _compress_array(array: np.ndarray, dtype: str) -> bytes: - """Compress the array to the EncyclopeDIA format.""" - packed = struct.pack(">" + (dtype * len(array)), *array) - compressed = zlib.compress(packed, 9) - return compressed - - -def _extract_array(byte_array: bytes, type_str="d") -> np.ndarray: - dtype = np.dtype(type_str) - decompressed = zlib.decompress(byte_array, 32) - decompressed_length = len(decompressed) // dtype.itemsize - unpacked = struct.unpack(">" + (type_str * decompressed_length), decompressed) - return np.array(unpacked, dtype=dtype) +def _decode_peaks(record: sqlite3.Row): + raw_data = zlib.decompress(record['MassArray']) + mass_array = np.frombuffer(raw_data, dtype='>d') + raw_data = zlib.decompress(record['IntensityArray']) + intensity_array = np.frombuffer(raw_data, dtype='>f') + return mass_array, intensity_array @dataclass @@ -41,3 +34,126 @@ class EncyclopediaIndexRecord: peptide: str +class EncyclopediaIndex(IndexBase): + connection: sqlite3.Connection + + def __init__(self, connection): + self.connection = connection + + def __getitem__(self, i): + if isinstance(i, int): + return self.search(i + 1) + elif isinstance(i, slice): + return [self.search(j + 1) for j in range(i.start or 0, i.stop or len(self), i.step or 1)] + else: + raise TypeError(f"Cannot index {self.__class__.__name__} with {i}") + + def _record_from(self, row: Mapping) -> EncyclopediaIndexRecord: + peptide_sequence = row['PeptideModSeq'] + 
return EncyclopediaIndexRecord(row['rowid'], row['PrecursorMz'], row['PrecursorCharge'], peptide_sequence) + + def search(self, i): + if isinstance(i, int): + info = self.connection.execute("SELECT rowid, PrecursorMz, PrecursorCharge, PeptideModSeq FROM entries WHERE rowid = ?", (i, )).fetchone() + return self._record_from(info) + elif isinstance(i, str): + raise NotImplementedError() + + def __iter__(self): + return map(self._record_from, self.connection.execute("SELECT rowid, PrecursorMz, PrecursorCharge, PeptideModSeq FROM entries ORDER BY rowid").fetchall()) + + def __len__(self): + return self.connection.execute("SELECT count(rowid) FROM entries;").fetchone()[0] + + +class EncyclopediaSpectralLibrary(SpectralLibraryBackendBase): + """Read EncyclopeDIA SQLite3 spectral library files.""" + + connection: sqlite3.Connection + + file_format = "dlib" + format_name = "encyclopedia" + + @classmethod + def has_index_preference(cls, filename) -> Type[IndexBase]: + return EncyclopediaIndex + + def __init__(self, filename, **kwargs): + super().__init__(filename) + self.connection = sqlite3.connect(filename) + self.connection.row_factory = sqlite3.Row + self.index = EncyclopediaIndex(self.connection) + self.read_header() + + def read_header(self) -> bool: + attribs = AttributeManager() + attribs.add_attribute(FORMAT_VERSION_TERM, DEFAULT_VERSION) + attribs.add_attribute("MS:1003207|library creation software", "EncyclopeDIA") + self.attributes = attribs + return True + + def _populate_analyte(self, analyte: Analyte, row: Mapping): + """ + Fill an analyte with details describing a peptide sequence and inferring + from context its traits based upon the assumptions EncyclopeDIA makes. + + EncyclopeDIA only stores modifications as delta masses. 
+ """ + peptide = proforma.ProForma.parse(row['PeptideModSeq']) + analyte.add_attribute("MS:1003169|proforma peptidoform sequence", str(peptide)) + analyte.add_attribute("MS:1001117|theoretical mass", peptide.mass) + analyte.add_attribute("MS:1000888|stripped peptide sequence", row['PeptideSeq']) + analyte.add_attribute(CHARGE_STATE, row['PrecursorCharge']) + + def get_spectrum(self, spectrum_number: int = None, spectrum_name: str = None): + """ + Read a spectrum from the spectrum library. + + EncyclopeDIA does not support alternative labeling of spectra with a + plain text name so looking up by `spectrum_name` is not supported. + """ + if spectrum_number is None: + raise ValueError("Only spectrum number queries are supported. spectrum_number must have an integer value") + + info = self.connection.execute("SELECT rowid, * FROM entries WHERE rowid = ?;", (spectrum_number, )).fetchone() + spectrum = self._new_spectrum() + spectrum.key = info['rowid'] + spectrum.index = info['rowid'] - 1 + spectrum.precursor_mz = info['PrecursorMz'] + try: + spectrum.add_attribute("MS:1000894|retention time", info['RTInSeconds'] / 60.0) + except KeyError: + pass + + try: + spectrum.add_attribute( + "MS:1003203|constituent spectrum file", + info['SourceFile'] + ) + except KeyError: + pass + + + analyte = self._new_analyte(1) + self._populate_analyte(analyte, info) + + spectrum.add_analyte(analyte) + + interp = self._new_interpretation(1) + interp.add_analyte(analyte) + spectrum.add_interpretation(interp) + + mz_array, intensity_array = _decode_peaks(info) + n_peaks = len(mz_array) + spectrum.add_attribute("MS:1003059|number of peaks", n_peaks) + + peak_list = [] + for i, mz in enumerate(mz_array): + row = (mz, intensity_array[i], [], '') + peak_list.append(row) + spectrum.peak_list = peak_list + return spectrum + + def read(self) -> Iterator[Spectrum]: + for rec in self.index: + yield self.get_spectrum(rec.number) From 70e4fb3e9a7949d8fd9ad15e69a49ea690548ba5 Mon Sep 17 00:00:00 2001 
From: Joshua Klein Date: Sat, 15 Jul 2023 22:06:57 -0400 Subject: [PATCH 21/24] Working draft --- .../python/mzlib/backends/encyclopedia.py | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/implementations/python/mzlib/backends/encyclopedia.py b/implementations/python/mzlib/backends/encyclopedia.py index 163de90..b36ec02 100644 --- a/implementations/python/mzlib/backends/encyclopedia.py +++ b/implementations/python/mzlib/backends/encyclopedia.py @@ -2,16 +2,16 @@ import zlib from dataclasses import dataclass -from typing import Iterator, List, Mapping, Tuple, Iterable, Type +from typing import Any, Iterator, List, Mapping, Tuple, Iterable, Type import numpy as np from pyteomics import proforma from mzlib import annotation -from mzlib.analyte import FIRST_ANALYTE_KEY, FIRST_INTERPRETATION_KEY, Analyte +from mzlib.analyte import FIRST_ANALYTE_KEY, FIRST_INTERPRETATION_KEY, Analyte, ProteinDescription from mzlib.spectrum import Spectrum, SPECTRUM_NAME, CHARGE_STATE -from mzlib.attributes import AttributeManager, Attributed +from mzlib.attributes import AttributeManager, Attributed, Attribute from mzlib.backends.base import SpectralLibraryBackendBase, FORMAT_VERSION_TERM, DEFAULT_VERSION @@ -78,7 +78,7 @@ class EncyclopediaSpectralLibrary(SpectralLibraryBackendBase): def has_index_preference(cls, filename) -> Type[IndexBase]: return EncyclopediaIndex - def __init__(self, filename, **kwargs): + def __init__(self, filename: str, **kwargs): super().__init__(filename) self.connection = sqlite3.connect(filename) self.connection.row_factory = sqlite3.Row @@ -92,7 +92,7 @@ def read_header(self) -> bool: self.attributes = attribs return True - def _populate_analyte(self, analyte: Analyte, row: Mapping): + def _populate_analyte(self, analyte: Analyte, row: Mapping[str, Any]): """ Fill an analyte with details describing a peptide sequence and inferring from context its traits based upon the assumptions EncyclopeDIA makes. 
@@ -105,6 +105,14 @@ def _populate_analyte(self, analyte: Analyte, row: Mapping): analyte.add_attribute("MS:1000888|stripped peptide sequence", row['PeptideSeq']) analyte.add_attribute(CHARGE_STATE, row['PrecursorCharge']) + cursor = self.connection.execute( + "SELECT ProteinAccession FROM peptidetoprotein WHERE PeptideSeq = ?;", (row['PeptideSeq'], )) + for protrow in cursor: + accession = protrow['ProteinAccession'] + analyte.add_attribute_group([ + Attribute('MS:1000885|protein accession', accession) + ]) + def get_spectrum(self, spectrum_number: int = None, spectrum_name: str = None): """ Read a spectrum from the spectrum library. @@ -128,7 +136,7 @@ def get_spectrum(self, spectrum_number: int = None, spectrum_name: str = None): try: spectrum.add_attribute( "MS:1003203|constituent spectrum file", - info['SourceFile'] + f"file://{info['SourceFile']}" ) except KeyError: pass @@ -148,8 +156,9 @@ def get_spectrum(self, spectrum_number: int = None, spectrum_name: str = None): spectrum.add_attribute("MS:1003059|number of peaks", n_peaks) peak_list = [] + # EncyclopeDIA does not encode product ion identities for i, mz in enumerate(mz_array): - row = (mz, intensity_array[i], [], '') + row = (mz, intensity_array[i], [], []) peak_list.append(row) spectrum.peak_list = peak_list return spectrum From ae93e948835406245ee1a2d408c100eaabb9665e Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Fri, 28 Jul 2023 12:28:47 -0400 Subject: [PATCH 22/24] Decoy label --- .../python/mzlib/backends/encyclopedia.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/implementations/python/mzlib/backends/encyclopedia.py b/implementations/python/mzlib/backends/encyclopedia.py index b36ec02..d05ba4b 100644 --- a/implementations/python/mzlib/backends/encyclopedia.py +++ b/implementations/python/mzlib/backends/encyclopedia.py @@ -18,6 +18,10 @@ from mzlib.index.base import IndexBase +DECOY_SPECTRUM = "MS:1003192|decoy spectrum" +DECOY_PEPTIDE_SPECTRUM = 
"MS:1003195|unnatural peptidoform decoy spectrum" + + def _decode_peaks(record: sqlite3.Row): raw_data = zlib.decompress(record['MassArray']) mass_array = np.frombuffer(raw_data, dtype='>d') @@ -106,12 +110,17 @@ def _populate_analyte(self, analyte: Analyte, row: Mapping[str, Any]): analyte.add_attribute(CHARGE_STATE, row['PrecursorCharge']) cursor = self.connection.execute( - "SELECT ProteinAccession FROM peptidetoprotein WHERE PeptideSeq = ?;", (row['PeptideSeq'], )) + "SELECT ProteinAccession, isDecoy FROM peptidetoprotein WHERE PeptideSeq = ?;", (row['PeptideSeq'], )) + + had_decoy = False for protrow in cursor: accession = protrow['ProteinAccession'] + is_decoy = bool(int(protrow['isDecoy'])) + had_decoy = had_decoy or is_decoy analyte.add_attribute_group([ Attribute('MS:1000885|protein accession', accession) ]) + return had_decoy def get_spectrum(self, spectrum_number: int = None, spectrum_name: str = None): """ @@ -143,7 +152,9 @@ def get_spectrum(self, spectrum_number: int = None, spectrum_name: str = None): analyte = self._new_analyte(1) - self._populate_analyte(analyte, info) + had_decoy = self._populate_analyte(analyte, info) + if had_decoy: + spectrum.add_attribute(DECOY_SPECTRUM, DECOY_PEPTIDE_SPECTRUM) spectrum.add_analyte(analyte) From 0c07226482c4b3677525dc04bbcdde4b06bd2d1d Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Fri, 11 Aug 2023 11:12:46 -0400 Subject: [PATCH 23/24] Fix N-terminal modification parsing --- implementations/python/mzlib/backends/base.py | 7 +++++-- .../python/mzlib/backends/spectronaut.py | 20 ++++++++++++++++--- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/implementations/python/mzlib/backends/base.py b/implementations/python/mzlib/backends/base.py index d6aa4b3..b93a35d 100644 --- a/implementations/python/mzlib/backends/base.py +++ b/implementations/python/mzlib/backends/base.py @@ -577,9 +577,12 @@ def read(self) -> Iterator[Spectrum]: with open_stream(self.filename, 'rt') as stream: i = 0 reader = 
self._open_reader(stream) + if self._headers: + # Skip the header line if we've already parsed them + _ = next(reader) buffering_reader = self._batch_rows(reader) for i, buffer in enumerate(buffering_reader): - yield self._parse(buffer, i) + yield self._parse_from_buffer(buffer, i) class SpectralLibraryWriterBase(_VocabularyResolverMixin, metaclass=SubclassRegisteringMetaclass): @@ -629,7 +632,7 @@ def write_library(self, library: SpectralLibraryBackendBase): step = max(min(n // 100, 5000), 1) ident = '' i = 0 - for i, entry in enumerate(library): + for i, entry in enumerate(library.read()): if i % step == 0 and i: if isinstance(entry, SpectrumCluster): tag = "cluster " diff --git a/implementations/python/mzlib/backends/spectronaut.py b/implementations/python/mzlib/backends/spectronaut.py index 9b0f685..32209c3 100644 --- a/implementations/python/mzlib/backends/spectronaut.py +++ b/implementations/python/mzlib/backends/spectronaut.py @@ -34,9 +34,12 @@ def _rewrite_modified_peptide_as_proforma(sequence: str) -> str: last_paren = None for i, c in enumerate(sequence): if c == ']': + # Erase any text in parentheses as these indicate the modification + # rule and not the modification name.
We could look at the modification + # rule to infer N-term and C-term rules, but we don't have enough examples if last_paren is not None: k = i - last_paren - for j in range(k + 1): + for _ in range(k + 1): buffer.pop() last_paren = None buffer.append(c) @@ -45,7 +48,15 @@ def _rewrite_modified_peptide_as_proforma(sequence: str) -> str: buffer.append(c) else: buffer.append(c) - return ''.join(buffer) + pf_seq = ''.join(buffer) + # A peptide with an N-terminal modification will start with a square brace + # but needs to have a "-" added to be well-formed ProForma + if pf_seq.startswith("["): + i = pf_seq.find(']') + 1 + if i == 0: + raise ValueError(f"Malformed peptide sequence {sequence}") + pf_seq = f"{pf_seq[:i]}-{pf_seq[i:]}" + return pf_seq def _parse_value(value: str) -> Union[float, int, str, bool]: @@ -204,7 +215,10 @@ def _generate_peaks(self, batch: List[Dict[str, Any]]) -> List[Tuple[float, floa def _build_analyte(self, description: Dict[str, Any], analyte: Analyte) -> Analyte: pf_seq = _rewrite_modified_peptide_as_proforma(description['ModifiedPeptide']) - peptide = proforma.ProForma.parse(pf_seq) + try: + peptide = proforma.ProForma.parse(pf_seq) + except Exception as err: + breakpoint() analyte.add_attribute(STRIPPED_PEPTIDE_TERM, description['StrippedPeptide']) analyte.add_attribute(PROFORMA_PEPTIDE_TERM, pf_seq) From b9e0d5a847637b762d58827a0e0100e6bd229e70 Mon Sep 17 00:00:00 2001 From: Joshua Klein Date: Fri, 11 Aug 2023 11:20:58 -0400 Subject: [PATCH 24/24] Remove breakpoint --- implementations/python/mzlib/backends/spectronaut.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/implementations/python/mzlib/backends/spectronaut.py b/implementations/python/mzlib/backends/spectronaut.py index 32209c3..4506992 100644 --- a/implementations/python/mzlib/backends/spectronaut.py +++ b/implementations/python/mzlib/backends/spectronaut.py @@ -215,11 +215,7 @@ def _generate_peaks(self, batch: List[Dict[str, Any]]) -> List[Tuple[float, 
floa def _build_analyte(self, description: Dict[str, Any], analyte: Analyte) -> Analyte: pf_seq = _rewrite_modified_peptide_as_proforma(description['ModifiedPeptide']) - try: - peptide = proforma.ProForma.parse(pf_seq) - except Exception as err: - breakpoint() - + peptide = proforma.ProForma.parse(pf_seq) analyte.add_attribute(STRIPPED_PEPTIDE_TERM, description['StrippedPeptide']) analyte.add_attribute(PROFORMA_PEPTIDE_TERM, pf_seq) analyte.add_attribute("MS:1001117|theoretical mass", peptide.mass)