From ccba6254b15c3eac3dd1f8d766fce6abc88686e7 Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Fri, 31 Mar 2023 11:41:38 -0400
Subject: [PATCH 01/24] Checkpoint: add owner_id to Attribute

---
 implementations/python/mzlib/attributes.py | 11 +++++++---
 implementations/python/mzlib/cluster.py    |  6 ++++--
 implementations/python/mzlib/spectrum.py   | 25 +++++++++++++++++-----
 3 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/implementations/python/mzlib/attributes.py b/implementations/python/mzlib/attributes.py
index 8349187..04010fd 100644
--- a/implementations/python/mzlib/attributes.py
+++ b/implementations/python/mzlib/attributes.py
@@ -7,18 +7,20 @@
 
 
 class Attribute(object):
-    __slots__ = ("key", "value", "group_id")
+    __slots__ = ("key", "value", "group_id", "owner_id")
     key: str
     value: Union[str, int, float, 'Attribute', List]
     group_id: Optional[str]
+    owner_id: int = -1
 
-    def __init__(self, key, value, group_id=None):
+    def __init__(self, key, value, group_id=None, owner_id=-1):
         self.key = key
         self.value = value
         self.group_id = group_id
+        self.owner_id = owner_id
 
     def copy(self):
-        return self.__class__(self.key, self.value, self.group_id)
+        return self.__class__(self.key, self.value, self.group_id, self.owner_id)
 
     def __getitem__(self, i):
         if i == 0:
@@ -27,6 +29,8 @@ def __getitem__(self, i):
             return self.value
         elif i == 2:
             return self.group_id
+        elif i == 3:
+            return self.owner_id
         else:
             raise IndexError(i)
 
@@ -35,6 +39,7 @@ def __iter__(self):
         yield self.value
         if self.group_id:
             yield self.group_id
+        yield self.owner_id
 
     def __len__(self):
         if self.group_id is None:
diff --git a/implementations/python/mzlib/cluster.py b/implementations/python/mzlib/cluster.py
index a3f1f31..2d5a3f3 100644
--- a/implementations/python/mzlib/cluster.py
+++ b/implementations/python/mzlib/cluster.py
@@ -7,7 +7,7 @@
 from mzlib.attributes import AttributeManager, AttributeManagedProperty
 from .utils import ensure_iter, flatten
 
-SIMILAR_SPECTRUM_KEYS = ""
+SIMILAR_SPECTRUM_KEYS = "MS:1003263|similar spectrum keys"
 SIMILAR_SPECTRUM_USI = "MS:1003264|similar spectrum USI"
 
 CLUSTER_KEY = "MS:1003267|spectrum cluster key"
@@ -45,6 +45,8 @@ def __init__(self, attributes: List):
 
     @property
     def members(self) -> List[ClusterMemberRef]:
-        internal_refs = [SpectrumRef(k) for k in flatten(ensure_iter(self._member_references))]
+        internal_refs = [
+            SpectrumRef(k) for k in flatten(ensure_iter(self._member_references))
+        ]
         usi_members = [USIRef(k) for k in ensure_iter(self._cluster_member_usis)]
         return internal_refs + usi_members
diff --git a/implementations/python/mzlib/spectrum.py b/implementations/python/mzlib/spectrum.py
index 5f86853..8f363f1 100644
--- a/implementations/python/mzlib/spectrum.py
+++ b/implementations/python/mzlib/spectrum.py
@@ -2,11 +2,17 @@
 
 import textwrap
 
-from typing import Dict,  List
+from typing import Any, Dict,  List, Optional, TYPE_CHECKING
 
-from mzlib.attributes import AttributeManager, AttributeManagedProperty, AttributeListManagedProperty, AttributeProxy as _AttributeProxy, AttributeFacet
+from mzlib.attributes import (
+    AttributeManager, AttributeManagedProperty, AttributeListManagedProperty,
+    AttributeProxy as _AttributeProxy, AttributeFacet
+)
 from mzlib.analyte import Analyte, InterpretationCollection, Interpretation
 
+if TYPE_CHECKING:
+    from mzlib.spectrum_library import SpectrumLibrary
+
 #A class that holds data for each spectrum that is read from the SpectralLibrary class
 
 SPECTRUM_NAME = "MS:1003061|spectrum name"
@@ -26,16 +32,25 @@ class Spectrum(AttributeManager):
     peak_list: List
     analytes: Dict[str, Analyte]
     interpretations: InterpretationCollection
+    _source: Optional['SpectrumLibrary']
 
     #### Constructor
-    def __init__(self, attributes=None, peak_list=None, analytes=None, interpretations=None):
+    def __init__(self, attributes=None, peak_list=None, analytes=None,
+                 interpretations=None):
         """
-        __init__ - SpectrumLibrary constructor
 
         Parameters
         ----------
-        attributes: list
+        attributes : list
             A list of attribute [key, value (, group)] sets to initialize to.
+        peak_list : list
+            A list of tuples representing (annotated) peaks
+        analytes : dict[str, :class:`~.Analyte`]
+            A mapping from identifier to :class:`~.Analyte` unique within this
+            :class:`Spectrum`.
+        interpretations : :class:`~.InterpretationCollection`
+            A mapping from identifier to :class:`~.Interpretation` unique within
+            this :class:`Spectrum`.
         """
         if peak_list is None:
             peak_list = []

From 2fb766ffde5e86e7af6200ffd3d2043278f6c07d Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Tue, 18 Apr 2023 06:01:57 -0400
Subject: [PATCH 02/24] Checkpoint: scaffold cluster support in backends

---
 implementations/python/mzlib/attributes.py    | 21 +++++-
 implementations/python/mzlib/backends/base.py | 60 ++++++++++++----
 implementations/python/mzlib/backends/text.py | 71 +++++++++++++------
 3 files changed, 113 insertions(+), 39 deletions(-)

diff --git a/implementations/python/mzlib/attributes.py b/implementations/python/mzlib/attributes.py
index cbcbfd3..5c32302 100644
--- a/implementations/python/mzlib/attributes.py
+++ b/implementations/python/mzlib/attributes.py
@@ -1,6 +1,11 @@
 import textwrap
 
-from typing import Any, DefaultDict, Iterable, Iterator, Optional, Tuple, Union, List, Dict, Generic, TypeVar, Type
+from typing import (
+    Any, DefaultDict, Iterable,
+    Iterator, Optional, Tuple,
+    Union, List, Dict,
+    Generic, TypeVar, Type
+)
 
 
 T = TypeVar('T')
@@ -11,7 +16,7 @@ class Attribute(object):
     key: str
     value: Union[str, int, float, 'Attribute', List]
     group_id: Optional[str]
-    owner_id: int = -1
+    owner_id: int
 
     def __init__(self, key, value, group_id=None, owner_id=-1):
         self.key = key
@@ -176,7 +181,9 @@ def add_attribute_group(self, attributes: List[Union[Attribute, Tuple[str, Any]]
                 key, value = attr
             self.add_attribute(key, value, group_id)
 
-    def get_attribute(self, key: str, group_identifier: Optional[str] = None, raw: bool = False) -> Union[Any, List[Any], Attribute, List[Attribute]]:
+    def get_attribute(self, key: str, group_identifier: Optional[str] = None,
+                      raw: bool = False) -> Union[Any, List[Any], Attribute,
+                                                  List[Attribute]]:
         """Get the value or values associated with a given
         attribute key.
 
@@ -696,6 +703,14 @@ def __init__(self, name: str, attributes: Iterable = None, **kwargs):
         super().__init__(attributes, **kwargs)
         self.name = name
 
+    def member_of(self, target: Attributed) -> bool:
+        for attrib in self.attributes:
+            if attrib.group_id:
+                raise NotImplementedError()
+            if not target.has_attribute(attrib.key):
+                return False
+        return True
+
     def apply(self, target: Attributed):
         terms_to_remove: List[Tuple[str, Union[Attribute, List[Attribute]]]] = []
         for key in self.attributes.keys():
diff --git a/implementations/python/mzlib/backends/base.py b/implementations/python/mzlib/backends/base.py
index d49fd42..b438c37 100644
--- a/implementations/python/mzlib/backends/base.py
+++ b/implementations/python/mzlib/backends/base.py
@@ -7,12 +7,16 @@
 
 
 from psims.controlled_vocabulary import Entity
-from psims.controlled_vocabulary.controlled_vocabulary import load_uo, load_unimod, load_psims
+from psims.controlled_vocabulary.controlled_vocabulary import (
+    load_uo, load_unimod, load_psims)
 
 from mzlib.index import MemoryIndex, SQLIndex, IndexBase
 from mzlib.spectrum import LIBRARY_ENTRY_INDEX, LIBRARY_ENTRY_KEY, Spectrum
-from mzlib.analyte import Analyte, Interpretation, InterpretationMember, ANALYTE_MIXTURE_TERM
-from mzlib.attributes import Attributed, AttributedEntity, AttributeSet, AttributeManagedProperty
+from mzlib.analyte import (
+    Analyte, Interpretation, InterpretationMember, ANALYTE_MIXTURE_TERM)
+from mzlib.cluster import SpectrumCluster
+from mzlib.attributes import (
+    Attributed, AttributedEntity, AttributeSet, AttributeManagedProperty)
 
 from .utils import open_stream, LineBuffer
 
@@ -36,6 +40,7 @@ class AttributeSetTypes(enum.Enum):
     spectrum = enum.auto()
     analyte = enum.auto()
     interpretation = enum.auto()
+    cluster = enum.auto()
 
 
 class VocabularyResolverMixin(object):
@@ -86,20 +91,24 @@ def type_for_format(cls, format_or_extension):
         return cls._file_extension_to_implementation.get(format_or_extension)
 
 
-class SpectralLibraryBackendBase(AttributedEntity, VocabularyResolverMixin, metaclass=SubclassRegisteringMetaclass):
+class SpectralLibraryBackendBase(AttributedEntity, VocabularyResolverMixin,
+                                 metaclass=SubclassRegisteringMetaclass):
     """A base class for all spectral library formats.
 
     """
     file_format = None
 
-    _file_extension_to_implementation: Dict[str, Type['SpectralLibraryBackendBase']] = {}
-    _format_name_to_implementation: Dict[str, Type['SpectralLibraryBackendBase']] = {}
+    _file_extension_to_implementation: Dict[str,
+                                            Type['SpectralLibraryBackendBase']] = {}
+    _format_name_to_implementation: Dict[str,
+                                         Type['SpectralLibraryBackendBase']] = {}
 
     index: IndexBase
 
     entry_attribute_sets: Dict[str, AttributeSet]
     analyte_attribute_sets: Dict[str, AttributeSet]
     interpretation_attribute_sets: Dict[str, AttributeSet]
+    cluster_attribute_sets: Dict[str, AttributeSet]
 
     name = AttributeManagedProperty[str](LIBRARY_NAME_TERM)
     identifier = AttributeManagedProperty[str](LIBRARY_IDENTIFIER_TERM)
@@ -145,7 +154,8 @@ def guess_from_header(cls, filename) -> bool:
         return False
 
     @classmethod
-    def guess_implementation(cls, filename, index_type=None, **kwargs) -> 'SpectralLibraryBackendBase':
+    def guess_implementation(cls, filename, index_type=None,
+                             **kwargs) -> 'SpectralLibraryBackendBase':
         """Guess the backend implementation to use with this file format.
 
         Parameters
@@ -233,8 +243,17 @@ def _new_analyte(self, id=None) -> Analyte:
             attr_set.apply(analyte)
         return analyte
 
-    def _analyte_interpretation_link(self, spectrum: Spectrum, interpretation: Interpretation):
-        if interpretation.has_attribute(ANALYTE_MIXTURE_TERM) and not interpretation.analytes:
+    def _new_cluster(self) -> SpectrumCluster:
+        cluster = SpectrumCluster()
+        attr_set = self.cluster_attribute_sets.get('all')
+        if attr_set:
+            attr_set.apply(cluster)
+        return cluster
+
+    def _analyte_interpretation_link(self, spectrum: Spectrum,
+                                     interpretation: Interpretation):
+        if (interpretation.has_attribute(ANALYTE_MIXTURE_TERM) and
+            not interpretation.analytes):
             analyte_ids = interpretation.get_attribute(ANALYTE_MIXTURE_TERM)
             if isinstance(analyte_ids, str):
                 term = self.find_term_for(ANALYTE_MIXTURE_CURIE)
@@ -256,7 +275,8 @@ def _default_interpretation_to_analytes(self, spectrum: Spectrum):
                 for analyte in spectrum.analytes.values():
                     interpretation.add_analyte(analyte)
 
-    def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None):
+    def get_spectrum(self, spectrum_number: int=None,
+                     spectrum_name: str=None) -> Spectrum:
         """Retrieve a single spectrum from the library.
 
         Parameters
@@ -272,6 +292,9 @@ def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None):
         """
         raise NotImplementedError()
 
+    def get_cluster(self, cluster_number: int) -> SpectrumCluster:
+        raise NotImplementedError()
+
     def find_spectra(self, specification, **query_keys):
         raise NotImplementedError()
 
@@ -334,13 +357,16 @@ def has_index_preference(cls, filename: str) -> Type[IndexBase]:
     def read(self):
         raise NotImplementedError()
 
-    def _add_attribute_set(self, attribute_set: AttributeSet, attribute_set_type: AttributeSetTypes):
+    def _add_attribute_set(self, attribute_set: AttributeSet,
+                           attribute_set_type: AttributeSetTypes):
         if attribute_set_type == AttributeSetTypes.spectrum:
             self.entry_attribute_sets[attribute_set.name] = attribute_set
         elif attribute_set_type == AttributeSetTypes.analyte:
             self.analyte_attribute_sets[attribute_set.name] = attribute_set
         elif attribute_set_type == AttributeSetTypes.interpretation:
             self.interpretation_attribute_sets[attribute_set.name] = attribute_set
+        elif attribute_set_type == AttributeSetTypes.cluster:
+            self.cluster_attribute_sets[attribute_set.name] = attribute_set
         else:
             raise ValueError(f"Could not map {attribute_set_type}")
 
@@ -352,7 +378,8 @@ def summarize_parsing_errors(self) -> Dict:
 
 class _PlainTextSpectralLibraryBackendBase(SpectralLibraryBackendBase):
 
-    def __init__(self, filename, index_type=None, read_metadata=True, create_index: bool=True):
+    def __init__(self, filename, index_type=None, read_metadata=True,
+                 create_index: bool=True):
         if index_type is None and create_index:
             index_type = self.has_index_preference(filename)
 
@@ -439,12 +466,14 @@ def search(self, specification, **query_keys) -> List[Spectrum]:
         return spectra
 
 
-class SpectralLibraryWriterBase(VocabularyResolverMixin, metaclass=SubclassRegisteringMetaclass):
+class SpectralLibraryWriterBase(VocabularyResolverMixin,
+                                metaclass=SubclassRegisteringMetaclass):
     def __init__(self, filename, **kwargs):
         self.filename = filename
         super().__init__(**kwargs)
 
-    def _filter_attributes(self, attributes: Attributed, filter_fn: Callable) -> Iterable:
+    def _filter_attributes(self, attributes: Attributed,
+                           filter_fn: Callable) -> Iterable:
         if isinstance(attributes, AttributedEntity):
             attributes = attributes.attributes
         for attrib in attributes:
@@ -498,6 +527,9 @@ def write_library(self, library: SpectralLibraryBackendBase):
     def write_spectrum(self, spectrum: Spectrum):
         raise NotImplementedError()
 
+    def write_cluster(self, cluster: SpectrumCluster):
+        raise NotImplementedError()
+
     def __enter__(self) -> 'SpectralLibraryWriterBase':
         return self
 
diff --git a/implementations/python/mzlib/backends/text.py b/implementations/python/mzlib/backends/text.py
index 091cc21..151875d 100644
--- a/implementations/python/mzlib/backends/text.py
+++ b/implementations/python/mzlib/backends/text.py
@@ -7,11 +7,11 @@
 
 from typing import ClassVar, List, Tuple, Union, Iterable
 
-from mzlib.index import MemoryIndex
 from mzlib.annotation import parse_annotation
 from mzlib.spectrum import Spectrum
+from mzlib.cluster import SpectrumCluster
 from mzlib.attributes import AttributeManager, Attributed, AttributeSet
-from mzlib.analyte import ANALYTE_MIXTURE_TERM, Analyte, Interpretation, InterpretationMember
+from mzlib.analyte import Analyte, Interpretation, InterpretationMember
 
 from .base import (
     SpectralLibraryBackendBase,
@@ -43,6 +43,7 @@ class SpectrumParserStateEnum(enum.Enum):
     interpretation_member = 4
     peaks = 5
     done = 6
+    cluster = 7
 
 
 class LibraryParserStateEnum(enum.Enum):
@@ -61,7 +62,8 @@ class LibraryParserStateEnum(enum.Enum):
 START_OF_LIBRARY_MARKER = re.compile(r"^<mzSpecLib\s+(.+)>")
 SPECTRUM_NAME_PRESENT = re.compile(r'MS:1003061\|spectrum name=')
 START_OF_INTERPRETATION_MEMBER_MARKER = re.compile(r"<InterpretationMember(?:=(.+))>")
-START_OF_ATTRIBUTE_SET = re.compile(r"<AttributeSet (Spectrum|Analyte|Interpretation)=(.+)>")
+START_OF_ATTRIBUTE_SET = re.compile(
+    r"<AttributeSet (Spectrum|Analyte|Interpretation|Cluster)=(.+)>")
 START_OF_CLUSTER = re.compile(r"<Cluster(?:=(.+))>")
 
 
@@ -90,7 +92,9 @@ class TextSpectralLibrary(_PlainTextSpectralLibraryBackendBase):
     def guess_from_header(cls, filename: str) -> bool:
         with open_stream(filename, 'r', encoding='utf8') as stream:
             first_line = stream.readline()
-            if START_OF_SPECTRUM_MARKER.match(first_line) or START_OF_LIBRARY_MARKER.match(first_line):
+            if (START_OF_SPECTRUM_MARKER.match(first_line) or
+                START_OF_LIBRARY_MARKER.match(first_line) or
+                START_OF_CLUSTER.match(first_line)):
                 return True
         return False
 
@@ -123,9 +127,11 @@ def _parse_header_from_stream(self, stream: io.TextIOBase) -> Tuple[bool, int]:
                 if match:
                     state = LibraryParserStateEnum.attribute_sets
                     if current_attribute_set is not None:
-                        self._add_attribute_set(current_attribute_set, current_attribute_set_type)
+                        self._add_attribute_set(
+                            current_attribute_set, current_attribute_set_type)
 
-                    current_attribute_set_type = attribute_set_types[match.group(1).lower()]
+                    current_attribute_set_type = attribute_set_types[
+                        match.group(1).lower()]
                     attrib_set_name = match.group(2)
                     current_attribute_set = AttributeSet(attrib_set_name, [])
                 else:
@@ -151,7 +157,8 @@ def _parse_header_from_stream(self, stream: io.TextIOBase) -> Tuple[bool, int]:
                         match = grouped_key_value_term_pattern.match(line)
                         if match is not None:
                             d = match.groupdict()
-                            # If we're in an attribute set, store it in the attribute set
+                            # If we're in an attribute set, store it in the attribute
+                            # set
                             if state == LibraryParserStateEnum.attribute_sets:
                                 current_attribute_set.add_attribute(
                                     d['term'], try_cast(d['value']), d['group_id'])
@@ -178,7 +185,8 @@ def _parse_header_from_stream(self, stream: io.TextIOBase) -> Tuple[bool, int]:
                 line = stream.readline()
 
             if current_attribute_set is not None:
-                self._add_attribute_set(current_attribute_set, current_attribute_set_type)
+                self._add_attribute_set(
+                    current_attribute_set, current_attribute_set_type)
             self.attributes.clear()
             self.attributes._from_iterable(attributes)
             return True, nbytes
@@ -234,7 +242,7 @@ def create_index(self) -> int:
 
                 line = line.rstrip()
                 if state == 'header':
-                    # if re.match(r'MS:1003061\|spectrum name=', line):
+
                     if START_OF_SPECTRUM_MARKER.match(line):
                         state = 'body'
                         spectrum_file_offset = line_beginning_file_offset
@@ -243,7 +251,7 @@ def create_index(self) -> int:
                 if state == 'body':
                     if len(line) == 0:
                         continue
-                    # if re.match(r'MS:1003061\|spectrum name=', line):
+
                     if START_OF_SPECTRUM_MARKER.match(line):
                         if len(spectrum_buffer) > 0:
                             if not spectrum_name:
@@ -308,7 +316,9 @@ def _prepare_attribute_dict(self, match):
         except KeyError:
             match['value'] = try_cast(value)
 
-    def _parse_attribute_into(self, line: str, store: Attributed, line_number_message=lambda:'', state: SpectrumParserStateEnum=None) -> bool:
+    def _parse_attribute_into(self, line: str, store: Attributed,
+                              line_number_message=lambda:'',
+                              state: SpectrumParserStateEnum=None) -> bool:
         match = key_value_term_pattern.match(line)
         if match is not None:
             d = match.groupdict()
@@ -320,6 +330,8 @@ def _parse_attribute_into(self, line: str, store: Attributed, line_number_messag
                     attr_set = self.analyte_attribute_sets[d['value']]
                 elif SpectrumParserStateEnum.interpretation == state:
                     attr_set = self.interpretation_attribute_sets[d['value']]
+                elif SpectrumParserStateEnum.cluster == state:
+                    attr_set = self.cluster_attribute_sets[d['value']]
                 else:
                     raise ValueError(f"Cannot define attribute sets for {state}")
                 attr_set.apply(store)
@@ -336,7 +348,8 @@ def _parse_attribute_into(self, line: str, store: Attributed, line_number_messag
                 store.group_counter = int(d['group_id'])
                 return True
             else:
-                raise ValueError(f"Malformed grouped attribute {line}{line_number_message()}")
+                raise ValueError(
+                    f"Malformed grouped attribute {line}{line_number_message()}")
         elif "=" in line:
             name, value = line.split("=")
             store.add_attribute(name, try_cast(value))
@@ -344,13 +357,14 @@ def _parse_attribute_into(self, line: str, store: Attributed, line_number_messag
         else:
             raise ValueError(f"Malformed attribute line {line}{line_number_message()}")
 
-    def _parse(self, buffer: Iterable, spectrum_index: int = None,
+    def _parse(self, buffer: Iterable[str], spectrum_index: int = None,
                start_line_number: int=None) -> Spectrum:
         spec: Spectrum = self._new_spectrum()
         spec.index = spectrum_index if spectrum_index is not None else -1
         interpretation: Interpretation = None
         analyte: Analyte = None
         interpretation_member: InterpretationMember = None
+        cluster: SpectrumCluster = None
 
         STATES = SpectrumParserStateEnum
         state: SpectrumParserStateEnum = STATES.header
@@ -402,7 +416,8 @@ def real_line_number_or_nothing():
                     analyte = self._new_analyte(match.group(1))
                     spec.add_analyte(analyte)
                     continue
-                self._parse_attribute_into(line, spec, real_line_number_or_nothing, state)
+                self._parse_attribute_into(
+                    line, spec, real_line_number_or_nothing, state)
 
             elif state == STATES.interpretation:
                 if START_OF_ANALYTE_MARKER.match(line):
@@ -438,7 +453,8 @@ def real_line_number_or_nothing():
                     interpretation.add_member_interpretation(interpretation_member)
                     continue
 
-                self._parse_attribute_into(line, interpretation.attributes, real_line_number_or_nothing)
+                self._parse_attribute_into(
+                    line, interpretation.attributes, real_line_number_or_nothing)
                 self._analyte_interpretation_link(spec, interpretation)
 
             elif state == STATES.interpretation_member:
@@ -526,16 +542,23 @@ def real_line_number_or_nothing():
                             f"Malformed peak line {line} with {n_tokens} entries{real_line_number_or_nothing()}")
                 else:
                     raise ValueError(f"Malformed peak line {line}{real_line_number_or_nothing()}")
+            elif state == STATES.cluster:
+                self._parse_attribute_into(
+                    line, cluster, real_line_number_or_nothing, state)
             else:
-                raise ValueError(f"Unknown state {state}{real_line_number_or_nothing()}")
+                raise ValueError(
+                    f"Unknown state {state}{real_line_number_or_nothing()}")
+        if cluster:
+            return cluster
         spec.peak_list = peak_list
         # Backfill analytes into interpretations that never explicitly listed them.
         self._default_interpretation_to_analytes(spec)
         return spec
 
-    def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Spectrum:
-        # keep the two branches separate for the possibility that this is not possible with all
-        # index schemes.
+    def get_spectrum(self, spectrum_number: int=None,
+                     spectrum_name: str=None) -> Spectrum:
+        # keep the two branches separate for the possibility that this is not
+        # possible with all index schemes.
         if spectrum_number is not None:
             if spectrum_name is not None:
                 raise ValueError(
@@ -596,13 +619,16 @@ def write_header(self, library: SpectralLibraryBackendBase):
         for attr_set in library.interpretation_attribute_sets.values():
             self.write_attribute_set(attr_set, AttributeSetTypes.interpretation)
 
-    def write_attribute_set(self, attribute_set: AttributeSet, attribute_set_type: AttributeSetTypes):
+    def write_attribute_set(self, attribute_set: AttributeSet,
+                            attribute_set_type: AttributeSetTypes):
         if attribute_set_type == AttributeSetTypes.spectrum:
             set_type = "Spectrum"
         elif attribute_set_type == AttributeSetTypes.analyte:
             set_type = "Analyte"
         elif attribute_set_type == AttributeSetTypes.interpretation:
             set_type = "Interpretation"
+        elif attribute_set_type == AttributeSetTypes.cluster:
+            set_type = "Cluster"
 
         header = f"<AttributeSet {set_type}={attribute_set.name}>\n"
         self.handle.write(header)
@@ -620,12 +646,13 @@ def write_spectrum(self, spectrum: Spectrum):
         for analyte in spectrum.analytes.values():
             self.handle.write(f"<Analyte={analyte.id}>\n")
             self._write_attributes(analyte.attributes)
-        n_interps = len(spectrum.interpretations)
+        _n_interps = len(spectrum.interpretations)
         for interpretation in spectrum.interpretations.values():
             interpretation: Interpretation
 
             if len(spectrum.analytes) == 1:
-                attribs_of = list(self._filter_attributes(interpretation, self._not_analyte_mixture_term))
+                attribs_of = list(self._filter_attributes(
+                    interpretation, self._not_analyte_mixture_term))
             else:
                 attribs_of = interpretation.attributes
 

From a256e28da3af276f15f2ff351791de2b7a824a26 Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Sun, 7 May 2023 19:53:44 -0400
Subject: [PATCH 03/24] Missing import

---
 implementations/python/mzlib/backends/base.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/implementations/python/mzlib/backends/base.py b/implementations/python/mzlib/backends/base.py
index 6fc8d38..2b56797 100644
--- a/implementations/python/mzlib/backends/base.py
+++ b/implementations/python/mzlib/backends/base.py
@@ -9,6 +9,7 @@
 from psims.controlled_vocabulary import Entity
 from psims.controlled_vocabulary.controlled_vocabulary import (
     load_uo, load_unimod, load_psims)
+from mzlib.cluster import SpectrumCluster
 
 from mzlib.index import MemoryIndex, SQLIndex, IndexBase
 from mzlib.spectrum import LIBRARY_ENTRY_INDEX, LIBRARY_ENTRY_KEY, Spectrum
@@ -63,9 +64,8 @@ def type_for_format(cls, format_or_extension):
 
 
 class SpectralLibraryBackendBase(AttributedEntity, _VocabularyResolverMixin, metaclass=SubclassRegisteringMetaclass):
-    """A base class for all spectral library formats.
+    """A base class for all spectral library formats."""
 
-    """
     file_format = None
 
     _file_extension_to_implementation: Dict[str,
@@ -447,6 +447,7 @@ def search(self, specification, **query_keys) -> List[Spectrum]:
 
 
 class SpectralLibraryWriterBase(_VocabularyResolverMixin, metaclass=SubclassRegisteringMetaclass):
+
     def __init__(self, filename, **kwargs):
         self.filename = filename
         super().__init__(**kwargs)

From dd6dc2ae3d68224c79cc77612ba5bfede8f029a7 Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Thu, 11 May 2023 22:37:06 -0400
Subject: [PATCH 04/24] Add cluster support, fix crosswired key vs index at the
 heart of the library

---
 ...chinese_hamster_hcd_selected_head.mzlb.txt |  14 +-
 implementations/python/mzlib/attributes.py    |  36 ++--
 implementations/python/mzlib/backends/base.py |  84 +++++---
 .../python/mzlib/backends/bibliospec.py       |   4 +-
 implementations/python/mzlib/backends/json.py |  71 +++++--
 implementations/python/mzlib/backends/msp.py  |   6 +-
 implementations/python/mzlib/backends/text.py | 166 ++++++++++++----
 implementations/python/mzlib/cluster.py       |   9 +-
 implementations/python/mzlib/index/base.py    |  78 +++++++-
 implementations/python/mzlib/index/memory.py  | 185 +++++++++++++++---
 implementations/python/mzlib/index/sql.py     | 110 ++++++++++-
 implementations/python/mzlib/spectrum.py      |  20 +-
 .../python/mzlib/spectrum_library.py          |  58 ++++--
 implementations/python/pyproject.toml         |   8 +
 .../test_data/bad_peak_annotations.mzlb.txt   |   4 +-
 ...hinese_hamster_hcd_selected_head.mzlb.json |  14 +-
 ...chinese_hamster_hcd_selected_head.mzlb.txt |  14 +-
 .../tests/test_data/clusters_example.mzlb     | 142 ++++++++++++++
 .../complex_interpretations.mzlb.txt          |   2 +-
 ...lex_interpretations_with_members.mzlb.json |   2 +-
 ...plex_interpretations_with_members.mzlb.txt |   2 +-
 implementations/python/tests/test_index.py    |   2 +-
 .../python/tests/test_library_backend.py      |   2 +-
 implementations/python/tests/test_spectrum.py |   8 +-
 24 files changed, 841 insertions(+), 200 deletions(-)
 create mode 100644 implementations/python/pyproject.toml
 create mode 100644 implementations/python/tests/test_data/clusters_example.mzlb

diff --git a/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt b/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt
index fc6a51e..79cd964 100644
--- a/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt
+++ b/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt
@@ -4,7 +4,7 @@ MS:1003188|library name=examples/chinese_hamster_hcd_selected_head.msp
 <AttributeSet Analyte=all>
 <AttributeSet Interpretation=all>
 <Spectrum=1>
-MS:1003061|spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV
+MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
 MS:1000744|selected ion m/z=855.4538
@@ -135,7 +135,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=4
 1496.7792	11918.3	y15/-6.5ppm
 
 <Spectrum=2>
-MS:1003061|spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_53eV
+MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_53eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
 MS:1000744|selected ion m/z=855.4538
@@ -383,7 +383,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=5
 1628.3004	719.6	?
 
 <Spectrum=3>
-MS:1003061|spectrum name=AAAAGQTGTVPPGAPGALPLPGMAIVK/2_0_76eV
+MS:1003061|library spectrum name=AAAAGQTGTVPPGAPGALPLPGMAIVK/2_0_76eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
 MS:1000744|selected ion m/z=1207.1672
@@ -549,7 +549,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0
 1980.9857	3567.9	?
 
 <Spectrum=4>
-MS:1003061|spectrum name=AAAAGSTSVKPIFSR/2_0_44eV
+MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/2_0_44eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
 MS:1000744|selected ion m/z=731.9043
@@ -704,7 +704,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=1
 1465.9423	113.63	?
 
 <Spectrum=5>
-MS:1003061|spectrum name=AAAAGSTSVKPIFSR/3_0_28eV
+MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/3_0_28eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=3
 MS:1000744|selected ion m/z=488.2719
@@ -909,7 +909,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0
 1469.9915	925.5	?
 
 <Spectrum=6>
-MS:1003061|spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_50eV
+MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_50eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
 MS:1000744|selected ion m/z=830.8834
@@ -1021,7 +1021,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=6
 1670.2889	140.136	?
 
 <Spectrum=7>
-MS:1003061|spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_52eV
+MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_52eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
 MS:1000744|selected ion m/z=830.8834
diff --git a/implementations/python/mzlib/attributes.py b/implementations/python/mzlib/attributes.py
index 5c32302..fc30180 100644
--- a/implementations/python/mzlib/attributes.py
+++ b/implementations/python/mzlib/attributes.py
@@ -418,8 +418,7 @@ def _attributes_from_iterable(self, attributes):
         return self._from_iterable(attributes)
 
     def copy(self):
-        """Make a deep copy of the object
-        """
+        """Make a deep copy of the object"""
         return self.__class__(self.attributes)
 
     def __repr__(self):
@@ -457,7 +456,8 @@ class _ReadAttributes(object):
     attributes: AttributeManager
 
     def get_attribute(self, key, group_identifier=None, raw: bool = False):
-        """Get the value or values associated with a given
+        """
+        Get the value or values associated with a given
         attribute key from the entity's attribute store.
 
         Parameters
@@ -466,6 +466,9 @@ def get_attribute(self, key, group_identifier=None, raw: bool = False):
             The name of the attribute to retrieve
         group_identifier : str, optional
             The specific group identifier to return from.
+        raw : bool, optional
+            To return the stored value, or an :class:`Attribute` object preserving
+            additional information
 
         Returns
         -------
@@ -478,7 +481,8 @@ def get_attribute_group(self, group_identifier: str) -> List[Any]:
         return self.attributes.get_attribute_group(group_identifier)
 
     def has_attribute(self, key) -> bool:
-        """Test for the presence of a given attribute in the library
+        """
+        Test for the presence of a given attribute in the library
         level store.
 
         Parameters
@@ -493,7 +497,8 @@ def has_attribute(self, key) -> bool:
         return self.attributes.has_attribute(key)
 
     def get_by_name(self, name: str):
-        '''Search for an attribute by human-readable name.
+        """
+        Search for an attribute by human-readable name.
 
         Parameters
         ----------
@@ -504,7 +509,7 @@ def get_by_name(self, name: str):
         -------
         object:
             The attribute value if found or :const:`None`.
-        '''
+        """
         return self.attributes.get_by_name(name)
 
     def _iter_attribute_groups(self):
@@ -523,7 +528,8 @@ class _WriteAttributes(object):
     attributes: AttributeManager
 
     def add_attribute(self, key, value, group_identifier=None) -> Union[Any, List[Any]]:
-        """Add an attribute to the entity's attributes store.
+        """
+        Add an attribute to the entity's attributes store.
 
         Parameters
         ----------
@@ -541,7 +547,8 @@ def replace_attribute(self, key, value, group_identifier=None):
         return self.attributes.replace_attribute(key, value, group_identifier=group_identifier)
 
     def remove_attribute(self, key, group_identifier=None):
-        """Remove the value or values associated with a given
+        """
+        Remove the value or values associated with a given
         attribute key from the entity's attribute store.
 
         This rebuilds the entire store, which may be expensive.
@@ -564,13 +571,15 @@ def _clear_attributes(self):
 
 
 class AttributedEntity(_ReadAttributes, _WriteAttributes):
-    '''A base type for entities which contain an :class:`AttributeManager`
+    """
+    A base type for entities which contain an :class:`AttributeManager`
     without being completely subsumed by it.
 
     An :class:`AttributeManager` represents a collection of attributes
     first and foremost, supplying :class:`~.collections.abc.MutableMapping`-like
     interface to them, in addition to methods.
-    '''
+    """
+
     __slots__ = ("attributes", )
 
     attributes: AttributeManager
@@ -711,10 +720,11 @@ def member_of(self, target: Attributed) -> bool:
                 return False
         return True
 
-    def apply(self, target: Attributed):
+    def apply(self, target: Attributed, ):
         terms_to_remove: List[Tuple[str, Union[Attribute, List[Attribute]]]] = []
         for key in self.attributes.keys():
-            terms_to_remove.append((key, target.get_attribute(key, raw=True)))
+            if target.has_attribute(key):
+                terms_to_remove.append((key, target.get_attribute(key, raw=True)))
 
         group_ids = DefaultDict(int)
         for key, terms in terms_to_remove:
@@ -734,7 +744,7 @@ def apply(self, target: Attributed):
         for group_id, attrs in self._iter_attribute_groups():
             if group_id is None:
                 for a in attrs:
-                    target.add_attribute(a)
+                    target.add_attribute(a.key, a.value, group_identifier=None)
             else:
                 target.add_attribute_group(attrs)
 
diff --git a/implementations/python/mzlib/backends/base.py b/implementations/python/mzlib/backends/base.py
index 0373734..b718414 100644
--- a/implementations/python/mzlib/backends/base.py
+++ b/implementations/python/mzlib/backends/base.py
@@ -14,7 +14,7 @@
 from mzlib.cluster import SpectrumCluster
 
 from mzlib.index import MemoryIndex, SQLIndex, IndexBase
-from mzlib.spectrum import LIBRARY_ENTRY_INDEX, LIBRARY_ENTRY_KEY, Spectrum
+from mzlib.spectrum import LIBRARY_SPECTRUM_INDEX, LIBRARY_SPECTRUM_KEY, Spectrum
 from mzlib.analyte import Analyte, Interpretation, InterpretationMember, ANALYTE_MIXTURE_TERM
 from mzlib.attributes import Attributed, AttributedEntity, AttributeSet, AttributeManagedProperty
 from mzlib.ontology import _VocabularyResolverMixin
@@ -69,7 +69,26 @@ def type_for_format(cls, format_or_extension):
         return cls._file_extension_to_implementation.get(format_or_extension)
 
 
-class SpectralLibraryBackendBase(AttributedEntity, _VocabularyResolverMixin, metaclass=SubclassRegisteringMetaclass):
+class _LibraryViewMixin:
+    """Mixin declaring library-level metadata accessors on top of the host's attribute API."""
+    name = AttributeManagedProperty[str](LIBRARY_NAME_TERM)
+    identifier = AttributeManagedProperty[str](LIBRARY_IDENTIFIER_TERM)
+    description = AttributeManagedProperty[str](LIBRARY_DESCRIPTION_TERM)
+    uri = AttributeManagedProperty[str](LIBRARY_URI_TERM)
+    library_version = AttributeManagedProperty[str](LIBRARY_VERSION_TERM)
+
+    @property
+    def format_version(self):
+        try:
+            value = self.get_attribute(FORMAT_VERSION_TERM)
+            return value
+        except KeyError:  # lookup failed; presumably the attribute has not been set yet
+            value = DEFAULT_VERSION
+            self.add_attribute(FORMAT_VERSION_TERM, value)  # NOTE: the getter stores the default as a side effect
+            return value
+
+
+class SpectralLibraryBackendBase(AttributedEntity, _VocabularyResolverMixin, _LibraryViewMixin, metaclass=SubclassRegisteringMetaclass):
     """A base class for all spectral library formats."""
 
     file_format = None
@@ -86,11 +105,6 @@ class SpectralLibraryBackendBase(AttributedEntity, _VocabularyResolverMixin, met
     interpretation_attribute_sets: Dict[str, AttributeSet]
     cluster_attribute_sets: Dict[str, AttributeSet]
 
-    name = AttributeManagedProperty[str](LIBRARY_NAME_TERM)
-    identifier = AttributeManagedProperty[str](LIBRARY_IDENTIFIER_TERM)
-    description = AttributeManagedProperty[str](LIBRARY_DESCRIPTION_TERM)
-    uri = AttributeManagedProperty[str](LIBRARY_URI_TERM)
-
     @classmethod
     def guess_from_filename(cls, filename: Union[str, Path, io.FileIO]) -> bool:
         """
@@ -178,19 +192,12 @@ def __init__(self, filename):
         self.interpretation_attribute_sets = {
             "all": AttributeSet("all", [])
         }
+        self.cluster_attribute_sets = {
+            "all": AttributeSet("all", [])
+        }
 
         super().__init__(None)
 
-    @property
-    def format_version(self):
-        try:
-            value = self.get_attribute(FORMAT_VERSION_TERM)
-            return value
-        except KeyError:
-            value = DEFAULT_VERSION
-            self.add_attribute(FORMAT_VERSION_TERM, value)
-            return value
-
     def read_header(self) -> bool:
         """
         Read just the header of the whole library
@@ -265,7 +272,7 @@ def get_spectrum(self, spectrum_number: int=None,
         Parameters
         ----------
         spectrum_number : int, optional
-            The index of the specturm in the library
+            The index of the spectrum in the library
         spectrum_name : str, optional
             The name of the spectrum in the library
 
@@ -276,6 +283,18 @@ def get_spectrum(self, spectrum_number: int=None,
         raise NotImplementedError()
 
     def get_cluster(self, cluster_number: int) -> SpectrumCluster:
+        """
+        Retrieve a single spectrum cluster from the library.
+
+        Parameters
+        ----------
+        cluster_number : int
+            The index of the cluster in the library
+
+        Returns
+        -------
+        :class:`~.SpectrumCluster`
+        """
         raise NotImplementedError()
 
     def find_spectra(self, specification, **query_keys):
@@ -358,6 +377,7 @@ def _add_attribute_set(self, attribute_set: AttributeSet,
     def summarize_parsing_errors(self) -> Dict:
         return {}
 
+
 guess_implementation = SpectralLibraryBackendBase.guess_implementation
 
 
@@ -578,14 +598,14 @@ def _not_analyte_mixture_term(self, attrib):
     def _not_entry_index(self, attrib):
         if attrib:
             key = attrib[0]
-            if key == LIBRARY_ENTRY_INDEX:
+            if key == LIBRARY_SPECTRUM_INDEX:
                 return False
         return True
 
     def _not_entry_key_or_index(self, attrib):
         if attrib:
             key = attrib[0]
-            if key in (LIBRARY_ENTRY_INDEX, LIBRARY_ENTRY_KEY):
+            if key in (LIBRARY_SPECTRUM_INDEX, LIBRARY_SPECTRUM_KEY):
                 return False
         return True
 
@@ -601,14 +621,24 @@ def write_library(self, library: SpectralLibraryBackendBase):
         step = max(min(n // 100, 5000), 1)
         ident = ''
         i = 0
-        for i, spectrum in enumerate(library):
+        for i, entry in enumerate(library):
             if i % step == 0 and i:
+                if isinstance(entry, SpectrumCluster):
+                    tag = "cluster "
+                else:
+                    tag = ""
                 try:
-                    ident = f"{spectrum.key}:{spectrum.name}"
+                    ident = f"{tag}{entry.key}:{entry.name}"
                 except Exception:
-                    ident = str(spectrum.key)
+                    ident = f"{tag}{entry.key}"
                 logger.info(f"Wrote {ident} {i}/{n} ({i / n * 100.0:0.2f}%)")
-            self.write_spectrum(spectrum)
+            if isinstance(entry, Spectrum):
+                self.write_spectrum(entry)
+            elif isinstance(entry, SpectrumCluster):
+                self.write_cluster(entry)
+            else:
+                raise TypeError(f"Don't know how to save {entry.__class__}")
+
         i = n
         logger.info(f"Wrote {n} spectra")
 
@@ -628,17 +658,13 @@ def close(self):
         pass
 
 
-class LibrarySpectrumIterator(AttributedEntity, Iterator[Spectrum]):
+class LibraryIterator(AttributedEntity, _LibraryViewMixin, Iterator[Spectrum]):
     def __init__(self, backend: SpectralLibraryBackendBase) -> None:
         self.backend = backend
         self.attributes = backend
         self.iter = backend.read()
         self._buffer = next(self.iter)
 
-    @property
-    def format_version(self):
-        return self.backend.format_version
-
     def __iter__(self):
         return self
 
diff --git a/implementations/python/mzlib/backends/bibliospec.py b/implementations/python/mzlib/backends/bibliospec.py
index 2741c59..e2c5580 100644
--- a/implementations/python/mzlib/backends/bibliospec.py
+++ b/implementations/python/mzlib/backends/bibliospec.py
@@ -81,8 +81,8 @@ def __len__(self):
 
 
 class BibliospecSpectralLibrary(BibliospecBase, SpectralLibraryBackendBase):
-    '''Read Bibliospec 2 SQLite3 spectral library files.
-    '''
+    """Read Bibliospec 2 SQLite3 spectral library files."""
+
     connection: sqlite3.Connection
 
     file_format = "blib"
diff --git a/implementations/python/mzlib/backends/json.py b/implementations/python/mzlib/backends/json.py
index e039fc4..336dc86 100644
--- a/implementations/python/mzlib/backends/json.py
+++ b/implementations/python/mzlib/backends/json.py
@@ -4,10 +4,11 @@
 import logging
 import warnings
 
-from typing import Iterable, List, Dict, Mapping, Union
+from typing import Any, Iterable, List, Dict, Mapping, Union
 
 from pathlib import Path
 from xml.dom.minidom import Attr
+from mzlib.cluster import SpectrumCluster
 
 from mzlib.index import MemoryIndex
 from mzlib.attributes import AttributeManager, Attributed
@@ -25,18 +26,22 @@
 LIBRARY_METADATA_KEY = "attributes"
 ELEMENT_ATTRIBUTES_KEY = "attributes"
 SPECTRA_KEY = "spectra"
+CLUSTERS_KEY = "clusters"
 FORMAT_VERSION_KEY = "format_version"
 ANALYTES_KEY = 'analytes'
 INTERPRETATIONS_KEY = 'interpretations'
 INTERPRETATION_MEMBERS_KEY = 'members'
-PEAK_ANNOTATIONS_KEY = 'peak_annotations'
 ID_KEY = 'id'
+
 MZ_KEY = "mzs"
 INTENSITY_KEY = "intensities"
 AGGREGATIONS_KEY = "aggregations"
+PEAK_ANNOTATIONS_KEY = 'peak_annotations'
+
 SPECTRUM_CLASSES = "spectrum_attribute_sets"
 ANALYTE_CLASSES = "analyte_attribute_sets"
 INTERPRETATION_CLASSES = "interpretation_attribute_sets"
+CLUSTER_CLASSES = "cluster_attribute_sets"
 
 FORMAT_VERSION_ACC = FORMAT_VERSION_TERM.split("|")[0]
 
@@ -81,15 +86,25 @@ def read_header(self) -> bool:
 
     def create_index(self):
         for i, record in enumerate(self.buffer[SPECTRA_KEY]):
+            name = None
+            key = None
             for attrib in record['attributes']:
                 if attrib["accession"] == "MS:1003061":
-                    self.index.add(i, i, attrib['value'], None, None)
-                    break
+                    name = attrib['value']
+                    if name and key:
+                        break
+                if attrib["accession"] == "MS:1003237":
+                    key = attrib['value']
+                    if name and key:
+                        break
             else:
-                raise ValueError(f"Unidentified spectrum at index {i}")
+                if not name and not key:
+                    raise ValueError(f"Unidentified spectrum at index {i}")
+            self.index.add(key, i, name, None, None)
 
     def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Spectrum:
-        """Retrieve a single spectrum from the library.
+        """
+        Retrieve a single spectrum from the library.
 
         Parameters
         ----------
@@ -102,7 +117,6 @@ def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Sp
         -------
         :class:`~.Spectrum`
         """
-
         if spectrum_number is not None:
             if spectrum_name is not None:
                 raise ValueError(
@@ -111,7 +125,7 @@ def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Sp
         elif spectrum_name is not None:
             offset = self.index.offset_for(spectrum_name)
         data = self.buffer[SPECTRA_KEY][offset]
-        spectrum = self.make_spectrum_from_payload(data)
+        spectrum = self._make_spectrum_from_payload(data)
         return spectrum
 
     def _fill_attributes(self, attributes: List, store: Attributed, context_type: AttributeSetTypes=None) -> Attributed:
@@ -140,7 +154,7 @@ def _fill_attributes(self, attributes: List, store: Attributed, context_type: At
                     store.group_counter = int(group)
         return store
 
-    def make_analyte_from_payload(self, analyte_id, analyte_d: Dict) -> Analyte:
+    def _make_analyte_from_payload(self, analyte_id, analyte_d: Dict) -> Analyte:
         if analyte_id != analyte_d.get('id'):
             warnings.warn(
                 f"An analyte with explicit id {analyte_d['id']!r} does not match its key {analyte_id!r}")
@@ -148,7 +162,7 @@ def make_analyte_from_payload(self, analyte_id, analyte_d: Dict) -> Analyte:
         self._fill_attributes(analyte_d[ELEMENT_ATTRIBUTES_KEY], analyte, AttributeSetTypes.analyte)
         return analyte
 
-    def make_interpretation_from_payload(self, interpretation_id, interpretation_d: Dict) -> Interpretation:
+    def _make_interpretation_from_payload(self, interpretation_id, interpretation_d: Dict) -> Interpretation:
         if interpretation_id != interpretation_d.get('id'):
             warnings.warn(
                 f"An analyte with explicit id {interpretation_d['id']!r} does not match its key {interpretation_id!r}")
@@ -166,7 +180,13 @@ def make_interpretation_from_payload(self, interpretation_id, interpretation_d:
                 interpretation.add_member_interpretation(member_d)
         return interpretation
 
-    def make_spectrum_from_payload(self, data: Dict) -> Spectrum:
+    def _make_cluster_from_payload(self, data: Dict[str, Any]) -> SpectrumCluster:
+        cluster = self._new_cluster()
+        self._fill_attributes(
+            data[ELEMENT_ATTRIBUTES_KEY], cluster, AttributeSetTypes.cluster)
+        return cluster
+
+    def _make_spectrum_from_payload(self, data: Dict) -> Spectrum:
         spectrum = self._new_spectrum()
         self._fill_attributes(
             data[ELEMENT_ATTRIBUTES_KEY],
@@ -175,12 +195,12 @@ def make_spectrum_from_payload(self, data: Dict) -> Spectrum:
         )
         if ANALYTES_KEY in data:
             for analyte_id, analyte in data[ANALYTES_KEY].items():
-                analyte_d = self.make_analyte_from_payload(analyte_id, analyte)
+                analyte_d = self._make_analyte_from_payload(analyte_id, analyte)
                 spectrum.add_analyte(analyte_d)
 
         if INTERPRETATIONS_KEY in data:
             for interpretation_id, interpretation_d in data[INTERPRETATIONS_KEY].items():
-                interpretation = self.make_interpretation_from_payload(
+                interpretation = self._make_interpretation_from_payload(
                     interpretation_id,
                     interpretation_d
                 )
@@ -215,10 +235,16 @@ def make_spectrum_from_payload(self, data: Dict) -> Spectrum:
         return spectrum
 
     def read(self):
+        n = len(self.buffer.get(CLUSTERS_KEY, []))
+        for offset in range(n):
+            data = self.buffer[CLUSTERS_KEY][offset]
+            cluster = self._make_cluster_from_payload(data)
+            yield cluster
+
         n = len(self.buffer[SPECTRA_KEY])
         for offset in range(n):
             data = self.buffer[SPECTRA_KEY][offset]
-            spectrum = self.make_spectrum_from_payload(data)
+            spectrum = self._make_spectrum_from_payload(data)
             yield spectrum
 
 
@@ -241,6 +267,7 @@ def __init__(self, filename, version=None, pretty_print=True, format_annotations
             FORMAT_VERSION_KEY: self.version,
             LIBRARY_METADATA_KEY: [],
             SPECTRA_KEY: [],
+            CLUSTERS_KEY: [],
             SPECTRUM_CLASSES: {},
             ANALYTE_CLASSES: {},
             INTERPRETATION_CLASSES: {},
@@ -271,7 +298,6 @@ def write_header(self, library: SpectralLibraryBackendBase):
             c.name: self._format_attributes(c.attributes) for c in library.interpretation_attribute_sets.values()
         }
 
-
     def _format_attributes(self, attributes_manager: Iterable) -> List:
         attributes = []
         for attribute in attributes_manager:
@@ -311,6 +337,15 @@ def _format_attributes(self, attributes_manager: Iterable) -> List:
             attributes.append(reformed_attribute)
         return attributes
 
+    def write_cluster(self, cluster: SpectrumCluster):
+        attributes = self._format_attributes(
+            cluster.attributes
+        )
+        payload = {
+            ELEMENT_ATTRIBUTES_KEY: attributes
+        }
+        self.buffer[CLUSTERS_KEY].append(payload)
+
     def write_spectrum(self, spectrum: Spectrum):
         mzs = []
         intensities = []
@@ -361,7 +396,7 @@ def write_spectrum(self, spectrum: Spectrum):
                         ELEMENT_ATTRIBUTES_KEY: self._format_attributes(member)
                     }
 
-        spectrum = {
+        payload = {
             ELEMENT_ATTRIBUTES_KEY: attributes,
             MZ_KEY: mzs,
             INTENSITY_KEY: intensities,
@@ -371,9 +406,9 @@ def write_spectrum(self, spectrum: Spectrum):
             INTERPRETATIONS_KEY: interpretations
         }
         if not any(aggregations):
-            spectrum.pop(AGGREGATIONS_KEY)
+            payload.pop(AGGREGATIONS_KEY)
 
-        self.buffer[SPECTRA_KEY].append(spectrum)
+        self.buffer[SPECTRA_KEY].append(payload)
 
     def flush(self):
         # If we know we're writing a complete library, skip the probably-doing-too-many-things
diff --git a/implementations/python/mzlib/backends/msp.py b/implementations/python/mzlib/backends/msp.py
index 30223e7..2900972 100644
--- a/implementations/python/mzlib/backends/msp.py
+++ b/implementations/python/mzlib/backends/msp.py
@@ -883,7 +883,6 @@ def create_index(self) -> int:
         n_spectra: int
             The number of entries read
         """
-
         #### Check that the spectrum library filename isvalid
         filename = self.filename
 
@@ -1395,13 +1394,14 @@ def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Sp
         if spectrum_number is not None:
             if spectrum_name is not None:
                 raise ValueError("Provide only one of spectrum_number or spectrum_name")
-            offset = self.index.offset_for(spectrum_number)
+            index_record = self.index.record_for(spectrum_number)
+            offset = index_record.offset
         elif spectrum_name is not None:
             index_record = self.index.record_for(spectrum_name)
             spectrum_number = index_record.number
             offset = index_record.offset
         buffer = self._get_lines_for(offset)
-        spectrum = self._parse(buffer, spectrum_number)
+        spectrum = self._parse(buffer, index_record.index)
         return spectrum
 
     def summarize_parsing_errors(self) -> Dict:
diff --git a/implementations/python/mzlib/backends/text.py b/implementations/python/mzlib/backends/text.py
index 151875d..f093c60 100644
--- a/implementations/python/mzlib/backends/text.py
+++ b/implementations/python/mzlib/backends/text.py
@@ -1,3 +1,4 @@
+from collections import deque
 import re
 import os
 import io
@@ -60,7 +61,7 @@ class LibraryParserStateEnum(enum.Enum):
 START_OF_ANALYTE_MARKER = re.compile(r"^<Analyte(?:=(.+))>")
 START_OF_PEAKS_MARKER = re.compile(r"^<Peaks>")
 START_OF_LIBRARY_MARKER = re.compile(r"^<mzSpecLib\s+(.+)>")
-SPECTRUM_NAME_PRESENT = re.compile(r'MS:1003061\|spectrum name=')
+SPECTRUM_NAME_PRESENT = re.compile(r'MS:1003061\|(?:library )?spectrum name=')
 START_OF_INTERPRETATION_MEMBER_MARKER = re.compile(r"<InterpretationMember(?:=(.+))>")
 START_OF_ATTRIBUTE_SET = re.compile(
     r"<AttributeSet (Spectrum|Analyte|Interpretation|Cluster)=(.+)>")
@@ -175,7 +176,7 @@ def _parse_header_from_stream(self, stream: io.TextIOBase) -> Tuple[bool, int]:
                             raise ValueError(
                                 f"Malformed grouped attribute {line}")
                     elif "=" in line:
-                        name, value = line.split("=")
+                        name, value = line.split("=", 1)
                         if state == LibraryParserStateEnum.attribute_sets:
                             current_attribute_set.add_attribute(name, value)
                         else:
@@ -207,7 +208,6 @@ def create_index(self) -> int:
         n_spectra: int
             The number of entries read
         """
-
         #### Check that the spectrum library filename isvalid
         filename = self.filename
 
@@ -216,13 +216,19 @@ def create_index(self) -> int:
 
         with open_stream(filename, 'rt', encoding='utf8') as infile:
             state = 'header'
-            spectrum_buffer = []
+            entry_buffer = deque()
+
             n_spectra = 0
+            n_clusters = 0
+
             start_index = 0
             file_offset = 0
+
             line_beginning_file_offset = 0
             spectrum_file_offset = 0
             spectrum_name = ''
+            current_key = None
+            entry_is_cluster = False
 
             # Required for counting file_offset manually (LF vs CRLF)
             infile.readline()
@@ -243,49 +249,80 @@ def create_index(self) -> int:
                 line = line.rstrip()
                 if state == 'header':
 
-                    if START_OF_SPECTRUM_MARKER.match(line):
+                    if is_spec := START_OF_SPECTRUM_MARKER.match(line):
+                        current_key = int(is_spec.group(1))
                         state = 'body'
                         spectrum_file_offset = line_beginning_file_offset
+                        entry_is_cluster = False
+                    elif is_clus := START_OF_CLUSTER.match(line):
+                        current_key = int(is_clus.group(1))
+                        state = 'body'
+                        spectrum_file_offset = line_beginning_file_offset
+                        entry_is_cluster = True
                     else:
                         continue
+
                 if state == 'body':
                     if len(line) == 0:
                         continue
 
-                    if START_OF_SPECTRUM_MARKER.match(line):
-                        if len(spectrum_buffer) > 0:
-                            if not spectrum_name:
-                                raise ValueError("No spectrum name")
-                            self.index.add(
-                                number=n_spectra + start_index,
-                                offset=spectrum_file_offset,
-                                name=spectrum_name,
-                                analyte=None)
-                            n_spectra += 1
-                            spectrum_buffer = []
-                            #### Commit every now and then
-                            if n_spectra % 10000 == 0:
-                                self.index.commit()
-                                logger.info(f"Processed {file_offset} bytes, {n_spectra} spectra read")
-
+                    is_spec = START_OF_SPECTRUM_MARKER.match(line)
+                    is_clus = START_OF_CLUSTER.match(line)
+                    if (is_spec) or (is_clus):  # a new entry starts: flush the one buffered so far
+                        if len(entry_buffer) > 0:
+                            if not entry_is_cluster:
+                                if not spectrum_name:
+                                    raise ValueError("No spectrum name")
+                                self.index.add(
+                                    number=current_key,
+                                    offset=spectrum_file_offset,
+                                    name=spectrum_name,
+                                    analyte=None)
+                                n_spectra += 1
+                                current_key = int(is_spec.group(1)) if is_spec else int(is_clus.group(1))
+                                #### Commit every now and then
+                                if n_spectra % 10000 == 0:
+                                    self.index.commit()
+                                    logger.info(
+                                        f"Processed {file_offset} bytes, {n_spectra} spectra read, {n_clusters} clusters read")
+                            else:  # finished entry is a cluster: index it under its own key, as spectra are
+                                self.index.add_cluster(number=current_key, offset=spectrum_file_offset)
+                                if n_clusters % 10000 == 0:
+                                    self.index.commit()
+                                    logger.info(
+                                        f"Processed {file_offset} bytes, {n_spectra} spectra read, {n_clusters} clusters read")
+                                n_clusters += 1
+                                current_key = int(is_spec.group(1)) if is_spec else int(is_clus.group(1))
+
+                        entry_buffer.clear()
+                        entry_is_cluster = bool(is_clus)
                         spectrum_file_offset = line_beginning_file_offset
                         spectrum_name = ''
-                    if re.match(r'MS:1003061\|spectrum name', line):
-                        spectrum_name = re.match(r'MS:1003061\|spectrum name=(.+)', line).group(1)
-
-                    spectrum_buffer.append(line)
-
-
-            if not spectrum_name:
-                raise ValueError("No spectrum name")
-            self.index.add(
-                number=n_spectra + start_index,
-                offset=spectrum_file_offset,
-                name=spectrum_name,
-                analyte=None)
-            self.index.commit()
-            n_spectra += 1
-            logger.debug(f"Processed {file_offset} bytes, {n_spectra} spectra read")
+                    if re.match(r'MS:1003061\|(?:library )?spectrum name', line):
+                        spectrum_name = re.match(r'MS:1003061\|(?:library )?spectrum name=(.+)', line).group(1)
+
+                    entry_buffer.append(line)
+
+
+            if spectrum_name:  # the file ended inside a named spectrum entry
+                self.index.add(
+                    number=current_key,
+                    offset=spectrum_file_offset,
+                    name=spectrum_name,
+                    analyte=None)
+                self.index.commit()
+                n_spectra += 1
+                logger.info(
+                    f"Processed {file_offset} bytes, {n_spectra} spectra read, {n_clusters} clusters read")
+            elif entry_is_cluster:  # the file ended inside a cluster entry
+                self.index.add_cluster(
+                    number=current_key,
+                    offset=spectrum_file_offset,
+                )
+                self.index.commit()
+                n_clusters += 1
+                logger.info(
+                    f"Processed {file_offset} bytes, {n_spectra} spectra read, {n_clusters} clusters read")
 
             #### Flush the index
             self.index.commit()
@@ -301,7 +338,7 @@ def _buffer_from_stream(self, infile: io.IOBase) -> List:
             if state == 'body':
                 if len(line) == 0:
                     continue
-                if START_OF_SPECTRUM_MARKER.match(line):
+                if START_OF_SPECTRUM_MARKER.match(line) or START_OF_CLUSTER.match(line):
                     if len(spectrum_buffer) > 0:
                         return spectrum_buffer
                 spectrum_buffer.append(line)
@@ -351,14 +388,14 @@ def _parse_attribute_into(self, line: str, store: Attributed,
                 raise ValueError(
                     f"Malformed grouped attribute {line}{line_number_message()}")
         elif "=" in line:
-            name, value = line.split("=")
+            name, value = line.split("=", 1)
             store.add_attribute(name, try_cast(value))
             return True
         else:
             raise ValueError(f"Malformed attribute line {line}{line_number_message()}")
 
     def _parse(self, buffer: Iterable[str], spectrum_index: int = None,
-               start_line_number: int=None) -> Spectrum:
+               start_line_number: int=None) -> Union[Spectrum, SpectrumCluster]:
         spec: Spectrum = self._new_spectrum()
         spec.index = spectrum_index if spectrum_index is not None else -1
         interpretation: Interpretation = None
@@ -416,6 +453,14 @@ def real_line_number_or_nothing():
                     analyte = self._new_analyte(match.group(1))
                     spec.add_analyte(analyte)
                     continue
+
+                elif START_OF_CLUSTER.match(line):
+                    state = STATES.cluster
+                    cluster = self._new_cluster()
+                    match = START_OF_CLUSTER.match(line)
+                    cluster.key = int(match.group(1)) or cluster.index - 1
+                    continue
+
                 self._parse_attribute_into(
                     line, spec, real_line_number_or_nothing, state)
 
@@ -542,7 +587,28 @@ def real_line_number_or_nothing():
                             f"Malformed peak line {line} with {n_tokens} entries{real_line_number_or_nothing()}")
                 else:
                     raise ValueError(f"Malformed peak line {line}{real_line_number_or_nothing()}")
+
             elif state == STATES.cluster:
+                if START_OF_SPECTRUM_MARKER.match(line):
+                    raise ValueError(
+                        f"Clusters should not include spectrum sections {real_line_number_or_nothing()}")
+
+                elif START_OF_PEAKS_MARKER.match(line):
+                    raise ValueError(
+                        f"Clusters should not include peaks {real_line_number_or_nothing()}")
+
+                elif START_OF_INTERPRETATION_MARKER.match(line):
+                    raise ValueError(
+                        f"Clusters should not include interpretation sections {real_line_number_or_nothing()}")
+
+                elif START_OF_ANALYTE_MARKER.match(line):
+                    raise ValueError(
+                        f"Clusters should not include analyte sections {real_line_number_or_nothing()}")
+
+                elif START_OF_INTERPRETATION_MEMBER_MARKER.match(line):
+                    raise ValueError(
+                        f"Clusters should not include interpretation member sections {real_line_number_or_nothing()}")
+
                 self._parse_attribute_into(
                     line, cluster, real_line_number_or_nothing, state)
             else:
@@ -563,16 +629,23 @@ def get_spectrum(self, spectrum_number: int=None,
             if spectrum_name is not None:
                 raise ValueError(
                     "Provide only one of spectrum_number or spectrum_name")
-            offset = self.index.offset_for(spectrum_number)
+            index_record = self.index.record_for(spectrum_number)
+            offset = index_record.offset
         elif spectrum_name is not None:
             index_record = self.index.record_for(spectrum_name)
             offset = index_record.offset
             spectrum_number = index_record.number
 
         buffer = self._get_lines_for(offset)
-        spectrum = self._parse(buffer, spectrum_number)
+        spectrum = self._parse(buffer, index_record.index)
         return spectrum
 
+    def get_cluster(self, cluster_number: int) -> SpectrumCluster:
+        """Look up the indexed offset for ``cluster_number`` and parse the entry found there."""
+        start = self.index.offset_for_cluster(cluster_number)
+        lines = self._get_lines_for(start)
+        return self._parse(lines, cluster_number)
+
 
 class TextSpectralLibraryWriter(SpectralLibraryWriterBase):
     file_format = "mzlb.txt"
@@ -675,6 +748,15 @@ def write_spectrum(self, spectrum: Spectrum):
             self.handle.write("\t".join(peak_parts) + "\n")
         self.handle.write("\n")
 
+    def write_cluster(self, cluster: SpectrumCluster):
+        """Write a ``<Cluster=key>`` header, the cluster's filtered attributes, then a blank line."""
+        header = f"<Cluster={cluster.key}>\n"
+        self.handle.write(header)
+        # Only attributes selected by `_filter_attributes` with `_not_entry_key_or_index` are written
+        selected = list(self._filter_attributes(cluster, self._not_entry_key_or_index))
+        self._write_attributes(selected)
+        self.handle.write("\n")
+
     def close(self):
         self.handle.close()
 
diff --git a/implementations/python/mzlib/cluster.py b/implementations/python/mzlib/cluster.py
index 2d5a3f3..7bba278 100644
--- a/implementations/python/mzlib/cluster.py
+++ b/implementations/python/mzlib/cluster.py
@@ -4,17 +4,20 @@
 
 from typing import Dict,  List
 
-from mzlib.attributes import AttributeManager, AttributeManagedProperty
+from mzlib.attributes import AttributeManager, AttributeManagedProperty, AttributeGroupFacet
 from .utils import ensure_iter, flatten
 
 SIMILAR_SPECTRUM_KEYS = "MS:1003263|similar spectrum keys"
 SIMILAR_SPECTRUM_USI = "MS:1003264|similar spectrum USI"
 
 CLUSTER_KEY = "MS:1003267|spectrum cluster key"
+CLUSTER_SIZE = "MS:1003320|spectrum cluster size"
 
 CLUSTER_MEMBERS_KEYS = "MS:1003268|spectrum cluster member spectrum keys"
 CLUSTER_MEMBER_USI = "MS:1003269|spectrum cluster member USI"
 
+CLUSTER_SUMMARY_STATS = "MS:1003321|summary statistics of clustered spectra"
+
 
 @dataclass
 class ClusterMemberRef:
@@ -36,13 +39,15 @@ def key(self) -> str:
 
 
 class SpectrumCluster(AttributeManager):
-    def __init__(self, attributes: List):
+    def __init__(self, attributes: List=None):
         super().__init__(attributes)
 
     key = AttributeManagedProperty[int](CLUSTER_KEY)
     _member_references = AttributeManagedProperty(CLUSTER_MEMBERS_KEYS)
     _cluster_member_usis = AttributeManagedProperty(CLUSTER_MEMBER_USI)
 
+    size = AttributeManagedProperty[int](CLUSTER_SIZE)
+
     @property
     def members(self) -> List[ClusterMemberRef]:
         internal_refs = [
diff --git a/implementations/python/mzlib/index/base.py b/implementations/python/mzlib/index/base.py
index 13726a5..b14c98e 100644
--- a/implementations/python/mzlib/index/base.py
+++ b/implementations/python/mzlib/index/base.py
@@ -1,12 +1,13 @@
 import warnings
 
-from typing import Collection, Union, Any, List
+from typing import Collection, Iterator, Optional, Union, Any, List
 
 
 class IndexRecordBase:
     __slots__ = ()
 
     number: int
+    index: int
     offset: int
     name: str
 
@@ -21,6 +22,10 @@ def offset_for(self, record_label) -> int:
         record = self.record_for(record_label)
         return record.offset
 
+    def offset_for_cluster(self, record_label) -> int:
+        """Return the ``offset`` of the cluster record found for `record_label`."""
+        return self.record_for_cluster(record_label).offset
+
     def record_for(self, record_label: Union[int, str]) -> IndexRecordBase:
         record = self.search(record_label)
         if isinstance(record, list):
@@ -29,37 +34,98 @@ def record_for(self, record_label: Union[int, str]) -> IndexRecordBase:
             record = record[0]
         return record
 
+    def record_for_cluster(self, record_label: int) -> IndexRecordBase:
+        """Return a single cluster record for `record_label`, warning when a list comes back."""
+        found = self.search_clusters(record_label)
+        if not isinstance(found, list):
+            return found
+        warnings.warn(f"Multiple records found for {record_label}, using the first")
+        return found[0]
+
     def search(self, i: Union[str, int, slice], **kwargs) -> Union[IndexRecordBase, List[IndexRecordBase]]:
         raise NotImplementedError()
 
+    def search_clusters(self, i: Optional[Union[int, slice]]=None, **kwargs) -> Union[IndexRecordBase, List[IndexRecordBase]]:
+        raise NotImplementedError()
+
     def add(self, number: int, offset: int, name: str, analyte: Any, attributes=None):
+        """
+        Add a new entry to the spectrum index.
+
+        Parameters
+        ----------
+        number : int
+            A numerical identifier for this spectrum.
+        offset : int
+            The offset in the file to reach the spectrum (in bytes if appropriate)
+        name : str,
+            A text identifier for this spectrum.
+        analyte : str, optional
+            A text representation of the analyte for that record
+        attributes : Dict[str, Any], optional
+            A key-value pair collection of this record, currently not supported.
+        """
+        raise NotImplementedError()
+
+    def add_cluster(self, number: int, offset: int, attributes=None):
+        """
+        Add a new cluster entry to the index.
+
+        Parameters
+        ----------
+        number : int
+            A numerical identifier for this cluster.
+        offset : int
+            The offset in the file to reach the cluster (in bytes if appropriate)
+        attributes : Dict[str, Any], optional
+            A key-value pair collection of this record, currently not supported.
+        """
         raise NotImplementedError()
 
     def commit(self):
+        """
+        Commit any index state to disk, if this index supports persistence.
+
+        Has no effect on index types that do not have a persistence functionality.
+        """
         raise NotImplementedError()
 
-    def __iter__(self):
+    def iter_clusters(self) -> Iterator[IndexRecordBase]:
+        raise NotImplementedError()
+
+    def iter_spectra(self) -> Iterator[IndexRecordBase]:
         for i in range(len(self)):
             yield self[i]
 
+    def _get_by_index(self, i: Union[int, slice]) -> Union[IndexRecordBase, List[IndexRecordBase]]:
+        raise NotImplementedError()
+
+    def __iter__(self):
+        return self.iter_spectra()
+
     def __getitem__(self, i: Union[int, str, slice]):
         return self.search(i)
 
     def __len__(self):
         raise NotImplementedError()
 
-    def __contains__(self):
-        raise NotImplementedError()
+    def __contains__(self, key) -> bool:
+        try:
+            self.search(key)
+        except (KeyError, IndexError, ValueError):
+            return False  # a failed lookup means the key is not indexed
+        return True
 
     def check_names_unique(self) -> bool:
-        '''Checks that all indexed spectra have unique
+        """
+        Checks that all indexed spectra have unique
         ``spectrum name`` parameters.
 
         Returns
         -------
         bool:
             Whether the spectrum names in the index are unique.
-        '''
+        """
         seen = set()
         for record in self:
             if record.name in seen:
diff --git a/implementations/python/mzlib/index/memory.py b/implementations/python/mzlib/index/memory.py
index f82521f..6262875 100644
--- a/implementations/python/mzlib/index/memory.py
+++ b/implementations/python/mzlib/index/memory.py
@@ -1,46 +1,71 @@
 import warnings
 import logging
 
-from typing import Any, Dict, Optional, List, DefaultDict
+from typing import Any, Dict, Iterator, Optional, List, DefaultDict, Union
 
 from numbers import Integral
 from collections import defaultdict
 
+from mzlib.index.base import IndexRecordBase
+
 from .base import IndexBase, IndexRecordBase
 
 logger = logging.getLogger(__name__)
 logger.addHandler(logging.NullHandler())
 
 
-class IndexRecord(IndexRecordBase):
-    __slots__ = ('number', 'offset', 'name', 'analyte', 'attributes')
+class _IndexAttr:
+    """Mixin adding ``get``/``set`` helpers over an optional ``attributes`` mapping."""
+    __slots__ = ()
+
+    def get(self, key: str, default=None) -> Any:
+        """Return the value stored under `key`, or `default` when there is none."""
+        return default if self.attributes is None else self.attributes.get(key, default)
+
+    def set(self, key: str, value: Any):
+        """Store `value` under `key`, creating the mapping on first use."""
+        if self.attributes is None:
+            self.attributes = {}
+        self.attributes[key] = value
+
+
+class IndexRecord(IndexRecordBase, _IndexAttr):
+    """
+    A spectrum index record.
+
+    Attributes
+    ----------
+    number : int
+        A numerical identifier for the spectrum
+    offset : int
+        The offset in the file to reach the spectrum (in bytes if appropriate)
+    name : str,
+        A text identifier for this spectrum.
+    analyte : str, optional
+        A text representation of the analyte for that record
+    attributes : Dict[str, Any], optional
+        A key-value pair collection of this record.
+    """
+
+    __slots__ = ('number', 'offset', 'name', 'analyte', 'index', 'attributes')
 
     number: int
     offset: int
     name: str
+    index: int
     analyte: Any
     attributes: Optional[Dict[str, Any]]
 
-    def __init__(self, number, offset, name, analyte, attributes=None):
+    def __init__(self, number, offset, name, analyte, index: int=None, attributes=None):
         self.number = number
         self.offset = offset
         self.name = name
         self.analyte = analyte
+        self.index = index
         self.attributes = attributes
 
-    def get(self, key: str, default=None) -> Any:
-        if self.attributes is not None:
-            return self.attributes.get(key, default)
-        return default
-
-    def set(self, key: str, value: Any):
-        if self.attributes is not None:
-            self.attributes[key] = value
-        else:
-            self.attributes = {key: value}
-
     def __repr__(self):
-        template = f"{self.__class__.__name__}({self.number}, {self.offset}, {self.name}, {self.analyte}, {self.attributes})"
+        template = f"{self.__class__.__name__}({self.number}, {self.offset}, {self.name}, {self.analyte}, {self.index}, {self.attributes})"
         return template
 
     def __eq__(self, other):
@@ -74,26 +99,88 @@ def from_dict(cls, state: Dict) -> 'IndexRecord':
         return cls(**state)
 
 
+class ClusterIndexRecord(IndexRecordBase, _IndexAttr):
+    """
+    A spectrum cluster index record.
+
+    Attributes
+    ----------
+    number : int
+        A numerical identifier for this cluster.
+    offset : int
+        The offset in the file to reach the cluster (in bytes if appropriate)
+    attributes : Dict[str, Any], optional
+        A key-value pair collection of this record
+    """
+
+    __slots__ = ('number', 'offset', 'attributes')
+
+    def __init__(self, number, offset, attributes=None):
+        self.number = number
+        self.offset = offset
+        self.attributes = attributes
+
+    def __repr__(self):
+        return f"{self.__class__.__name__}({self.number}, {self.offset}, {self.attributes})"
+
+    def __eq__(self, other):
+        if self.number != other.number:
+            return False
+        elif self.offset != other.offset:
+            return False
+        # None and an empty dictionary both mean "no attributes" and compare
+        # equal; a populated mapping never equals a missing or empty one.
+        if not self.attributes or not other.attributes:
+            return not self.attributes and not other.attributes
+        return self.attributes == other.attributes
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __hash__(self):
+        # There is no ``name`` slot on this class; hash ``number``, which ``__eq__`` also compares
+        return hash(self.number)
+
+    def to_dict(self) -> Dict:
+        """Return a plain dictionary mapping each slot name to its value."""
+        return {k: getattr(self, k, None) for k in self.__slots__}
+
+    @classmethod
+    def from_dict(cls, state: Dict) -> 'ClusterIndexRecord':
+        """Build a record from a dictionary keyed like the one :meth:`to_dict` returns."""
+        return cls(**state)
+
+
 class MemoryIndex(IndexBase):
     records: List[IndexRecord]
+    cluster_records: List[ClusterIndexRecord]
     metadata: Dict[str, Any]
 
     _dirty: bool
+    _by_key: Dict[int, IndexRecord]
     _by_name: DefaultDict[str, List[IndexRecord]]
+    _by_attr: DefaultDict[str, DefaultDict[Any, List[IndexRecord]]]
 
     @classmethod
     def from_filename(cls, filename, library=None):
         inst = cls()
         return inst, False
 
-    def __init__(self, records=None, metadata=None):
+    def __init__(self, records=None, cluster_records=None, metadata=None):
         self.records = list(records or [])
+        self.cluster_records = list(cluster_records or [])
         self._by_name = defaultdict(list)
+        self._by_key = {}
         self._by_attr = defaultdict(lambda: defaultdict(list))
         self.metadata = metadata or {}
         self._dirty = True
 
-    def __iter__(self):
+    def iter_clusters(self) -> Iterator[IndexRecordBase]:
+        """Iterate over cluster entries in the index."""
+        return iter(self.cluster_records)
+
+    def iter_spectra(self):
+        """Iterate over spectrum entries in the index."""
         return iter(self.records)
 
     def __len__(self):
@@ -107,11 +194,11 @@ def search(self, i=None, **kwargs):
             raise NotImplementedError()
         if isinstance(i, Integral):
             try:
-                return self.records[i]
+                return self._by_key[i]
             except IndexError as err:
                 raise KeyError(i) from err
         elif isinstance(i, slice):
-            return self.records[i]
+            return [self._by_key[i] for i in range(i.start, i.stop) if i in self._by_key]
         if i in self._by_name:
             records = self._by_name[i]
             if len(records) == 1:
@@ -121,22 +208,74 @@ def search(self, i=None, **kwargs):
         else:
             raise KeyError(i)
 
+    def search_clusters(self, i=None, **kwargs):
+        if self._dirty:
+            self._update_index()
+        if i is None and kwargs:
+            # Executing attribute query
+            raise NotImplementedError()
+        if isinstance(i, Integral):
+            try:
+                return self.cluster_records[i]
+            except IndexError as err:
+                raise KeyError(i) from err
+        elif isinstance(i, slice):
+            return self.cluster_records[i]
+
     def __getitem__(self, i):
-        return self.search(i)
+        return self._get_by_index(i)
+
+    def _get_by_index(self, i: Union[int, slice]) -> Union[IndexRecord, List[IndexRecord]]:
+        return self.records[i]
 
     def _update_index(self):
         self.records.sort(key=lambda x: x.number)
 
         self._by_name = defaultdict(list)
         for record in self:
+            self._by_key[record.number] = record
             self._by_name[record.name].append(record)
-
         self._dirty = False
 
     def add(self, number: int, offset: int, name: str, analyte: Any, attributes=None):
-        record = IndexRecord(number, offset, name, analyte, attributes)
+        """
+        Add a new entry to the spectrum index.
+
+        Parameters
+        ----------
+        number : int
+            A numerical identifier for this spectrum.
+        offset : int
+            The offset in the file to reach the spectrum (in bytes if appropriate)
+        name : str,
+            A text identifier for this spectrum.
+        analyte : str, optional
+            A text representation of the analyte for that record
+        attributes : Dict[str, Any], optional
+            A key-value pair collection of this record, currently not supported.
+        """
+        n = len(self.records)
+        record = IndexRecord(number, offset, name, analyte, n, attributes)
         self.records.append(record)
         self._dirty = True
 
+    def add_cluster(self, number: int, offset: int, attributes=None):
+        """
+        Add a new cluster entry to the index.
+
+        Parameters
+        ----------
+        number : int
+            A numerical identifier for this cluster.
+        offset : int
+            The offset in the file to reach the cluster (in bytes if appropriate)
+        attributes : Dict[str, Any], optional
+            A key-value pair collection of this record, currently not supported.
+        """
+        record = ClusterIndexRecord(number, offset, attributes)
+        self.cluster_records.append(record)
+        self._dirty = True
+
+
     def commit(self):
         self._update_index()
diff --git a/implementations/python/mzlib/index/sql.py b/implementations/python/mzlib/index/sql.py
index b1782d9..c7829e6 100644
--- a/implementations/python/mzlib/index/sql.py
+++ b/implementations/python/mzlib/index/sql.py
@@ -2,8 +2,11 @@
 import numbers
 import pathlib
 import logging
+from typing import Iterator, List, Union
 
 from sqlalchemy import Column, ForeignKey, Integer, Float, String, DateTime, Text, LargeBinary
+
+from mzlib.index.base import IndexRecordBase
 try: # For SQLAlchemy 2.0
     from sqlalchemy.orm import declarative_base
 except ImportError:
@@ -38,12 +41,23 @@ class SpectrumLibraryIndexRecord(Base):
     number = Column(Integer, nullable=False, index=True)
     offset = Column(Integer, nullable=False)
     name = Column(String(1024), nullable=False)
+    index = Column(Integer, nullable=False, index=True)
     analyte = Column(String(2014), nullable=True)
 
     def __repr__(self):
         return f"{self.__class__.__name__}({self.number}, {self.offset}, {self.name}, {self.analyte})"
 
 
+class ClusterSpectrumLibraryIndexRecord(Base):
+    __tablename__ = 'cluster_spectrum_library_index_record'
+    id = Column(Integer, primary_key=True)
+    number = Column(Integer, nullable=False, index=True)
+    offset = Column(Integer, nullable=False)
+
+    def __repr__(self):  # only ``number`` and ``offset`` exist on this table (no name/analyte)
+        return f"{self.__class__.__name__}({self.number}, {self.offset})"
+
+
 class SQLIndex(IndexBase):
     extension = '.splindex'
 
@@ -77,6 +91,8 @@ def __init__(self, filename):
         self.index_filename = self.filename + self.extension
         self._cache = None
         self.connect()
+        self._size = len(self)
+        self._size_uncommitted = 0
 
     def connect(self, create=None):
         filename = self.index_filename
@@ -94,21 +110,78 @@ def connect(self, create=None):
         self.engine = engine
         self._cache = None
 
-    def add(self, number, offset, name, analyte, attributes=None):
-        record = SpectrumLibraryIndexRecord(number=number, offset=offset, name=name, analyte=analyte)
+    def add(self, number, offset, name, analyte=None, attributes=None):
+        """
+        Add a new entry to the spectrum index.
+
+        Parameters
+        ----------
+        number : int
+            A numerical identifier for this spectrum.
+        offset : int
+            The offset in the file to reach the spectrum (in bytes if appropriate)
+        name : str,
+            A text identifier for this spectrum.
+        analyte : str, optional
+            A text representation of the analyte for that record
+        attributes : Dict[str, Any], optional
+            A key-value pair collection of this record, currently not supported.
+        """
+        record = SpectrumLibraryIndexRecord(number=number, offset=offset, name=name,
+                                            index=self._size + self._size_uncommitted, analyte=analyte)
+        self._size_uncommitted += 1
+        if attributes is not None:
+            raise NotImplementedError("Record attribute storage is not implemented")
+        self.session.add(record)
+
+    def add_cluster(self, number: int, offset: int, attributes=None):
+        """
+        Add a new cluster entry to the index.
+
+        Parameters
+        ----------
+        number : int
+            A numerical identifier for this cluster.
+        offset : int
+            The offset in the file to reach the cluster (in bytes if appropriate)
+        attributes : Dict[str, Any], optional
+            A key-value pair collection of this record, currently not supported.
+        """
+        record = ClusterSpectrumLibraryIndexRecord(number=number, offset=offset)
         if attributes is not None:
             raise NotImplementedError("Record attribute storage is not implemented")
         self.session.add(record)
 
     def commit(self):
+        """Persist any new entries to disk."""
+        self._size += self._size_uncommitted
+        self._size_uncommitted = 0
         self.session.commit()
 
-    def __iter__(self):
-        for record in self.session.query(SpectrumLibraryIndexRecord).order_by(SpectrumLibraryIndexRecord.number).yield_per(10000):
+    def iter_clusters(self) -> Iterator[IndexRecordBase]:
+        """Iterate over cluster entries in the index."""
+        for record in self.session.query(ClusterSpectrumLibraryIndexRecord).order_by(
+                ClusterSpectrumLibraryIndexRecord.number).yield_per(10000):
+            yield record
+
+    def iter_spectra(self):
+        """Iterate over spectrum entries in the index."""
+        for record in self.session.query(SpectrumLibraryIndexRecord).order_by(
+                SpectrumLibraryIndexRecord.number).yield_per(10000):
             yield record
 
     def __getitem__(self, i):
-        return self.search(i)
+        return self._get_by_index(i)
+
+    def _get_by_index(self, i: Union[int, slice]) -> Union[SpectrumLibraryIndexRecord, List[SpectrumLibraryIndexRecord]]:
+        if isinstance(i, slice):
+            records = self.session.query(SpectrumLibraryIndexRecord).slice(i.start, i.stop).all()
+            if i.step:
+                raise NotImplementedError()
+            return records
+        else:
+            record = self.session.query(SpectrumLibraryIndexRecord).offset(i).limit(1).first()
+            return record
 
     def __len__(self):
         value = self.session.query(func.count(SpectrumLibraryIndexRecord.id)).scalar()
@@ -148,3 +221,30 @@ def search(self, i, **kwargs):
             else:
                 return records
 
+    def search_clusters(self, i, **kwargs):
+        """Find cluster record(s) whose ``number`` equals integer `i` or lies in a slice's [start, stop)."""
+        if i is None and kwargs:
+            # Executing attribute query
+            raise NotImplementedError()
+        if isinstance(i, numbers.Integral):
+            if i < 0:
+                # Wrap negative numbers around the cluster table rather than the spectrum table
+                i = self.session.query(func.count(ClusterSpectrumLibraryIndexRecord.id)).scalar() + i
+            # NOTE(review): ``self._cache`` is skipped; nothing visible here stores cluster records in it
+            records = self.session.query(ClusterSpectrumLibraryIndexRecord).filter(
+                ClusterSpectrumLibraryIndexRecord.number == i).all()
+
+            if not records:
+                raise IndexError(i)
+            if len(records) > 1:
+                raise ValueError(f"Too many records found for cluster number {i}")
+            return records[0]
+        elif isinstance(i, slice):
+            start = i.start or 0
+            end = i.stop or float('inf')
+            records = self.session.query(ClusterSpectrumLibraryIndexRecord).filter(
+                ClusterSpectrumLibraryIndexRecord.number >= start,
+                ClusterSpectrumLibraryIndexRecord.number < end).all()
+            return records
+        else:
+            raise NotImplementedError()
diff --git a/implementations/python/mzlib/spectrum.py b/implementations/python/mzlib/spectrum.py
index 8f363f1..a554a07 100644
--- a/implementations/python/mzlib/spectrum.py
+++ b/implementations/python/mzlib/spectrum.py
@@ -15,9 +15,9 @@
 
 #A class that holds data for each spectrum that is read from the SpectralLibrary class
 
-SPECTRUM_NAME = "MS:1003061|spectrum name"
-LIBRARY_ENTRY_KEY = "MS:1003237|library spectrum key"
-LIBRARY_ENTRY_INDEX = "MS:1003062|library spectrum index"
+SPECTRUM_NAME = "MS:1003061|library spectrum name"
+LIBRARY_SPECTRUM_KEY = "MS:1003237|library spectrum key"
+LIBRARY_SPECTRUM_INDEX = "MS:1003062|library spectrum index"
 PRECURSOR_MZ = "MS:1003208|experimental precursor monoisotopic m/z"
 CHARGE_STATE = "MS:1000041|charge state"
 
@@ -66,8 +66,8 @@ def __init__(self, attributes=None, peak_list=None, analytes=None,
         self.interpretations = interpretations
 
     name = AttributeManagedProperty[str](SPECTRUM_NAME)
-    key = AttributeManagedProperty[int](LIBRARY_ENTRY_KEY)
-    index = AttributeManagedProperty[int](LIBRARY_ENTRY_INDEX)
+    key = AttributeManagedProperty[int](LIBRARY_SPECTRUM_KEY)
+    index = AttributeManagedProperty[int](LIBRARY_SPECTRUM_INDEX)
 
     precursor_mz = AttributeListManagedProperty[float]([PRECURSOR_MZ, "MS:1000744|selected ion m/z"])
     precursor_charge = AttributeManagedProperty[int](CHARGE_STATE)
@@ -123,9 +123,15 @@ def __str__(self):  # pragma: no cover
 
     def write(self, format="text", **kwargs):  # pragma: no cover
         """
-        write - Write out the spectrum in any of the supported formats
-        """
+        Write out the spectrum in any of the supported formats
 
+        Parameters
+        ----------
+        format : str
+            The name of the format to write in
+        **kwargs
+            Passed to implementation
+        """
         #### Set a buffer to fill with string data
         buffer = ''
 
diff --git a/implementations/python/mzlib/spectrum_library.py b/implementations/python/mzlib/spectrum_library.py
index a47604f..7e82470 100644
--- a/implementations/python/mzlib/spectrum_library.py
+++ b/implementations/python/mzlib/spectrum_library.py
@@ -1,15 +1,9 @@
 #!/usr/bin/env python3
-from __future__ import print_function
-import sys
-def eprint(*args, **kwargs):
-    print(*args, file=sys.stderr, **kwargs)
-
-import re
-import timeit
 import os
 import pathlib
 
 from typing import Type, List, Union
+from mzlib.cluster import SpectrumCluster
 
 from mzlib.spectrum_library_index import SpectrumLibraryIndex
 from mzlib.spectrum import Spectrum
@@ -60,9 +54,14 @@ def __init__(self, identifier=None, filename=None, format=None, index_type=None)
 
         Parameters
         ----------
+        identifier : str, optional
+            A universal identifier for a hosted spectral library to fetch.
+        filename : str, os.PathLike, or io.IOBase, optional
+            A path-like or file-like object that holds a spectral library to read.
         format : string
             Name of the format for the current encoding of the library.
-
+        index_type : Type[:class:`~.mzlib.index.base.IndexBase`]
+            The type of index to preferentially construct.
         """
         self.backend = None
         self.identifier = identifier
@@ -99,7 +98,10 @@ def _requires_backend(self):
     #### Define getter/setter for attribute identifier
     @property
     def identifier(self):
-        return(self._identifier)
+        if self._identifier is None:
+            if self._backend_initialized():
+                return self.backend.identifier
+        return self._identifier
 
     @identifier.setter
     def identifier(self, identifier):
@@ -148,11 +150,22 @@ def read(self):
         self._requires_backend()
         return self.backend.read()
 
-    def write(self, destination, format: str=None):
-        """Write the library to disk
+    def write(self, destination, format: str=None, **kwargs):
+        """
+        Write the library to disk.
+
+        Parameters
+        ----------
+        destination : str, os.PathLike, or io.IOBase
+            The path or stream to write the library to.
+        format : str, Type, or Callable
+            The name of the format or a callable object that returns
+            a :class:`~.SpectrumLibraryWriterBase`.
+        **kwargs
+            Passed to implementation.
         """
         filename = destination
-        if not isinstance(filename, (str, pathlib.Path)):
+        if not isinstance(filename, (str, pathlib.Path, os.PathLike)):
             filename = getattr(destination, "name", None)
 
         if format is None and filename is not None:
@@ -167,7 +180,7 @@ def write(self, destination, format: str=None):
         if writer_type is None:
             raise ValueError(
                 f"Could not find a format writer from file name {filename} or format {format}")
-        writer = writer_type(destination)
+        writer = writer_type(destination, **kwargs)
         if self._backend_initialized():
             with writer:
                 writer.write_library(self.backend)
@@ -176,7 +189,8 @@ def write(self, destination, format: str=None):
             writer.close()
 
     def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Spectrum:
-        """Retrieve a single spectrum from the library.
+        """
+        Retrieve a single spectrum from the library.
 
         Parameters
         ----------
@@ -192,6 +206,10 @@ def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Sp
         self._requires_backend()
         return self.backend.get_spectrum(spectrum_number, spectrum_name)
 
+    def get_cluster(self, cluster_number: int) -> SpectrumCluster:
+        self._requires_backend()
+        return self.backend.get_cluster(cluster_number)
+
     def find_spectra(self, specification, **query_keys) -> List[Spectrum]:
         """
         find_spectra - Return a list of spectra given query constraints
@@ -214,7 +232,8 @@ def __iter__(self):
         return iter([])
 
     def add_attribute(self, key, value, group_identifier=None):
-        """Add an attribute to the library level attributes store.
+        """
+        Add an attribute to the library level attributes store.
 
         Parameters
         ----------
@@ -230,7 +249,8 @@ def add_attribute(self, key, value, group_identifier=None):
         return self.backend.add_attribute(key, value, group_identifier=group_identifier)
 
     def get_attribute(self, key, group_identifier=None):
-        """Get the value or values associated with a given
+        """
+        Get the value or values associated with a given
         attribute key from the library level attribute store.
 
         Parameters
@@ -249,7 +269,8 @@ def get_attribute(self, key, group_identifier=None):
         return self.backend.get_attribute(key, group_identifier=group_identifier)
 
     def remove_attribute(self, key, group_identifier=None):
-        """Remove the value or values associated with a given
+        """
+        Remove the value or values associated with a given
         attribute key from the library level attribute store.
 
         This rebuilds the entire store, which may be expensive.
@@ -266,7 +287,8 @@ def remove_attribute(self, key, group_identifier=None):
         return self.backend.remove_attribute(key, group_identifier=group_identifier)
 
     def has_attribute(self, key):
-        """Test for the presence of a given attribute in the library
+        """
+        Test for the presence of a given attribute in the library
         level store.
 
         Parameters
diff --git a/implementations/python/pyproject.toml b/implementations/python/pyproject.toml
new file mode 100644
index 0000000..4b2cb8c
--- /dev/null
+++ b/implementations/python/pyproject.toml
@@ -0,0 +1,8 @@
+[tool.ruff]
+target-version = "py38"
+line-length = 120
+select = ["D"]
+ignore = ["D415", "D400", "D212", "D205", "D203", "D105"]
+
+[tool.ruff.pydocstyle]
+convention = "numpy"
\ No newline at end of file
diff --git a/implementations/python/tests/test_data/bad_peak_annotations.mzlb.txt b/implementations/python/tests/test_data/bad_peak_annotations.mzlb.txt
index 74029d3..80b01f4 100644
--- a/implementations/python/tests/test_data/bad_peak_annotations.mzlb.txt
+++ b/implementations/python/tests/test_data/bad_peak_annotations.mzlb.txt
@@ -4,7 +4,7 @@ MS:1003188|library name=tests/test_data/chinese_hamster_hcd_selected_head.msp
 <AttributeSet Analyte=all>
 <AttributeSet Interpretation=all>
 <Spectrum=1>
-MS:1003061|spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV
+MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
 MS:1000744|selected ion m/z=855.4538
@@ -137,7 +137,7 @@ MS:1003169|proforma peptidoform sequence=AAAAC[Carbamidomethyl]ALTPGPLADLAAR
 1496.7792	11918.3	q15/-6.5ppm
 
 <Spectrum=1>
-MS:1003061|spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV
+MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
 MS:1000744|selected ion m/z=855.4538
diff --git a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json
index f329cc0..33d4ff7 100644
--- a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json
+++ b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json
@@ -104,7 +104,7 @@
         },
         {
           "accession": "MS:1003061",
-          "name": "spectrum name",
+          "name": "library spectrum name",
           "value": "AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV"
         },
         {
@@ -611,7 +611,7 @@
         },
         {
           "accession": "MS:1003061",
-          "name": "spectrum name",
+          "name": "library spectrum name",
           "value": "AAAACALTPGPLADLAAR/2_1(4,C,CAM)_53eV"
         },
         {
@@ -1469,7 +1469,7 @@
         },
         {
           "accession": "MS:1003061",
-          "name": "spectrum name",
+          "name": "library spectrum name",
           "value": "AAAAGQTGTVPPGAPGALPLPGMAIVK/2_0_76eV"
         },
         {
@@ -2081,7 +2081,7 @@
         },
         {
           "accession": "MS:1003061",
-          "name": "spectrum name",
+          "name": "library spectrum name",
           "value": "AAAAGSTSVKPIFSR/2_0_44eV"
         },
         {
@@ -2660,7 +2660,7 @@
         },
         {
           "accession": "MS:1003061",
-          "name": "spectrum name",
+          "name": "library spectrum name",
           "value": "AAAAGSTSVKPIFSR/3_0_28eV"
         },
         {
@@ -3389,7 +3389,7 @@
         },
         {
           "accession": "MS:1003061",
-          "name": "spectrum name",
+          "name": "library spectrum name",
           "value": "AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_50eV"
         },
         {
@@ -3839,7 +3839,7 @@
         },
         {
           "accession": "MS:1003061",
-          "name": "spectrum name",
+          "name": "library spectrum name",
           "value": "AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_52eV"
         },
         {
diff --git a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt
index 43cc2f9..1599ab0 100644
--- a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt
+++ b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt
@@ -4,7 +4,7 @@ MS:1003188|library name=tests/test_data/chinese_hamster_hcd_selected_head.msp
 <AttributeSet Analyte=all>
 <AttributeSet Interpretation=all>
 <Spectrum=1>
-MS:1003061|spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV
+MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
 MS:1000744|selected ion m/z=855.4538
@@ -135,7 +135,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=4
 1496.7792	11918.3	y15/-6.5ppm
 
 <Spectrum=2>
-MS:1003061|spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_53eV
+MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_53eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
 MS:1000744|selected ion m/z=855.4538
@@ -383,7 +383,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=5
 1628.3004	719.6	?
 
 <Spectrum=3>
-MS:1003061|spectrum name=AAAAGQTGTVPPGAPGALPLPGMAIVK/2_0_76eV
+MS:1003061|library spectrum name=AAAAGQTGTVPPGAPGALPLPGMAIVK/2_0_76eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
 MS:1000744|selected ion m/z=1207.1672
@@ -549,7 +549,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0
 1980.9857	3567.9	?
 
 <Spectrum=4>
-MS:1003061|spectrum name=AAAAGSTSVKPIFSR/2_0_44eV
+MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/2_0_44eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
 MS:1000744|selected ion m/z=731.9043
@@ -704,7 +704,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=1
 1465.9423	113.63	?
 
 <Spectrum=5>
-MS:1003061|spectrum name=AAAAGSTSVKPIFSR/3_0_28eV
+MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/3_0_28eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=3
 MS:1000744|selected ion m/z=488.2719
@@ -909,7 +909,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0
 1469.9915	925.5	?
 
 <Spectrum=6>
-MS:1003061|spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_50eV
+MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_50eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
 MS:1000744|selected ion m/z=830.8834
@@ -1021,7 +1021,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=6
 1670.2889	140.136	?
 
 <Spectrum=7>
-MS:1003061|spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_52eV
+MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_52eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
 MS:1000744|selected ion m/z=830.8834
diff --git a/implementations/python/tests/test_data/clusters_example.mzlb b/implementations/python/tests/test_data/clusters_example.mzlb
new file mode 100644
index 0000000..6b94bd6
--- /dev/null
+++ b/implementations/python/tests/test_data/clusters_example.mzlb
@@ -0,0 +1,142 @@
+<mzSpecLib 1.0>
+<Cluster=1>
+MS:1003320|spectrum cluster size=6
+MS:1003268|spectrum cluster member spectrum keys=1,6,23,63,89
+MS:1003269|spectrum cluster member USI=mzspec:PXD000561:Adult_Frontalcortex_bRP_Elite_85_f09:scan:17555
+[1]MS:1003321|summary statistics of clustered spectra=MS:1003304|spectral dot product
+[1]MS:1003176|attribute mean=0.7
+[2]MS:1003321|summary statistics of clustered spectra=MS:1003208|experimental precursor monoisotopic m/z
+[2]MS:1003176|attribute mean=1029.05
+[2]MS:1003177|attribute standard deviation=0.41
+MS:1003322|spectrum cluster best representative=63
+
+<Spectrum=1>
+MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV
+MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
+MS:1000041|charge state=2
+MS:1000744|selected ion m/z=855.4538
+MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
+[1]MS:1000045|collision energy=46
+[1]UO:0000000|unit=UO:0000266|electronvolt
+MS:1003057|scan number=5538
+MS:1003203|constituent spectrum file="CHO-K1_bRPLC_C1.RAW.FT.hcd.ch.MGF"
+MS:1003070|number of replicate spectra used=1
+MS:1003069|number of replicate spectra available=2
+MS:1000002|sample name="jhu_cho_brplc_cam"
+MS:1000028|detector resolution=7500
+[2]MS:1000828|isolation window lower offset=0.95
+[2]UO:0000000|unit=MS:1000040|m/z
+[3]MS:1000829|isolation window upper offset=0.95
+[3]UO:0000000|unit=MS:1000040|m/z
+MS:1003085|previous MS1 scan precursor intensity=8799173.32
+MS:1003086|precursor apex intensity=25273307.5
+MS:1000512|filter string="FTMS + p NSI d Full ms2 855.96@hcd35.00 [140.00-1725.00]"
+MS:1003059|number of peaks=87
+[4]MS:1003275|other attribute name=Se
+[4]MS:1003276|other attribute value=1(^G1:sc=8.13346e-015)
+<Analyte=1>
+MS:1000224|molecular mass=1710.9076
+MS:1000888|stripped peptide sequence=AAAACALTPGPLADLAAR
+[1]MS:1001975|delta m/z=1.4
+[1]UO:0000000|unit=UO:0000169|parts per million
+MS:1003208|experimental precursor monoisotopic m/z=855.455
+MS:1003169|proforma peptidoform sequence=AAAAC[Carbamidomethyl]ALTPGPLADLAAR
+MS:1001117|theoretical mass=1708.89303961159
+[2]MS:1003048|number of enzymatic termini=2
+[2]MS:1001045|cleavage agent name=MS:1001251|Trypsin
+[2]MS:1001112|n-terminal flanking residue=R
+[2]MS:1001113|c-terminal flanking residue=L
+[2]MS:1000885|protein accession="tr|G3IJB9|G3IJB9_CRIGR UDP-N-acetylhexosamine pyrophosphorylase-like protein 1 OS=Cricetulus griseus GN=I79_023952 PE=4 SV=1"
+<Interpretation=1>
+MS:1003079|total unassigned intensity fraction=0.2848
+MS:1003080|top 20 peak unassigned intensity fraction=0.1879
+MS:1003289|intensity of highest unassigned peak=0.45
+MS:1003290|number of unassigned peaks among top 20 peaks=4
+<Peaks>
+143.0823	14791.5	b2/5.6ppm
+153.2575	5008.6	?
+159.0917	11531.8	?
+162.5977	5804.6	?
+169.0972	12931.0	?
+175.1193	18211.1	y1/2.0ppm,IR/2.0ppm
+194.0699	6011.0	?
+199.1074	6737.4	?
+201.7527	5576.3	?
+212.1051	13786.1	?
+214.1195	82269.8	b3/4.1ppm
+229.1552	7852.0	?
+232.0764	28555.9	?
+249.1071	4670.7	?
+257.1633	15001.9	a4/9.7ppm
+276.4497	5288.0	?
+283.1411	24965.9	?
+283.3943	5090.5	?
+285.1567	48285.5	b4/3.4ppm
+302.1834	10558.9	?
+303.114	52736.6	?
+317.1891	6585.3	y3/-12.9ppm
+345.1593	18050.6	?
+354.1793	31400.1	b5-NH2-CO-CH2SH/5.9ppm
+356.1916	7124.1	?
+370.3776	6139.7	?
+371.6662	11983.3	?
+374.1505	34930.7	?
+416.2029	8953.6	?
+417.1931	16940.5	a5/3.9ppm
+425.2171	15449.8	b6-NH2-CO-CH2SH/6.6ppm
+428.2024	8874.5	m5:8-H2O/14.4ppm
+428.249	6831.1	?
+430.278	30118.6	y4/1.8ppm
+445.1886	57883.2	b5/5.0ppm
+446.2049	8868.2	m5:8/-4.2ppm
+457.1753	5403.5	?
+467.2682	6117.4	?
+469.2735	6906.4	?
+471.2018	9435.3	?
+487.2299	15480.3	?
+488.2367	13813.7	a6/16.6ppm
+490.7795	12739.9	y10^2/-0.6ppm
+491.2839	6651.0	y10+i^2/5.4ppm
+495.9153	6370.5	?
+499.2364	5273.9	m4:8-H2O/6.2ppm
+516.2236	49862.4	b6/0.2ppm
+528.2323	9881.3	?
+530.2752	10333.1	?
+541.3164	7041.3	?
+545.308	6833.8	y5/7.0ppm
+558.2726	23958.7	?
+570.3232	6006.3	?
+601.3148	11919.6	a7/3.6ppm
+617.2834	5841.2	?
+617.3372	4342.1	?
+629.3052	22419.3	b7/-3.7ppm
+641.3058	11147.2	m2:8-H2O/-2.7ppm
+659.3259	9164.2	m2:8/11.8ppm
+685.3383	6861.0	?
+712.3472	13798.2	b8-H2O/3.6ppm
+730.3578	16426.3	b8/3.5ppm
+826.4765	18892.5	y8/-2.0ppm
+855.017	7062.7	?
+855.5167	249849.0	?
+856.523	57946.3	?
+883.4993	29383.7	y9/-0.3ppm
+922.4447	6413.2	?
+934.4906	6605.4	?
+963.5431	13545.1	y10-NH3/17.9ppm
+980.556	559065.0	y10/3.7ppm
+981.5587	216762.0	y10+i/3.6ppm
+1037.5859	14218.2	?
+1038.5812	9574.2	?
+1067.8983	6611.6	?
+1081.6041	179858.0	y11/3.8ppm
+1082.6077	79540.9	y11+i/4.5ppm
+1143.0283	6099.8	?
+1194.6899	106907.0	y12/4.9ppm
+1195.6869	50339.1	y12+i/-0.1ppm
+1265.7273	83029.7	y13/4.8ppm
+1266.7281	42164.7	y13+i/3.2ppm
+1390.1328	5531.5	?
+1395.6904	8549.2	?
+1425.7538	51400.1	y14/1.4ppm
+1426.7601	40643.8	y14+i/3.8ppm
+1496.7792	11918.3	y15/-6.5ppm
diff --git a/implementations/python/tests/test_data/complex_interpretations.mzlb.txt b/implementations/python/tests/test_data/complex_interpretations.mzlb.txt
index 1617e91..bc547e5 100644
--- a/implementations/python/tests/test_data/complex_interpretations.mzlb.txt
+++ b/implementations/python/tests/test_data/complex_interpretations.mzlb.txt
@@ -1,5 +1,5 @@
 <Spectrum=1>
-MS:1003061|spectrum name=Test
+MS:1003061|library spectrum name=Test
 MS:1000744|selected ion m/z=880.8902
 <Analyte=1>
 MS:1000888|peptidoform=DSDDVPM[Oxidation]VLVGNKCDLAAR
diff --git a/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.json b/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.json
index 4c77e4d..8db18cf 100644
--- a/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.json
+++ b/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.json
@@ -49,7 +49,7 @@
         },
         {
           "accession": "MS:1003061",
-          "name": "spectrum name",
+          "name": "library spectrum name",
           "value": "Test"
         },
         {
diff --git a/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.txt b/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.txt
index 3775eb3..5d648a9 100644
--- a/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.txt
+++ b/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.txt
@@ -1,5 +1,5 @@
 <Spectrum=1>
-MS:1003061|spectrum name=Test
+MS:1003061|library spectrum name=Test
 MS:1000744|selected ion m/z=880.8902
 <Analyte=1>
 MS:1000888|peptidoform=DSDDVPM[Oxidation]VLVGNKCDLAAR
diff --git a/implementations/python/tests/test_index.py b/implementations/python/tests/test_index.py
index 01bb2a6..a10459b 100644
--- a/implementations/python/tests/test_index.py
+++ b/implementations/python/tests/test_index.py
@@ -28,7 +28,7 @@ def test_sequence_behavior(self):
         index = self._make_index(lib)
         assert len(index) == 7
         record = index[3]
-        assert record.number == 3
+        assert record.number == 4
         assert record.name == "AAAAGSTSVKPIFSR/2_0_44eV"
 
 
diff --git a/implementations/python/tests/test_library_backend.py b/implementations/python/tests/test_library_backend.py
index a29f2a9..456c7b7 100644
--- a/implementations/python/tests/test_library_backend.py
+++ b/implementations/python/tests/test_library_backend.py
@@ -22,7 +22,7 @@ def test_sequence_behavior(self):
         assert len(lib) == 7
         spec = lib[3]
         assert spec.get_attribute(
-            "MS:1003061|spectrum name") == "AAAAGSTSVKPIFSR/2_0_44eV"
+            "MS:1003061|library spectrum name") == "AAAAGSTSVKPIFSR/2_0_44eV"
 
     # TODO: Fix clipping in _buffer_from_stream first
     # def test_iteration(self):
diff --git a/implementations/python/tests/test_spectrum.py b/implementations/python/tests/test_spectrum.py
index 7e7288e..534486a 100644
--- a/implementations/python/tests/test_spectrum.py
+++ b/implementations/python/tests/test_spectrum.py
@@ -18,15 +18,15 @@ def get_spectrum(self, index):
         return library.get_spectrum(index)
 
     def test_write(self):
-        spectrum = self.get_spectrum(0)
+        spectrum = self.get_spectrum(1)
         buffer = spectrum.write('text')
         lines = buffer.splitlines()
         n_lines = len(lines)
         assert n_lines == 131
-        assert buffer.startswith("<Spectrum=1>\nMS:1003061|spectrum name")
+        assert buffer.startswith("<Spectrum=1>\nMS:1003061|library spectrum name")
 
     def test_equality(self):
-        spectrum = self.get_spectrum(0)
-        spectrum2 = self.get_spectrum(0)
+        spectrum = self.get_spectrum(1)
+        spectrum2 = self.get_spectrum(1)
         assert spectrum == spectrum2
 

From f1cb7cb3daa3998a90cfc70bcc8e642845cae7e2 Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Thu, 11 May 2023 22:51:31 -0400
Subject: [PATCH 05/24] Fix up slicing

---
 implementations/python/mzlib/backends/bibliospec.py | 2 +-
 implementations/python/mzlib/index/memory.py        | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/implementations/python/mzlib/backends/bibliospec.py b/implementations/python/mzlib/backends/bibliospec.py
index e2c5580..dfba98e 100644
--- a/implementations/python/mzlib/backends/bibliospec.py
+++ b/implementations/python/mzlib/backends/bibliospec.py
@@ -58,7 +58,7 @@ def __getitem__(self, i):
         if isinstance(i, int):
             return self.search(i + 1)
         elif isinstance(i, slice):
-            return [self.search(j + 1) for j in range(i.start, i.stop, i.step)]
+            return [self.search(j + 1) for j in range(*i.indices(len(self)))]  # clamps None/negative bounds
         else:
             raise TypeError(f"Cannot index {self.__class__.__name__} with {i}")
 
diff --git a/implementations/python/mzlib/index/memory.py b/implementations/python/mzlib/index/memory.py
index 6262875..e401a62 100644
--- a/implementations/python/mzlib/index/memory.py
+++ b/implementations/python/mzlib/index/memory.py
@@ -198,7 +198,13 @@ def search(self, i=None, **kwargs):
             except IndexError as err:
                 raise KeyError(i) from err
         elif isinstance(i, slice):
-            return [self._by_key[i] for i in range(i.start, i.stop) if i in self._by_key]
+            start = i.start
+            stop = i.stop
+            if start is None:
+                start = min(self._by_key) if self._by_key else 0
+            if stop is None:
+                stop = max(self._by_key) + 1 if self._by_key else 0  # range() excludes stop
+            return [self._by_key[k] for k in range(start, stop) if k in self._by_key]
         if i in self._by_name:
             records = self._by_name[i]
             if len(records) == 1:

From 778cb32a03c4aa2fe63314b0352e964bb12bcc3e Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Fri, 12 May 2023 09:02:31 -0400
Subject: [PATCH 06/24] Fix cluster retrieval in JSON backend

---
 implementations/python/mzlib/backends/json.py | 40 ++++++++++++++-----
 implementations/python/mzlib/backends/text.py |  8 ++--
 .../python/mzlib/spectrum_library.py          | 12 ++++++
 3 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/implementations/python/mzlib/backends/json.py b/implementations/python/mzlib/backends/json.py
index 336dc86..15fd462 100644
--- a/implementations/python/mzlib/backends/json.py
+++ b/implementations/python/mzlib/backends/json.py
@@ -1,5 +1,4 @@
 import io
-import enum
 import json
 import logging
 import warnings
@@ -7,7 +6,6 @@
 from typing import Any, Iterable, List, Dict, Mapping, Union
 
 from pathlib import Path
-from xml.dom.minidom import Attr
 from mzlib.cluster import SpectrumCluster
 
 from mzlib.index import MemoryIndex
@@ -17,6 +15,7 @@
 from mzlib.spectrum import Spectrum
 
 from .base import SpectralLibraryBackendBase, SpectralLibraryWriterBase, FORMAT_VERSION_TERM, AttributeSetTypes
+from .utils import open_stream
 
 
 logger = logging.getLogger(__name__)
@@ -57,10 +56,11 @@ def __init__(self, filename, index_type=None, read_metadata=True):
         self.buffer = {}
         self._load_buffer(self.filename)
         self.attributes = AttributeManager()
-        self._fill_attributes(self.buffer.get(LIBRARY_METADATA_KEY), self.attributes)
         self.index, was_initialized = index_type.from_filename(self.filename)
         if not was_initialized:
             self.create_index()
+        if read_metadata:
+            self.read_header()
 
     @classmethod
     def guess_from_filename(cls, filename: Union[str, Path, io.FileIO, Mapping]) -> bool:
@@ -68,24 +68,25 @@ def guess_from_filename(cls, filename: Union[str, Path, io.FileIO, Mapping]) ->
             return SPECTRA_KEY in filename and LIBRARY_METADATA_KEY in filename
         return super(JSONSpectralLibrary, cls).guess_from_filename(filename)
 
-    def _load_buffer(self, filename_or_stream):
-        if isinstance(filename_or_stream, dict):
+    def _load_buffer(self, filename_or_stream: Union[str, Path, io.FileIO, Mapping]):
+        if isinstance(filename_or_stream, Mapping):
             self.buffer = filename_or_stream
         else:
             if hasattr(filename_or_stream, 'read'):
                 self.handle = filename_or_stream
             else:
-                self.handle = open(filename_or_stream, 'rt')
+                self.handle = open_stream(filename_or_stream, 'rt')
             self.buffer = json.load(self.handle)
             self.handle.close()
 
     def read_header(self) -> bool:
         if self.buffer:
-            pass
+            self._fill_attributes(self.buffer.get(LIBRARY_METADATA_KEY), self.attributes)
+            return True
         return False
 
     def create_index(self):
-        for i, record in enumerate(self.buffer[SPECTRA_KEY]):
+        for i, record in enumerate(self.buffer.get(SPECTRA_KEY, [])):
             name = None
             key = None
             for attrib in record['attributes']:
@@ -101,6 +102,16 @@ def create_index(self):
                 if not name and not key:
                     raise ValueError(f"Unidentified spectrum at index {i}")
             self.index.add(key, i, name, None, None)
+        for i, record in enumerate(self.buffer.get(CLUSTERS_KEY, [])):
+            key = None
+            for attrib in record[ELEMENT_ATTRIBUTES_KEY]:
+                if attrib["accession"] == "MS:1003267":
+                    key = attrib['value']
+                    break
+            else:  # loop finished without finding a cluster key attribute
+                if key is None:
+                    raise ValueError(f"Unidentified spectrum cluster at index {i}")
+            self.index.add_cluster(key, i, None)
 
     def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Spectrum:
         """
@@ -109,7 +120,7 @@ def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Sp
         Parameters
         ----------
         spectrum_number : int, optional
-            The index of the specturm in the library
+            The index of the spectrum in the library
         spectrum_name : str, optional
             The name of the spectrum in the library
 
@@ -128,7 +139,14 @@ def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Sp
         spectrum = self._make_spectrum_from_payload(data)
         return spectrum
 
-    def _fill_attributes(self, attributes: List, store: Attributed, context_type: AttributeSetTypes=None) -> Attributed:
+    def get_cluster(self, cluster_number: int) -> SpectrumCluster:
+        offset = self.index.offset_for_cluster(cluster_number)
+        data = self.buffer[CLUSTERS_KEY][offset]
+        cluster = self._make_cluster_from_payload(data)
+        return cluster
+
+    def _fill_attributes(self, attributes: List[Dict[str, Any]], store: Attributed,
+                         context_type: AttributeSetTypes=None) -> Attributed:
         for attrib in attributes:
             if attrib['accession'] == "MS:1003212":
                 if context_type == AttributeSetTypes.analyte:
@@ -137,6 +155,8 @@ def _fill_attributes(self, attributes: List, store: Attributed, context_type: At
                     self.entry_attribute_sets[attrib['value']].apply(store)
                 elif context_type == AttributeSetTypes.interpretation:
                     self.interpretation_attribute_sets[attrib['value']].apply(store)
+                elif context_type == AttributeSetTypes.cluster:
+                    self.cluster_attribute_sets[attrib['value']].apply(store)
                 else:
                     raise ValueError(f"Could not infer which attribute set type to use for {context_type}")
             else:
diff --git a/implementations/python/mzlib/backends/text.py b/implementations/python/mzlib/backends/text.py
index f093c60..a197210 100644
--- a/implementations/python/mzlib/backends/text.py
+++ b/implementations/python/mzlib/backends/text.py
@@ -1,4 +1,3 @@
-from collections import deque
 import re
 import os
 import io
@@ -6,6 +5,7 @@
 import warnings
 import enum
 
+from collections import deque
 from typing import ClassVar, List, Tuple, Union, Iterable
 
 from mzlib.annotation import parse_annotation
@@ -193,11 +193,11 @@ def _parse_header_from_stream(self, stream: io.TextIOBase) -> Tuple[bool, int]:
             return True, nbytes
         return False, 0
 
-    def read_header(self) -> Tuple[bool, int]:
+    def read_header(self) -> bool:
         if isinstance(self.filename, io.IOBase):
-            return self._parse_header_from_stream(self.filename)
+            return self._parse_header_from_stream(self.filename)[0]
         with open_stream(self.filename, 'rt', encoding='utf8') as stream:
-            return self._parse_header_from_stream(stream)
+            return self._parse_header_from_stream(stream)[0]
 
     def create_index(self) -> int:
         """
diff --git a/implementations/python/mzlib/spectrum_library.py b/implementations/python/mzlib/spectrum_library.py
index 7e82470..564c54b 100644
--- a/implementations/python/mzlib/spectrum_library.py
+++ b/implementations/python/mzlib/spectrum_library.py
@@ -207,6 +207,18 @@ def get_spectrum(self, spectrum_number: int=None, spectrum_name: str=None) -> Sp
         return self.backend.get_spectrum(spectrum_number, spectrum_name)
 
     def get_cluster(self, cluster_number: int) -> SpectrumCluster:
+        """
+        Retrieve a single spectrum cluster from the library.
+
+        Parameters
+        ----------
+        cluster_number : int
+            The index of the cluster in the library
+
+        Returns
+        -------
+        :class:`~.SpectrumCluster`
+        """
         self._requires_backend()
         return self.backend.get_cluster(cluster_number)
 

From a1a965e847a2b4f9a6c1372a08df7be6015b1479 Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Mon, 15 May 2023 22:31:52 -0400
Subject: [PATCH 07/24] Update machinery for testing

---
 ...chinese_hamster_hcd_selected_head.mzlb.txt | 14 ++--
 implementations/python/mzlib/backends/base.py | 28 +++++---
 .../python/mzlib/backends/bibliospec.py       | 18 ++---
 .../python/mzlib/backends/diann.py            | 11 +++-
 implementations/python/mzlib/backends/json.py |  8 +--
 implementations/python/mzlib/backends/msp.py  | 18 ++---
 .../python/mzlib/backends/spectronaut.py      |  8 ++-
 implementations/python/mzlib/backends/text.py |  3 +
 .../python/mzlib/backends/utils.py            | 19 ++++--
 implementations/python/mzlib/index/sql.py     | 15 +++--
 implementations/python/mzlib/spectrum.py      |  3 +-
 .../python/mzlib/spectrum_library.py          | 20 ++++--
 implementations/python/mzlib/tools/cli.py     |  9 +++
 implementations/python/mzlib/tools/utils.py   | 65 +++++++++++++++++++
 ...hinese_hamster_hcd_selected_head.mzlb.json | 29 +++++----
 ...chinese_hamster_hcd_selected_head.mzlb.txt | 14 ++--
 ...lex_interpretations_with_members.mzlb.json |  1 +
 17 files changed, 207 insertions(+), 76 deletions(-)
 create mode 100644 implementations/python/mzlib/tools/utils.py

diff --git a/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt b/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt
index 79cd964..5cc3e74 100644
--- a/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt
+++ b/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt
@@ -7,7 +7,7 @@ MS:1003188|library name=examples/chinese_hamster_hcd_selected_head.msp
 MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
-MS:1000744|selected ion m/z=855.4538
+MS:1003208|experimental precursor monoisotopic m/z=855.4538
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=46
 [1]UO:0000000|unit=UO:0000266|electronvolt
@@ -138,7 +138,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=4
 MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_53eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
-MS:1000744|selected ion m/z=855.4538
+MS:1003208|experimental precursor monoisotopic m/z=855.4538
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=53
 [1]UO:0000000|unit=UO:0000266|electronvolt
@@ -386,7 +386,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=5
 MS:1003061|library spectrum name=AAAAGQTGTVPPGAPGALPLPGMAIVK/2_0_76eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
-MS:1000744|selected ion m/z=1207.1672
+MS:1003208|experimental precursor monoisotopic m/z=1207.1672
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=76
 [1]UO:0000000|unit=UO:0000266|electronvolt
@@ -552,7 +552,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0
 MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/2_0_44eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
-MS:1000744|selected ion m/z=731.9043
+MS:1003208|experimental precursor monoisotopic m/z=731.9043
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=44
 [1]UO:0000000|unit=UO:0000266|electronvolt
@@ -707,7 +707,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=1
 MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/3_0_28eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=3
-MS:1000744|selected ion m/z=488.2719
+MS:1003208|experimental precursor monoisotopic m/z=488.2719
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=28
 [1]UO:0000000|unit=UO:0000266|electronvolt
@@ -912,7 +912,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0
 MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_50eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
-MS:1000744|selected ion m/z=830.8834
+MS:1003208|experimental precursor monoisotopic m/z=830.8834
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=50
 [1]UO:0000000|unit=UO:0000266|electronvolt
@@ -1024,7 +1024,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=6
 MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_52eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
-MS:1000744|selected ion m/z=830.8834
+MS:1003208|experimental precursor monoisotopic m/z=830.8834
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=52
 [1]UO:0000000|unit=UO:0000266|electronvolt
diff --git a/implementations/python/mzlib/backends/base.py b/implementations/python/mzlib/backends/base.py
index b718414..8b6ad0f 100644
--- a/implementations/python/mzlib/backends/base.py
+++ b/implementations/python/mzlib/backends/base.py
@@ -19,7 +19,7 @@
 from mzlib.attributes import Attributed, AttributedEntity, AttributeSet, AttributeManagedProperty
 from mzlib.ontology import _VocabularyResolverMixin
 
-from .utils import open_stream, LineBuffer
+from .utils import open_stream, _LineBuffer
 
 logger = logging.getLogger(__name__.rsplit(".", 1)[0])
 logger.addHandler(logging.NullHandler())
@@ -129,7 +129,7 @@ def guess_from_filename(cls, filename: Union[str, Path, io.FileIO]) -> bool:
         return filename.endswith(cls.file_format)
 
     @classmethod
-    def guess_from_header(cls, filename) -> bool:
+    def guess_from_header(cls, filename: Union[str, Path, io.FileIO]) -> bool:
         """
         Guess if the file is of this type by inspecting the file's header section
 
@@ -146,7 +146,7 @@ def guess_from_header(cls, filename) -> bool:
         return False
 
     @classmethod
-    def guess_implementation(cls, filename, index_type=None,
+    def guess_implementation(cls, filename: Union[str, Path, io.FileIO], index_type=None,
                              **kwargs) -> 'SpectralLibraryBackendBase':
         """
         Guess the backend implementation to use with this file format.
@@ -179,7 +179,7 @@ def guess_implementation(cls, filename, index_type=None,
                 pass
         raise FormatInferenceFailure(f"Could not guess backend implementation for {filename}")
 
-    def __init__(self, filename):
+    def __init__(self, filename: Union[str, Path, io.FileIO]):
         self.filename = filename
         self.index = MemoryIndex()
 
@@ -332,7 +332,7 @@ def __getitem__(self, i) -> Union[Spectrum, List[Spectrum]]:
         return result
 
     @classmethod
-    def has_index_preference(cls, filename: str) -> Type[IndexBase]:
+    def has_index_preference(cls, filename: Union[str, Path, io.FileIO]) -> Type[IndexBase]:
         """
         Does this backend prefer a particular index for this file?
 
@@ -358,7 +358,14 @@ def has_index_preference(cls, filename: str) -> Type[IndexBase]:
         except Exception:
             return MemoryIndex
 
-    def read(self):
+    def read(self) -> Iterator[Union[Spectrum, SpectrumCluster]]:
+        """
+        Create a sequential iterator over the spectrum library.
+
+        Yields
+        ------
+        entry :  Union[:class:`~.Spectrum`, :class:`~.SpectrumCluster`]
+        """
         raise NotImplementedError()
 
     def _add_attribute_set(self, attribute_set: AttributeSet,
@@ -383,8 +390,8 @@ def summarize_parsing_errors(self) -> Dict:
 
 class _PlainTextSpectralLibraryBackendBase(SpectralLibraryBackendBase):
 
-    def __init__(self, filename, index_type=None, read_metadata=True,
-                 create_index: bool=True):
+    def __init__(self, filename: Union[str, Path, io.FileIO], index_type=None,
+                 read_metadata: bool=True, create_index: bool=True):
         if index_type is None and create_index:
             index_type = self.has_index_preference(filename)
 
@@ -428,7 +435,7 @@ def read(self) -> Iterator[Spectrum]:
                 raise ValueError("Could not locate valid header")
             else:
                 stream.seek(offset)
-            buffering_stream = LineBuffer(stream)
+            buffering_stream = _LineBuffer(stream)
             while True:
                 # Will clip the first line of the next spectrum. Needs work
                 buffer = self._buffer_from_stream(buffering_stream)
@@ -490,7 +497,8 @@ def guess_from_header(cls, filename) -> bool:
                 return False
         return False
 
-    def __init__(self, filename: str, index_type=None, delimiter='\t', read_metadata=True, create_index: bool = True, ** kwargs):
+    def __init__(self, filename: Union[str, Path, io.FileIO], index_type=None, delimiter='\t',
+                 read_metadata: bool=True, create_index: bool = True, ** kwargs):
         if index_type is None:
             index_type = self.has_index_preference(filename)
         self._delimiter = delimiter
diff --git a/implementations/python/mzlib/backends/bibliospec.py b/implementations/python/mzlib/backends/bibliospec.py
index dfba98e..4a0a322 100644
--- a/implementations/python/mzlib/backends/bibliospec.py
+++ b/implementations/python/mzlib/backends/bibliospec.py
@@ -5,7 +5,7 @@
 import sqlite3
 import zlib
 
-from typing import List, Mapping, Tuple, Iterable, Type
+from typing import Iterator, List, Mapping, Tuple, Iterable, Type
 
 import numpy as np
 
@@ -100,7 +100,6 @@ def __init__(self, filename, **kwargs):
         self.read_header()
 
     def read_header(self) -> bool:
-        '''Stub implementation, awaiting better understanding of Bibliospec to divine other metadata'''
         attribs = AttributeManager()
         attribs.add_attribute(FORMAT_VERSION_TERM, DEFAULT_VERSION)
         attribs.add_attribute("MS:1003207|library creation software", "Bibliospec")
@@ -116,24 +115,25 @@ def read_header(self) -> bool:
         return True
 
     def _populate_analyte(self, analyte: Analyte, row: Mapping):
-        '''Fill an analyte with details describing a peptide sequence and inferring
+        """
+        Fill an analyte with details describing a peptide sequence and inferring
         from context its traits based upon the assumptions Bibliospec makes.
 
         Bibliospec only stores modifications as delta masses.
-        '''
+        """
         peptide = self._correct_modifications_in_sequence(row)
         analyte.add_attribute("MS:1003169|proforma peptidoform sequence", str(peptide))
         analyte.add_attribute("MS:1001117|theoretical mass", peptide.mass)
         analyte.add_attribute("MS:1000888|stripped peptide sequence", row['peptideSeq'])
         analyte.add_attribute(CHARGE_STATE, row['precursorCharge'])
 
-
     def get_spectrum(self, spectrum_number: int = None, spectrum_name: str = None):
-        '''Read a spectrum from the spectrum library.
+        """
+        Read a spectrum from the spectrum library.
 
         Bibliospec does not support alternative labeling of spectra with a
         plain text name so looking up by `spectrum_name` is not supported.
-        '''
+        """
         if spectrum_number is None:
             raise ValueError("Only spectrum number queries are supported. spectrum_number must have an integer value")
 
@@ -201,5 +201,7 @@ def get_spectrum(self, spectrum_number: int = None, spectrum_name: str = None):
         spectrum.peak_list = peak_list
         return spectrum
 
-
+    def read(self) -> Iterator[Spectrum]:
+        for rec in self.index:
+            yield self.get_spectrum(rec.number)
 
diff --git a/implementations/python/mzlib/backends/diann.py b/implementations/python/mzlib/backends/diann.py
index a119310..ea7a7aa 100644
--- a/implementations/python/mzlib/backends/diann.py
+++ b/implementations/python/mzlib/backends/diann.py
@@ -1,11 +1,13 @@
 import json
+import os
 
 from typing import List, Tuple, Dict, Iterator, Any, Union
 
 from pyteomics import proforma
 
 from mzlib import annotation
-from mzlib.backends.base import DEFAULT_VERSION, FORMAT_VERSION_TERM, _CSVSpectralLibraryBackendBase
+from mzlib.backends.base import DEFAULT_VERSION, FORMAT_VERSION_TERM, LIBRARY_NAME_TERM, _CSVSpectralLibraryBackendBase
+from mzlib.backends.utils import open_stream
 from mzlib.spectrum import Spectrum, SPECTRUM_NAME
 
 
@@ -66,11 +68,16 @@ def _spectrum_type(self):
     def read_header(self) -> bool:
         result = super().read_header()
         self.add_attribute(FORMAT_VERSION_TERM, DEFAULT_VERSION)
+        if hasattr(self.filename, 'name'):
+            name = self.filename.name.replace(".gz", '').rsplit('.', 1)[0].split(os.sep)[-1]
+        else:
+            name = self.filename.replace(".gz", '').rsplit(".", 1)[0].split(os.sep)[-1]
+        self.add_attribute(LIBRARY_NAME_TERM, name)
         self.add_attribute("MS:1003207|library creation software", "MS:1003253|DIA-NN")
         return result
 
     def create_index(self):
-        with open(self.filename, 'rb') as stream:
+        with open_stream(self.filename, 'rb') as stream:
             header = stream.readline()
             header_cols = header.split(b'\t')
             column_key = header_cols.index(b'transition_group_id')
diff --git a/implementations/python/mzlib/backends/json.py b/implementations/python/mzlib/backends/json.py
index 15fd462..d26a165 100644
--- a/implementations/python/mzlib/backends/json.py
+++ b/implementations/python/mzlib/backends/json.py
@@ -322,10 +322,10 @@ def _format_attributes(self, attributes_manager: Iterable) -> List:
         attributes = []
         for attribute in attributes_manager:
             reformed_attribute = {}
-            if attribute.group_id is None:
-                key, value = attribute
-            else:
-                key, value, cv_param_group = attribute
+            key = attribute.key
+            value = attribute.value
+            if attribute.group_id is not None:
+                cv_param_group = attribute.group_id
                 reformed_attribute['cv_param_group'] = cv_param_group
 
             term = None
diff --git a/implementations/python/mzlib/backends/msp.py b/implementations/python/mzlib/backends/msp.py
index 2900972..0f3704f 100644
--- a/implementations/python/mzlib/backends/msp.py
+++ b/implementations/python/mzlib/backends/msp.py
@@ -285,13 +285,13 @@ def add(self, handler: AttributeHandler):
     "precursor_charge": "MS:1000041|charge state",
     "precursorcharge": "MS:1000041|charge state",
 
-    "Parent": "MS:1000744|selected ion m/z",
-    "ObservedPrecursorMZ": "MS:1000744|selected ion m/z",
-    "PrecursorMZ": "MS:1000744|selected ion m/z",
-    "PRECURSORMZ": "MS:1000744|selected ion m/z",
-    "precursor": "MS:1000744|selected ion m/z",
-    "precursor_mass": "MS:1000744|selected ion m/z",
-    "precursormass": "MS:1000744|selected ion m/z",
+    "Parent": "MS:1003208|experimental precursor monoisotopic m/z",
+    "ObservedPrecursorMZ": "MS:1003208|experimental precursor monoisotopic m/z",
+    "PrecursorMZ": "MS:1003208|experimental precursor monoisotopic m/z",
+    "PRECURSORMZ": "MS:1003208|experimental precursor monoisotopic m/z",
+    "precursor": "MS:1003208|experimental precursor monoisotopic m/z",
+    "precursor_mass": "MS:1003208|experimental precursor monoisotopic m/z",
+    "precursormass": "MS:1003208|experimental precursor monoisotopic m/z",
 
     "Single": ["MS:1003065|spectrum aggregation type", "MS:1003066|singleton spectrum"],
     "Consensus": ["MS:1003065|spectrum aggregation type", "MS:1003067|consensus spectrum"],
@@ -865,9 +865,9 @@ def _parse_header_from_stream(self, stream: io.IOBase) -> Tuple[bool, int]:
         attributes = AttributeManager()
         attributes.add_attribute(FORMAT_VERSION_TERM, DEFAULT_VERSION)
         if isinstance(self.filename, (str, os.PathLike)):
-            attributes.add_attribute(LIBRARY_NAME_TERM, self.filename)
+            attributes.add_attribute(LIBRARY_NAME_TERM, self.filename.rsplit('.msp', 1)[0].split(os.sep)[-1])
         elif hasattr(stream, 'name'):
-            attributes.add_attribute(LIBRARY_NAME_TERM, stream.name)
+            attributes.add_attribute(LIBRARY_NAME_TERM, stream.name.rsplit('.msp', 1)[0].split(os.sep)[-1])
         self.attributes.clear()
         self.attributes._from_iterable(attributes)
         if leader_terms_pattern.match(first_line):
diff --git a/implementations/python/mzlib/backends/spectronaut.py b/implementations/python/mzlib/backends/spectronaut.py
index 77551ab..c797bf5 100644
--- a/implementations/python/mzlib/backends/spectronaut.py
+++ b/implementations/python/mzlib/backends/spectronaut.py
@@ -1,4 +1,5 @@
 import json
+import os
 
 from typing import List, Tuple, Dict, Iterator, Any, Deque, Union
 
@@ -6,7 +7,7 @@
 
 from mzlib import annotation
 from mzlib.analyte import Analyte
-from mzlib.backends.base import _CSVSpectralLibraryBackendBase, FORMAT_VERSION_TERM, DEFAULT_VERSION
+from mzlib.backends.base import LIBRARY_NAME_TERM, _CSVSpectralLibraryBackendBase, FORMAT_VERSION_TERM, DEFAULT_VERSION
 from mzlib.backends.utils import open_stream
 from mzlib.spectrum import Spectrum, SPECTRUM_NAME
 
@@ -95,6 +96,11 @@ def _spectrum_type(self):
     def read_header(self) -> bool:
         result = super().read_header()
         self.add_attribute(FORMAT_VERSION_TERM, DEFAULT_VERSION)
+        if hasattr(self.filename, 'name'):
+            name = self.filename.name.replace(".gz", '').rsplit('.', 1)[0].split(os.sep)[-1]
+        else:
+            name = self.filename.replace(".gz", '').rsplit(".", 1)[0].split(os.sep)[-1]
+        self.add_attribute(LIBRARY_NAME_TERM, name)
         self.add_attribute("MS:1003207|library creation software", "MS:1001327|Spectronaut")
         return result
 
diff --git a/implementations/python/mzlib/backends/text.py b/implementations/python/mzlib/backends/text.py
index a197210..04bad89 100644
--- a/implementations/python/mzlib/backends/text.py
+++ b/implementations/python/mzlib/backends/text.py
@@ -427,6 +427,9 @@ def real_line_number_or_nothing():
             line = line.strip()
             if not line:
                 break
+            # Skip comments for now, no round-trip
+            if line.startswith("#"):
+                continue
             if state == STATES.header:
                 if START_OF_SPECTRUM_MARKER.match(line):
                     match = START_OF_SPECTRUM_MARKER.match(line)
diff --git a/implementations/python/mzlib/backends/utils.py b/implementations/python/mzlib/backends/utils.py
index dd2f127..3711684 100644
--- a/implementations/python/mzlib/backends/utils.py
+++ b/implementations/python/mzlib/backends/utils.py
@@ -20,10 +20,16 @@
     pass
 
 
-class LineBuffer(object):
+class _LineBuffer(object):
+    """
+    An implementation detail that treats a stream/iterator over line strings as a LIFO
+    queue that can have lines pushed back onto it.
+    """
+
     lines: deque
     stream: io.IOBase
     last_line: str
+    _stream_is_file_like: bool
 
     def __init__(self, stream: io.IOBase, lines: Iterable=None, last_line: str=None):
         if lines is None:
@@ -31,12 +37,13 @@ def __init__(self, stream: io.IOBase, lines: Iterable=None, last_line: str=None)
         self.lines = deque(lines)
         self.stream = stream
         self.last_line = last_line
+        self._stream_is_file_like = hasattr(self.stream, 'readline')
 
     def readline(self) -> Union[bytes, str]:
         if self.lines:
             line = self.lines.popleft()
         else:
-            line = self.stream.readline()
+            line = self.stream.readline() if self._stream_is_file_like else next(self.stream)
         self.last_line = line
         return line
 
@@ -77,13 +84,15 @@ def try_cast(value: Any) -> Union[str, int, float, Any]:
     return value
 
 
-def test_gzipped(f):
-    """Checks the first two bytes of the
+def test_gzipped(f) -> bool:
+    """
+    Checks the first two bytes of the
     passed file for gzip magic numbers
 
     Parameters
     ----------
     f : file-like or path-like
+        The file to test
 
     Returns
     -------
@@ -149,6 +158,8 @@ def open_stream(f: Union[io.IOBase, os.PathLike], mode='rt', buffer_size: Option
 
 
 class CaseInsensitiveDict(Dict[str, Any]):
+    """A case-insensitive version of a dictionary with string keys."""
+
     def __init__(self, base=None, **kwargs):
         if base is not None:
             self.update(base)
diff --git a/implementations/python/mzlib/index/sql.py b/implementations/python/mzlib/index/sql.py
index c7829e6..24cc10b 100644
--- a/implementations/python/mzlib/index/sql.py
+++ b/implementations/python/mzlib/index/sql.py
@@ -1,3 +1,4 @@
+import io
 import os
 import numbers
 import pathlib
@@ -12,8 +13,9 @@
 except ImportError:
     from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import relationship
+from sqlalchemy.engine import Engine
 from sqlalchemy import create_engine, func
-from sqlalchemy.orm import sessionmaker
+from sqlalchemy.orm import sessionmaker, scoped_session
 
 from .base import IndexBase
 
@@ -61,6 +63,12 @@ def __repr__(self):
 class SQLIndex(IndexBase):
     extension = '.splindex'
 
+    filename: str
+    index_filename: str
+    _cache: SpectrumLibraryIndexRecord
+    session: scoped_session
+    engine: Engine
+
     @classmethod
     def from_filename(cls, filename, library=None):
         if not isinstance(filename, (str, pathlib.Path)):
@@ -77,7 +85,7 @@ def from_filename(cls, filename, library=None):
         return inst, exists
 
     @classmethod
-    def exists(cls, filename):
+    def exists(cls, filename: Union[str, pathlib.Path, io.FileIO]):
         if not isinstance(filename, (str, pathlib.Path)):
             if not hasattr(filename, "name"):
                 raise TypeError(f"Could not coerce filename from {filename}")
@@ -104,8 +112,7 @@ def connect(self, create=None):
         engine = create_engine("sqlite:///"+filename)
         Base.metadata.create_all(engine)
 
-        DBSession = sessionmaker(bind=engine)
-        session = DBSession()
+        session = scoped_session(sessionmaker(bind=engine))
         self.session = session
         self.engine = engine
         self._cache = None
diff --git a/implementations/python/mzlib/spectrum.py b/implementations/python/mzlib/spectrum.py
index a554a07..9dd8097 100644
--- a/implementations/python/mzlib/spectrum.py
+++ b/implementations/python/mzlib/spectrum.py
@@ -69,7 +69,8 @@ def __init__(self, attributes=None, peak_list=None, analytes=None,
     key = AttributeManagedProperty[int](LIBRARY_SPECTRUM_KEY)
     index = AttributeManagedProperty[int](LIBRARY_SPECTRUM_INDEX)
 
-    precursor_mz = AttributeListManagedProperty[float]([PRECURSOR_MZ, "MS:1000744|selected ion m/z"])
+    precursor_mz = AttributeListManagedProperty[float](
+        [PRECURSOR_MZ, "MS:1003208|experimental precursor monoisotopic m/z"])
     precursor_charge = AttributeManagedProperty[int](CHARGE_STATE)
 
     spectrum_aggregation = AttributeFacet[SpectrumAggregation](SpectrumAggregation)
diff --git a/implementations/python/mzlib/spectrum_library.py b/implementations/python/mzlib/spectrum_library.py
index 564c54b..2a9878f 100644
--- a/implementations/python/mzlib/spectrum_library.py
+++ b/implementations/python/mzlib/spectrum_library.py
@@ -2,12 +2,14 @@
 import os
 import pathlib
 
-from typing import Type, List, Union
+from typing import Optional, Type, List, Union
+from mzlib.attributes import AttributeManagedProperty
+from mzlib.backends.base import LIBRARY_DESCRIPTION_TERM, LIBRARY_NAME_TERM, LIBRARY_URI_TERM, LIBRARY_VERSION_TERM
 from mzlib.cluster import SpectrumCluster
 
 from mzlib.spectrum_library_index import SpectrumLibraryIndex
 from mzlib.spectrum import Spectrum
-from mzlib.index import MemoryIndex, SQLIndex, IndexBase
+from mzlib.index import IndexBase
 from mzlib.backends import guess_implementation, SpectralLibraryBackendBase, SpectralLibraryWriterBase
 
 
@@ -47,6 +49,10 @@ class SpectrumLibrary:
     format: str
     index_type: Type[IndexBase]
 
+    name = AttributeManagedProperty[str](LIBRARY_NAME_TERM)
+    description = AttributeManagedProperty[str](LIBRARY_DESCRIPTION_TERM)
+    uri = AttributeManagedProperty[str](LIBRARY_URI_TERM)
+    library_version = AttributeManagedProperty[str](LIBRARY_VERSION_TERM)
 
     def __init__(self, identifier=None, filename=None, format=None, index_type=None):
         """
@@ -86,6 +92,7 @@ def _init_from_filename(self, index_type: Type[IndexBase]=None):
             self.backend = backend_type(
                 self.filename, index_type=index_type)
             self._format = self.backend.format_name
+        self._identifier = self.backend.identifier
 
     def _backend_initialized(self):
         return self.backend is not None
@@ -97,15 +104,17 @@ def _requires_backend(self):
 
     #### Define getter/setter for attribute identifier
     @property
-    def identifier(self):
+    def identifier(self) -> Optional[str]:
         if self._identifier is None:
             if self._backend_initialized():
                 return self.backend.identifier
         return self._identifier
 
     @identifier.setter
-    def identifier(self, identifier):
+    def identifier(self, identifier: Optional[str]):
         self._identifier = identifier
+        if self.backend is not None:
+            self.backend.identifier = identifier
 
     #### Define getter/setter for attribute filename
     @property
@@ -136,7 +145,8 @@ def attributes(self):
         return None
 
     def read_header(self) -> bool:
-        """Read just the header of the whole library
+        """
+        Read just the header of the whole library
 
         Returns
         -------
diff --git a/implementations/python/mzlib/tools/cli.py b/implementations/python/mzlib/tools/cli.py
index 31840d4..04ded01 100644
--- a/implementations/python/mzlib/tools/cli.py
+++ b/implementations/python/mzlib/tools/cli.py
@@ -14,6 +14,8 @@
 from mzlib.validate.level import RequirementLevel
 from mzlib.ontology import ControlledVocabularyResolver
 
+from mzlib.tools.utils import ColoringFormatter
+
 CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])
 
 logger = logging.getLogger(__name__)
@@ -46,6 +48,13 @@ def main():
         format=format_string,
         datefmt="%H:%M:%S")
 
+    fmtr = ColoringFormatter(format_string, datefmt='%H:%M:%S')
+
+    for handler in logging.getLogger().handlers:
+        handler.setFormatter(
+            fmtr
+        )
+
 
 @main.command("describe", short_help=("Produce a minimal textual description"
                                       " of a spectral library"))
diff --git a/implementations/python/mzlib/tools/utils.py b/implementations/python/mzlib/tools/utils.py
new file mode 100644
index 0000000..ff8d2d8
--- /dev/null
+++ b/implementations/python/mzlib/tools/utils.py
@@ -0,0 +1,114 @@
+import logging
+import re
+from typing import Dict
+
+
+class LevelAwareColoredLogFormatter(logging.Formatter):
+    """
+    A :class:`logging.Formatter` that wraps fields of a format string in color codes.
+
+    The ``%(asctime)s``, ``%(name)s`` and ``%(message)s`` fields are wrapped with fixed
+    class-level colors and the ``%(levelname)s`` field is wrapped with `level_color`.
+    Colors are taken from :mod:`colorama` when it can be imported; otherwise every
+    color is an empty string.
+    """
+
+    try:
+        from colorama import Fore, Style
+        # GREY = Fore.WHITE
+        GREY = ''
+        BLUE = Fore.BLUE
+        GREEN = Fore.GREEN
+        YELLOW = Fore.YELLOW
+        RED = Fore.RED
+        BRIGHT = Style.BRIGHT
+        DIM = Style.DIM
+        BOLD_RED = Fore.RED + Style.BRIGHT
+        RESET = Style.RESET_ALL
+    except ImportError:
+        GREY = ''
+        BLUE = ''
+        GREEN = ''
+        YELLOW = ''
+        RED = ''
+        BRIGHT = ''
+        DIM = ''
+        BOLD_RED = ''
+        RESET = ''
+
+    def _colorize_field(self, fmt: str, field: str, color: str) -> str:
+        """Wrap each match of the regex `field` in `fmt` with `color` and :attr:`RESET`."""
+        # A callable replacement keeps `color` from being parsed as a regex template.
+        return re.sub("(" + field + ")", lambda match: color + match.group(1) + self.RESET, fmt)
+
+    def _patch_fmt(self, fmt: str, level_color: str) -> str:
+        """Return `fmt` with color codes inserted around the fields this class knows."""
+        fmt = self._colorize_field(fmt, r"%\(asctime\)s", self.GREEN)
+        fmt = self._colorize_field(fmt, r"%\(name\).*?s", self.BLUE)
+        fmt = self._colorize_field(fmt, r"%\(message\).*?s", self.GREY)
+        if level_color:
+            fmt = self._colorize_field(fmt, r"%\(levelname\).*?s", level_color)
+        return fmt
+
+    def __init__(self, fmt, level_color=None, **kwargs):
+        """
+        Parameters
+        ----------
+        fmt : str
+            The log format string to colorize.
+        level_color : str, optional
+            The color code placed around the level name field. When empty or
+            :const:`None`, the level name is left uncolored.
+        **kwargs
+            Passed on to :class:`logging.Formatter`.
+        """
+        fmt = self._patch_fmt(fmt, level_color=level_color)
+        super().__init__(fmt, **kwargs)
+
+
+class ColoringFormatter(logging.Formatter):
+    """
+    A :class:`logging.Formatter` that colors each record according to its level.
+
+    One :class:`LevelAwareColoredLogFormatter` is built per entry of
+    :attr:`level_to_color` and :meth:`format` dispatches on ``record.levelno``.
+    """
+
+    level_to_color = {
+        logging.INFO: LevelAwareColoredLogFormatter.GREEN,
+        logging.DEBUG: LevelAwareColoredLogFormatter.GREY + LevelAwareColoredLogFormatter.DIM,
+        logging.WARNING: LevelAwareColoredLogFormatter.YELLOW + LevelAwareColoredLogFormatter.BRIGHT,
+        logging.ERROR: LevelAwareColoredLogFormatter.BOLD_RED,
+        # `logging.FATAL` is an alias of `logging.CRITICAL`, so one entry covers both.
+        logging.CRITICAL: LevelAwareColoredLogFormatter.RED + LevelAwareColoredLogFormatter.DIM,
+    }
+
+    _formatters: Dict[int, LevelAwareColoredLogFormatter]
+    _default_formatter: LevelAwareColoredLogFormatter
+
+    def __init__(self, fmt: str, **kwargs):
+        """
+        Parameters
+        ----------
+        fmt : str
+            The log format string to colorize.
+        **kwargs
+            Passed on to every underlying :class:`logging.Formatter`, e.g. `datefmt`.
+        """
+        # Initialize the base class so inherited Formatter state is populated.
+        super().__init__(fmt, **kwargs)
+        self._formatters = {
+            level: LevelAwareColoredLogFormatter(fmt, level_color=style, **kwargs)
+            for level, style in self.level_to_color.items()
+        }
+        # Fallback so custom or unlisted levels do not raise KeyError in `format`.
+        self._default_formatter = LevelAwareColoredLogFormatter(fmt, **kwargs)
+
+    def format(self, record: logging.LogRecord) -> str:
+        """
+        Format `record` using the formatter registered for its level.
+
+        Records whose level is not in :attr:`level_to_color` are formatted
+        without a level color.
+        """
+        fmtr = self._formatters.get(record.levelno, self._default_formatter)
+        return fmtr.format(record)
diff --git a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json
index 33d4ff7..06888c4 100644
--- a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json
+++ b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json
@@ -14,6 +14,7 @@
       "value": "tests/test_data/chinese_hamster_hcd_selected_head.msp"
     }
   ],
+  "clusters": [],
   "format_version": "1.0",
   "interpretation_attribute_sets": {
     "all": []
@@ -119,8 +120,8 @@
           "value": 2
         },
         {
-          "accession": "MS:1000744",
-          "name": "selected ion m/z",
+          "accession": "MS:1003208",
+          "name": "experimental precursor monoisotopic m/z",
           "value": 855.4538
         },
         {
@@ -626,8 +627,8 @@
           "value": 2
         },
         {
-          "accession": "MS:1000744",
-          "name": "selected ion m/z",
+          "accession": "MS:1003208",
+          "name": "experimental precursor monoisotopic m/z",
           "value": 855.4538
         },
         {
@@ -1484,8 +1485,8 @@
           "value": 2
         },
         {
-          "accession": "MS:1000744",
-          "name": "selected ion m/z",
+          "accession": "MS:1003208",
+          "name": "experimental precursor monoisotopic m/z",
           "value": 1207.1672
         },
         {
@@ -2096,8 +2097,8 @@
           "value": 2
         },
         {
-          "accession": "MS:1000744",
-          "name": "selected ion m/z",
+          "accession": "MS:1003208",
+          "name": "experimental precursor monoisotopic m/z",
           "value": 731.9043
         },
         {
@@ -2675,8 +2676,8 @@
           "value": 3
         },
         {
-          "accession": "MS:1000744",
-          "name": "selected ion m/z",
+          "accession": "MS:1003208",
+          "name": "experimental precursor monoisotopic m/z",
           "value": 488.2719
         },
         {
@@ -3404,8 +3405,8 @@
           "value": 2
         },
         {
-          "accession": "MS:1000744",
-          "name": "selected ion m/z",
+          "accession": "MS:1003208",
+          "name": "experimental precursor monoisotopic m/z",
           "value": 830.8834
         },
         {
@@ -3854,8 +3855,8 @@
           "value": 2
         },
         {
-          "accession": "MS:1000744",
-          "name": "selected ion m/z",
+          "accession": "MS:1003208",
+          "name": "experimental precursor monoisotopic m/z",
           "value": 830.8834
         },
         {
diff --git a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt
index 1599ab0..aaca838 100644
--- a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt
+++ b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt
@@ -7,7 +7,7 @@ MS:1003188|library name=tests/test_data/chinese_hamster_hcd_selected_head.msp
 MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
-MS:1000744|selected ion m/z=855.4538
+MS:1003208|experimental precursor monoisotopic m/z=855.4538
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=46
 [1]UO:0000000|unit=UO:0000266|electronvolt
@@ -138,7 +138,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=4
 MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_53eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
-MS:1000744|selected ion m/z=855.4538
+MS:1003208|experimental precursor monoisotopic m/z=855.4538
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=53
 [1]UO:0000000|unit=UO:0000266|electronvolt
@@ -386,7 +386,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=5
 MS:1003061|library spectrum name=AAAAGQTGTVPPGAPGALPLPGMAIVK/2_0_76eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
-MS:1000744|selected ion m/z=1207.1672
+MS:1003208|experimental precursor monoisotopic m/z=1207.1672
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=76
 [1]UO:0000000|unit=UO:0000266|electronvolt
@@ -552,7 +552,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0
 MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/2_0_44eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
-MS:1000744|selected ion m/z=731.9043
+MS:1003208|experimental precursor monoisotopic m/z=731.9043
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=44
 [1]UO:0000000|unit=UO:0000266|electronvolt
@@ -707,7 +707,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=1
 MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/3_0_28eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=3
-MS:1000744|selected ion m/z=488.2719
+MS:1003208|experimental precursor monoisotopic m/z=488.2719
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=28
 [1]UO:0000000|unit=UO:0000266|electronvolt
@@ -912,7 +912,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0
 MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_50eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
-MS:1000744|selected ion m/z=830.8834
+MS:1003208|experimental precursor monoisotopic m/z=830.8834
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=50
 [1]UO:0000000|unit=UO:0000266|electronvolt
@@ -1024,7 +1024,7 @@ MS:1003290|number of unassigned peaks among top 20 peaks=6
 MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_52eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
 MS:1000041|charge state=2
-MS:1000744|selected ion m/z=830.8834
+MS:1003208|experimental precursor monoisotopic m/z=830.8834
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=52
 [1]UO:0000000|unit=UO:0000266|electronvolt
diff --git a/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.json b/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.json
index 8db18cf..8fe0aee 100644
--- a/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.json
+++ b/implementations/python/tests/test_data/complex_interpretations_with_members.mzlb.json
@@ -3,6 +3,7 @@
     "all": []
   },
   "attributes": [],
+  "clusters": [],
   "format_version": "1.0",
   "interpretation_attribute_sets": {
     "all": []

From ecadc0a20cb0e568d47ef7bd2475bf614480cfba Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Mon, 15 May 2023 23:08:20 -0400
Subject: [PATCH 08/24] Add tests for Spectronaut and DIA-NN libraries

---
 .../python/mzlib/backends/diann.py            |   2 +-
 implementations/python/mzlib/ontology_term.py |   6 +-
 .../mzlib/spectrum_library_collection.py      |   1 +
 .../python/mzlib/spectrum_library_index.py    |   2 +
 .../mzlib/universal_spectrum_identifier.py    |   3 +-
 .../human_serum.head.spectronaut.tsv          | 145 +++++++++++++++++
 ...04_canonical_sall_pv_plasma.head.diann.tsv | 147 ++++++++++++++++++
 .../python/tests/test_library_backend.py      |  40 ++++-
 8 files changed, 338 insertions(+), 8 deletions(-)
 create mode 100644 implementations/python/tests/test_data/human_serum.head.spectronaut.tsv
 create mode 100644 implementations/python/tests/test_data/phl004_canonical_sall_pv_plasma.head.diann.tsv

diff --git a/implementations/python/mzlib/backends/diann.py b/implementations/python/mzlib/backends/diann.py
index ea7a7aa..f5b6755 100644
--- a/implementations/python/mzlib/backends/diann.py
+++ b/implementations/python/mzlib/backends/diann.py
@@ -106,7 +106,7 @@ def create_index(self):
                     self.index.add(
                         number=n,
                         offset=offset,
-                        name=key.decode("utf8"),
+                        name=key.decode("utf8") ,
                         analyte=None
                     )
                     n += 1
diff --git a/implementations/python/mzlib/ontology_term.py b/implementations/python/mzlib/ontology_term.py
index 37a5384..5a96750 100644
--- a/implementations/python/mzlib/ontology_term.py
+++ b/implementations/python/mzlib/ontology_term.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-
+# pragma: no cover
 #from __future__ import print_function
 #import sys
 #def eprint(*args, **kwargs):
@@ -58,7 +58,7 @@ def __init__(self, line_list=None, verbose=0):
         #### If we have been given an input line_list on construction, parse it right away
         if line_list is not None:
             self.parse(line_list=line_list)
-        
+
 
     #########################################################################
     #### parse the line_list
@@ -400,7 +400,7 @@ def parse(self, line_list=None, verbose=0):
         else:
             self.is_valid = False
             logging.critical("Number of errors while parsing term '%s': %i", self.name, self.n_errors)
- 
+
         if self.n_errors > 0 or len(self.unparsable_line_list) > 0:
             print("=====================")
             self.show()
diff --git a/implementations/python/mzlib/spectrum_library_collection.py b/implementations/python/mzlib/spectrum_library_collection.py
index 08f4da6..d6541aa 100644
--- a/implementations/python/mzlib/spectrum_library_collection.py
+++ b/implementations/python/mzlib/spectrum_library_collection.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+# pragma: no cover
 from __future__ import print_function
 import sys
 def eprint(*args, **kwargs):
diff --git a/implementations/python/mzlib/spectrum_library_index.py b/implementations/python/mzlib/spectrum_library_index.py
index 07cbe5d..665b95a 100644
--- a/implementations/python/mzlib/spectrum_library_index.py
+++ b/implementations/python/mzlib/spectrum_library_index.py
@@ -1,4 +1,6 @@
 #!/usr/bin/env python3
+
+# pragma: no cover
 from __future__ import print_function
 import sys
 def eprint(*args, **kwargs):
diff --git a/implementations/python/mzlib/universal_spectrum_identifier.py b/implementations/python/mzlib/universal_spectrum_identifier.py
index e146f56..bfddb44 100644
--- a/implementations/python/mzlib/universal_spectrum_identifier.py
+++ b/implementations/python/mzlib/universal_spectrum_identifier.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+# pragma: no cover
 from __future__ import print_function
 import sys
 def eprint(*args, **kwargs):
@@ -32,7 +33,7 @@ def __init__(self, usi=None):
 
         if usi:
             self.parse(verbose=None)
-        
+
 
     # Attributes:
     #   usi
diff --git a/implementations/python/tests/test_data/human_serum.head.spectronaut.tsv b/implementations/python/tests/test_data/human_serum.head.spectronaut.tsv
new file mode 100644
index 0000000..55db2fb
--- /dev/null
+++ b/implementations/python/tests/test_data/human_serum.head.spectronaut.tsv
@@ -0,0 +1,145 @@
+ReferenceRun	PrecursorCharge	Workflow	IntModifiedPeptide	CV	AllowForNormalization	ModifiedPeptide	StrippedPeptide	iRT	IonMobility	iRTSourceSpecific	BGSInferenceId	IsProteotypic	IntLabeledPeptide	LabeledPeptide	PrecursorMz	ReferenceRunQvalue	ReferenceRunMS1Response	FragmentLossType	FragmentNumber	FragmentType	FragmentCharge	FragmentMz	RelativeIntensity	ExcludeFromAssay	Database	ProteinGroups	UniProtIds	Protein Name	ProteinDescription	Organisms	OrganismId	Genes	Protein Existence	Sequence Version	FASTAName
+IK_221028_C19_lib2_01	2		_AQIPILR_	-60	TRUE	_AQIPILR_	AQIPILR	28.658491	0.7629655	28.450514|28.82925	P04114	TRUE	_AQIPILR_	_AQIPILR_	405.7634379	0.006434474	11824944	noloss	3	b	1	313.1870317	3.564491	FALSE	sp	P04114	P04114	APOB_HUMAN	Apolipoprotein B-100	Homo sapiens	9606	APOB	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_AQIPILR_	-60	TRUE	_AQIPILR_	AQIPILR	28.658491	0.7629655	28.450514|28.82925	P04114	TRUE	_AQIPILR_	_AQIPILR_	405.7634379	0.006434474	11824944	noloss	3	y	1	401.2870801	3.3441753	FALSE	sp	P04114	P04114	APOB_HUMAN	Apolipoprotein B-100	Homo sapiens	9606	APOB	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_AQIPILR_	-60	TRUE	_AQIPILR_	AQIPILR	28.658491	0.7629655	28.450514|28.82925	P04114	TRUE	_AQIPILR_	_AQIPILR_	405.7634379	0.006434474	11824944	noloss	4	y	1	498.339844	100	FALSE	sp	P04114	P04114	APOB_HUMAN	Apolipoprotein B-100	Homo sapiens	9606	APOB	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_AQIPILR_	-60	TRUE	_AQIPILR_	AQIPILR	28.658491	0.7629655	28.450514|28.82925	P04114	TRUE	_AQIPILR_	_AQIPILR_	405.7634379	0.006434474	11824944	noloss	4	y	2	249.6735602	11.528888	FALSE	sp	P04114	P04114	APOB_HUMAN	Apolipoprotein B-100	Homo sapiens	9606	APOB	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_AQIPILR_	-60	TRUE	_AQIPILR_	AQIPILR	28.658491	0.7629655	28.450514|28.82925	P04114	TRUE	_AQIPILR_	_AQIPILR_	405.7634379	0.006434474	11824944	noloss	5	y	1	611.423908	36.853043	FALSE	sp	P04114	P04114	APOB_HUMAN	Apolipoprotein B-100	Homo sapiens	9606	APOB	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_AQIPILR_	-60	TRUE	_AQIPILR_	AQIPILR	28.658491	0.7629655	28.450514|28.82925	P04114	TRUE	_AQIPILR_	_AQIPILR_	405.7634379	0.006434474	11824944	NH3	4	y	1	481.3132956	5.708535	FALSE	sp	P04114	P04114	APOB_HUMAN	Apolipoprotein B-100	Homo sapiens	9606	APOB	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_AGVLFGMSDR_	-40	TRUE	_AGVLFGMSDR_	AGVLFGMSDR	46.12812	0.8644136	46.12812|44.301308	P09172	TRUE	_AGVLFGMSDR_	_AGVLFGMSDR_	526.763309	0.000140481	1200641	noloss	3	b	1	228.1342679	48.582985	FALSE	sp	P09172	P09172	DOPO_HUMAN	Dopamine beta-hydroxylase	Homo sapiens	9606	DBH	1	3	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_AGVLFGMSDR_	-40	TRUE	_AGVLFGMSDR_	AGVLFGMSDR	46.12812	0.8644136	46.12812|44.301308	P09172	TRUE	_AGVLFGMSDR_	_AGVLFGMSDR_	526.763309	0.000140481	1200641	noloss	3	y	1	377.1779236	4.8015785	FALSE	sp	P09172	P09172	DOPO_HUMAN	Dopamine beta-hydroxylase	Homo sapiens	9606	DBH	1	3	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_AGVLFGMSDR_	-40	TRUE	_AGVLFGMSDR_	AGVLFGMSDR	46.12812	0.8644136	46.12812|44.301308	P09172	TRUE	_AGVLFGMSDR_	_AGVLFGMSDR_	526.763309	0.000140481	1200641	noloss	4	b	1	341.2183319	4.1744895	TRUE	sp	P09172	P09172	DOPO_HUMAN	Dopamine beta-hydroxylase	Homo sapiens	9606	DBH	1	3	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_AGVLFGMSDR_	-40	TRUE	_AGVLFGMSDR_	AGVLFGMSDR	46.12812	0.8644136	46.12812|44.301308	P09172	TRUE	_AGVLFGMSDR_	_AGVLFGMSDR_	526.763309	0.000140481	1200641	noloss	4	y	1	508.2184085	5.056169	FALSE	sp	P09172	P09172	DOPO_HUMAN	Dopamine beta-hydroxylase	Homo sapiens	9606	DBH	1	3	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_AGVLFGMSDR_	-40	TRUE	_AGVLFGMSDR_	AGVLFGMSDR	46.12812	0.8644136	46.12812|44.301308	P09172	TRUE	_AGVLFGMSDR_	_AGVLFGMSDR_	526.763309	0.000140481	1200641	noloss	5	y	1	565.2398722	69.96272	FALSE	sp	P09172	P09172	DOPO_HUMAN	Dopamine beta-hydroxylase	Homo sapiens	9606	DBH	1	3	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_AGVLFGMSDR_	-40	TRUE	_AGVLFGMSDR_	AGVLFGMSDR	46.12812	0.8644136	46.12812|44.301308	P09172	TRUE	_AGVLFGMSDR_	_AGVLFGMSDR_	526.763309	0.000140481	1200641	noloss	6	y	1	712.3082861	100	FALSE	sp	P09172	P09172	DOPO_HUMAN	Dopamine beta-hydroxylase	Homo sapiens	9606	DBH	1	3	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_AGVLFGMSDR_	-40	TRUE	_AGVLFGMSDR_	AGVLFGMSDR	46.12812	0.8644136	46.12812|44.301308	P09172	TRUE	_AGVLFGMSDR_	_AGVLFGMSDR_	526.763309	0.000140481	1200641	noloss	7	y	1	825.3923501	51.118713	FALSE	sp	P09172	P09172	DOPO_HUMAN	Dopamine beta-hydroxylase	Homo sapiens	9606	DBH	1	3	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_AGVLFGMSDR_	-40	TRUE	_AGVLFGMSDR_	AGVLFGMSDR	46.12812	0.8644136	46.12812|44.301308	P09172	TRUE	_AGVLFGMSDR_	_AGVLFGMSDR_	526.763309	0.000140481	1200641	noloss	8	y	1	924.460764	1.5956572	TRUE	sp	P09172	P09172	DOPO_HUMAN	Dopamine beta-hydroxylase	Homo sapiens	9606	DBH	1	3	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_AGVLFGMSDR_	-40	TRUE	_AGVLFGMSDR_	AGVLFGMSDR	46.12812	0.8644136	46.12812|44.301308	P09172	TRUE	_AGVLFGMSDR_	_AGVLFGMSDR_	526.763309	0.000140481	1200641	NH3	3	y	1	360.1513752	3.1122544	TRUE	sp	P09172	P09172	DOPO_HUMAN	Dopamine beta-hydroxylase	Homo sapiens	9606	DBH	1	3	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_AGVLFGMSDR_	-40	TRUE	_AGVLFGMSDR_	AGVLFGMSDR	46.12812	0.8644136	46.12812|44.301308	P09172	TRUE	_AGVLFGMSDR_	_AGVLFGMSDR_	526.763309	0.000140481	1200641	NH3	4	y	1	491.1918601	1.0637656	TRUE	sp	P09172	P09172	DOPO_HUMAN	Dopamine beta-hydroxylase	Homo sapiens	9606	DBH	1	3	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_AGVLFGMSDR_	-40	TRUE	_AGVLFGMSDR_	AGVLFGMSDR	46.12812	0.8644136	46.12812|44.301308	P09172	TRUE	_AGVLFGMSDR_	_AGVLFGMSDR_	526.763309	0.000140481	1200641	H2O	6	y	1	694.2977213	1.3498487	TRUE	sp	P09172	P09172	DOPO_HUMAN	Dopamine beta-hydroxylase	Homo sapiens	9606	DBH	1	3	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_AGVLFGMSDR_	-40	TRUE	_AGVLFGMSDR_	AGVLFGMSDR	46.12812	0.8644136	46.12812|44.301308	P09172	TRUE	_AGVLFGMSDR_	_AGVLFGMSDR_	526.763309	0.000140481	1200641	NH3	6	y	1	695.2817378	2.9444132	TRUE	sp	P09172	P09172	DOPO_HUMAN	Dopamine beta-hydroxylase	Homo sapiens	9606	DBH	1	3	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_AGVLFGMSDR_	-40	TRUE	_AGVLFGMSDR_	AGVLFGMSDR	46.12812	0.8644136	46.12812|44.301308	P09172	TRUE	_AGVLFGMSDR_	_AGVLFGMSDR_	526.763309	0.000140481	1200641	H2O	3	y	1	359.1673588	2.1920948	TRUE	sp	P09172	P09172	DOPO_HUMAN	Dopamine beta-hydroxylase	Homo sapiens	9606	DBH	1	3	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_DEDNNLLTEK_	-40	TRUE	_DEDNNLLTEK_	DEDNNLLTEK	13.850179	0.90230334	26.061718|13.850179	P09486	TRUE	_DEDNNLLTEK_	_DEDNNLLTEK_	595.7804071	0.000191886	313203.16	noloss	3	b	1	360.1037556	14.370086	TRUE	sp	P09486	P09486	SPRC_HUMAN	SPARC	Homo sapiens	9606	SPARC	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_DEDNNLLTEK_	-40	TRUE	_DEDNNLLTEK_	DEDNNLLTEK	13.850179	0.90230334	26.061718|13.850179	P09486	TRUE	_DEDNNLLTEK_	_DEDNNLLTEK_	595.7804071	0.000191886	313203.16	noloss	3	y	1	377.2030757	69.91263	FALSE	sp	P09486	P09486	SPRC_HUMAN	SPARC	Homo sapiens	9606	SPARC	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_DEDNNLLTEK_	-40	TRUE	_DEDNNLLTEK_	DEDNNLLTEK	13.850179	0.90230334	26.061718|13.850179	P09486	TRUE	_DEDNNLLTEK_	_DEDNNLLTEK_	595.7804071	0.000191886	313203.16	noloss	4	y	1	490.2871397	47.727154	FALSE	sp	P09486	P09486	SPRC_HUMAN	SPARC	Homo sapiens	9606	SPARC	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_DEDNNLLTEK_	-40	TRUE	_DEDNNLLTEK_	DEDNNLLTEK	13.850179	0.90230334	26.061718|13.850179	P09486	TRUE	_DEDNNLLTEK_	_DEDNNLLTEK_	595.7804071	0.000191886	313203.16	noloss	5	b	1	588.1896105	3.50105	TRUE	sp	P09486	P09486	SPRC_HUMAN	SPARC	Homo sapiens	9606	SPARC	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_DEDNNLLTEK_	-40	TRUE	_DEDNNLLTEK_	DEDNNLLTEK	13.850179	0.90230334	26.061718|13.850179	P09486	TRUE	_DEDNNLLTEK_	_DEDNNLLTEK_	595.7804071	0.000191886	313203.16	noloss	5	y	1	603.3712037	20.282309	FALSE	sp	P09486	P09486	SPRC_HUMAN	SPARC	Homo sapiens	9606	SPARC	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_DEDNNLLTEK_	-40	TRUE	_DEDNNLLTEK_	DEDNNLLTEK	13.850179	0.90230334	26.061718|13.850179	P09486	TRUE	_DEDNNLLTEK_	_DEDNNLLTEK_	595.7804071	0.000191886	313203.16	noloss	6	b	1	701.2736745	4.035067	TRUE	sp	P09486	P09486	SPRC_HUMAN	SPARC	Homo sapiens	9606	SPARC	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_DEDNNLLTEK_	-40	TRUE	_DEDNNLLTEK_	DEDNNLLTEK	13.850179	0.90230334	26.061718|13.850179	P09486	TRUE	_DEDNNLLTEK_	_DEDNNLLTEK_	595.7804071	0.000191886	313203.16	noloss	6	y	1	717.4141311	24.80533	FALSE	sp	P09486	P09486	SPRC_HUMAN	SPARC	Homo sapiens	9606	SPARC	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_DEDNNLLTEK_	-40	TRUE	_DEDNNLLTEK_	DEDNNLLTEK	13.850179	0.90230334	26.061718|13.850179	P09486	TRUE	_DEDNNLLTEK_	_DEDNNLLTEK_	595.7804071	0.000191886	313203.16	noloss	7	y	1	831.4570586	21.473965	FALSE	sp	P09486	P09486	SPRC_HUMAN	SPARC	Homo sapiens	9606	SPARC	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_DEDNNLLTEK_	-40	TRUE	_DEDNNLLTEK_	DEDNNLLTEK	13.850179	0.90230334	26.061718|13.850179	P09486	TRUE	_DEDNNLLTEK_	_DEDNNLLTEK_	595.7804071	0.000191886	313203.16	noloss	8	y	1	946.4840016	100	FALSE	sp	P09486	P09486	SPRC_HUMAN	SPARC	Homo sapiens	9606	SPARC	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_DEDNNLLTEK_	-40	TRUE	_DEDNNLLTEK_	DEDNNLLTEK	13.850179	0.90230334	26.061718|13.850179	P09486	TRUE	_DEDNNLLTEK_	_DEDNNLLTEK_	595.7804071	0.000191886	313203.16	H2O	3	b	1	342.0931908	5.760026	TRUE	sp	P09486	P09486	SPRC_HUMAN	SPARC	Homo sapiens	9606	SPARC	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_DEDNNLLTEK_	-40	TRUE	_DEDNNLLTEK_	DEDNNLLTEK	13.850179	0.90230334	26.061718|13.850179	P09486	TRUE	_DEDNNLLTEK_	_DEDNNLLTEK_	595.7804071	0.000191886	313203.16	H2O	3	y	1	359.1925109	5.090061	TRUE	sp	P09486	P09486	SPRC_HUMAN	SPARC	Homo sapiens	9606	SPARC	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_DEDNNLLTEK_	-40	TRUE	_DEDNNLLTEK_	DEDNNLLTEK	13.850179	0.90230334	26.061718|13.850179	P09486	TRUE	_DEDNNLLTEK_	_DEDNNLLTEK_	595.7804071	0.000191886	313203.16	H2O	4	y	1	472.2765749	4.874231	TRUE	sp	P09486	P09486	SPRC_HUMAN	SPARC	Homo sapiens	9606	SPARC	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_DEDNNLLTEK_	-40	TRUE	_DEDNNLLTEK_	DEDNNLLTEK	13.850179	0.90230334	26.061718|13.850179	P09486	TRUE	_DEDNNLLTEK_	_DEDNNLLTEK_	595.7804071	0.000191886	313203.16	NH3	5	b	1	571.1630621	4.1113725	TRUE	sp	P09486	P09486	SPRC_HUMAN	SPARC	Homo sapiens	9606	SPARC	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_DEDNNLLTEK_	-40	TRUE	_DEDNNLLTEK_	DEDNNLLTEK	13.850179	0.90230334	26.061718|13.850179	P09486	TRUE	_DEDNNLLTEK_	_DEDNNLLTEK_	595.7804071	0.000191886	313203.16	NH3	6	b	1	684.2471261	4.05031	TRUE	sp	P09486	P09486	SPRC_HUMAN	SPARC	Homo sapiens	9606	SPARC	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_DEDNNLLTEK_	-40	TRUE	_DEDNNLLTEK_	DEDNNLLTEK	13.850179	0.90230334	26.061718|13.850179	P09486	TRUE	_DEDNNLLTEK_	_DEDNNLLTEK_	595.7804071	0.000191886	313203.16	NH3	6	y	1	700.3875827	3.518	TRUE	sp	P09486	P09486	SPRC_HUMAN	SPARC	Homo sapiens	9606	SPARC	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_DEDNNLLTEK_	-40	TRUE	_DEDNNLLTEK_	DEDNNLLTEK	13.850179	0.90230334	26.061718|13.850179	P09486	TRUE	_DEDNNLLTEK_	_DEDNNLLTEK_	595.7804071	0.000191886	313203.16	NH3	7	y	1	814.4305102	7.3916	TRUE	sp	P09486	P09486	SPRC_HUMAN	SPARC	Homo sapiens	9606	SPARC	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_DEDNNLLTEK_	-40	TRUE	_DEDNNLLTEK_	DEDNNLLTEK	13.850179	0.90230334	26.061718|13.850179	P09486	TRUE	_DEDNNLLTEK_	_DEDNNLLTEK_	595.7804071	0.000191886	313203.16	H2O	8	y	1	928.4734368	5.1134515	TRUE	sp	P09486	P09486	SPRC_HUMAN	SPARC	Homo sapiens	9606	SPARC	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_DEDNNLLTEK_	-40	TRUE	_DEDNNLLTEK_	DEDNNLLTEK	13.850179	0.90230334	26.061718|13.850179	P09486	TRUE	_DEDNNLLTEK_	_DEDNNLLTEK_	595.7804071	0.000191886	313203.16	NH3	8	y	1	929.4574532	10.1338825	TRUE	sp	P09486	P09486	SPRC_HUMAN	SPARC	Homo sapiens	9606	SPARC	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_DEDNNLLTEK_	-40	TRUE	_DEDNNLLTEK_	DEDNNLLTEK	13.850179	0.90230334	26.061718|13.850179	P09486	TRUE	_DEDNNLLTEK_	_DEDNNLLTEK_	595.7804071	0.000191886	313203.16	noloss	9	y	1	1075.526595	14.727533	TRUE	sp	P09486	P09486	SPRC_HUMAN	SPARC	Homo sapiens	9606	SPARC	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_GWVTDGFSSLK_	-40	TRUE	_GWVTDGFSSLK_	GWVTDGFSSLK	62.688942	0.92365193	62.87711|62.59508	P02656	TRUE	_GWVTDGFSSLK_	_GWVTDGFSSLK_	598.8009456	0.000448056	9233573	noloss	3	b	1	343.1764671	3.8687427	TRUE	sp	P02656	P02656	APOC3_HUMAN	Apolipoprotein C-III	Homo sapiens	9606	APOC3	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_GWVTDGFSSLK_	-40	TRUE	_GWVTDGFSSLK_	GWVTDGFSSLK	62.688942	0.92365193	62.87711|62.59508	P02656	TRUE	_GWVTDGFSSLK_	_GWVTDGFSSLK_	598.8009456	0.000448056	9233573	noloss	3	y	1	347.2288965	6.1732	FALSE	sp	P02656	P02656	APOC3_HUMAN	Apolipoprotein C-III	Homo sapiens	9606	APOC3	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_GWVTDGFSSLK_	-40	TRUE	_GWVTDGFSSLK_	GWVTDGFSSLK	62.688942	0.92365193	62.87711|62.59508	P02656	TRUE	_GWVTDGFSSLK_	_GWVTDGFSSLK_	598.8009456	0.000448056	9233573	noloss	4	b	1	444.2241455	1.421	TRUE	sp	P02656	P02656	APOC3_HUMAN	Apolipoprotein C-III	Homo sapiens	9606	APOC3	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_GWVTDGFSSLK_	-40	TRUE	_GWVTDGFSSLK_	GWVTDGFSSLK	62.688942	0.92365193	62.87711|62.59508	P02656	TRUE	_GWVTDGFSSLK_	_GWVTDGFSSLK_	598.8009456	0.000448056	9233573	noloss	4	y	1	434.260925	9.774844	FALSE	sp	P02656	P02656	APOC3_HUMAN	Apolipoprotein C-III	Homo sapiens	9606	APOC3	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_GWVTDGFSSLK_	-40	TRUE	_GWVTDGFSSLK_	GWVTDGFSSLK	62.688942	0.92365193	62.87711|62.59508	P02656	TRUE	_GWVTDGFSSLK_	_GWVTDGFSSLK_	598.8009456	0.000448056	9233573	noloss	5	y	1	581.3293389	2.66143	TRUE	sp	P02656	P02656	APOC3_HUMAN	Apolipoprotein C-III	Homo sapiens	9606	APOC3	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_GWVTDGFSSLK_	-40	TRUE	_GWVTDGFSSLK_	GWVTDGFSSLK	62.688942	0.92365193	62.87711|62.59508	P02656	TRUE	_GWVTDGFSSLK_	_GWVTDGFSSLK_	598.8009456	0.000448056	9233573	noloss	6	y	1	638.3508026	19.109653	FALSE	sp	P02656	P02656	APOC3_HUMAN	Apolipoprotein C-III	Homo sapiens	9606	APOC3	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_GWVTDGFSSLK_	-40	TRUE	_GWVTDGFSSLK_	GWVTDGFSSLK	62.688942	0.92365193	62.87711|62.59508	P02656	TRUE	_GWVTDGFSSLK_	_GWVTDGFSSLK_	598.8009456	0.000448056	9233573	noloss	7	y	1	753.3777456	20.841564	FALSE	sp	P02656	P02656	APOC3_HUMAN	Apolipoprotein C-III	Homo sapiens	9606	APOC3	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_GWVTDGFSSLK_	-40	TRUE	_GWVTDGFSSLK_	GWVTDGFSSLK	62.688942	0.92365193	62.87711|62.59508	P02656	TRUE	_GWVTDGFSSLK_	_GWVTDGFSSLK_	598.8009456	0.000448056	9233573	noloss	8	y	1	854.4254241	60.931602	FALSE	sp	P02656	P02656	APOC3_HUMAN	Apolipoprotein C-III	Homo sapiens	9606	APOC3	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_GWVTDGFSSLK_	-40	TRUE	_GWVTDGFSSLK_	GWVTDGFSSLK	62.688942	0.92365193	62.87711|62.59508	P02656	TRUE	_GWVTDGFSSLK_	_GWVTDGFSSLK_	598.8009456	0.000448056	9233573	noloss	9	y	1	953.493838	36.67316	FALSE	sp	P02656	P02656	APOC3_HUMAN	Apolipoprotein C-III	Homo sapiens	9606	APOC3	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_GWVTDGFSSLK_	-40	TRUE	_GWVTDGFSSLK_	GWVTDGFSSLK	62.688942	0.92365193	62.87711|62.59508	P02656	TRUE	_GWVTDGFSSLK_	_GWVTDGFSSLK_	598.8009456	0.000448056	9233573	H2O	4	y	1	416.2503601	2.702636	TRUE	sp	P02656	P02656	APOC3_HUMAN	Apolipoprotein C-III	Homo sapiens	9606	APOC3	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_GWVTDGFSSLK_	-40	TRUE	_GWVTDGFSSLK_	GWVTDGFSSLK	62.688942	0.92365193	62.87711|62.59508	P02656	TRUE	_GWVTDGFSSLK_	_GWVTDGFSSLK_	598.8009456	0.000448056	9233573	H2O	7	y	1	735.3671808	2.1958628	TRUE	sp	P02656	P02656	APOC3_HUMAN	Apolipoprotein C-III	Homo sapiens	9606	APOC3	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_GWVTDGFSSLK_	-40	TRUE	_GWVTDGFSSLK_	GWVTDGFSSLK	62.688942	0.92365193	62.87711|62.59508	P02656	TRUE	_GWVTDGFSSLK_	_GWVTDGFSSLK_	598.8009456	0.000448056	9233573	H2O	8	y	1	836.4148593	4.374068	TRUE	sp	P02656	P02656	APOC3_HUMAN	Apolipoprotein C-III	Homo sapiens	9606	APOC3	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_GWVTDGFSSLK_	-40	TRUE	_GWVTDGFSSLK_	GWVTDGFSSLK	62.688942	0.92365193	62.87711|62.59508	P02656	TRUE	_GWVTDGFSSLK_	_GWVTDGFSSLK_	598.8009456	0.000448056	9233573	H2O	9	y	1	935.4832732	4.7364564	TRUE	sp	P02656	P02656	APOC3_HUMAN	Apolipoprotein C-III	Homo sapiens	9606	APOC3	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_GWVTDGFSSLK_	-40	TRUE	_GWVTDGFSSLK_	GWVTDGFSSLK	62.688942	0.92365193	62.87711|62.59508	P02656	TRUE	_GWVTDGFSSLK_	_GWVTDGFSSLK_	598.8009456	0.000448056	9233573	H2O	9	y	2	468.2452748	1.662614	TRUE	sp	P02656	P02656	APOC3_HUMAN	Apolipoprotein C-III	Homo sapiens	9606	APOC3	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_VTSIQDWVQK_	-40	TRUE	_VTSIQDWVQK_	VTSIQDWVQK	39.73143	0.9102522	39.839462|39.662376	P00738	TRUE	_VTSIQDWVQK_	_VTSIQDWVQK_	602.3220451	0.000163706	907266.44	noloss	3	b	1	288.1553973	13.402388	TRUE	sp	P00738	P00738	HPT_HUMAN	Haptoglobin	Homo sapiens	9606	HP	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_VTSIQDWVQK_	-40	TRUE	_VTSIQDWVQK_	VTSIQDWVQK	39.73143	0.9102522	39.839462|39.662376	P00738	TRUE	_VTSIQDWVQK_	_VTSIQDWVQK_	602.3220451	0.000163706	907266.44	noloss	3	y	1	374.2397956	35.40592	TRUE	sp	P00738	P00738	HPT_HUMAN	Haptoglobin	Homo sapiens	9606	HP	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_VTSIQDWVQK_	-40	TRUE	_VTSIQDWVQK_	VTSIQDWVQK	39.73143	0.9102522	39.839462|39.662376	P00738	TRUE	_VTSIQDWVQK_	_VTSIQDWVQK_	602.3220451	0.000163706	907266.44	noloss	4	y	1	560.3191085	46.133404	TRUE	sp	P00738	P00738	HPT_HUMAN	Haptoglobin	Homo sapiens	9606	HP	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_VTSIQDWVQK_	-40	TRUE	_VTSIQDWVQK_	VTSIQDWVQK	39.73143	0.9102522	39.839462|39.662376	P00738	TRUE	_VTSIQDWVQK_	_VTSIQDWVQK_	602.3220451	0.000163706	907266.44	noloss	5	y	1	675.3460516	49.92076	FALSE	sp	P00738	P00738	HPT_HUMAN	Haptoglobin	Homo sapiens	9606	HP	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_VTSIQDWVQK_	-40	TRUE	_VTSIQDWVQK_	VTSIQDWVQK	39.73143	0.9102522	39.839462|39.662376	P00738	TRUE	_VTSIQDWVQK_	_VTSIQDWVQK_	602.3220451	0.000163706	907266.44	noloss	6	y	1	803.4046291	88.30317	FALSE	sp	P00738	P00738	HPT_HUMAN	Haptoglobin	Homo sapiens	9606	HP	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_VTSIQDWVQK_	-40	TRUE	_VTSIQDWVQK_	VTSIQDWVQK	39.73143	0.9102522	39.839462|39.662376	P00738	TRUE	_VTSIQDWVQK_	_VTSIQDWVQK_	602.3220451	0.000163706	907266.44	noloss	7	y	1	916.488693	35.01724	TRUE	sp	P00738	P00738	HPT_HUMAN	Haptoglobin	Homo sapiens	9606	HP	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_VTSIQDWVQK_	-40	TRUE	_VTSIQDWVQK_	VTSIQDWVQK	39.73143	0.9102522	39.839462|39.662376	P00738	TRUE	_VTSIQDWVQK_	_VTSIQDWVQK_	602.3220451	0.000163706	907266.44	noloss	7	y	2	458.7479848	8.279677	TRUE	sp	P00738	P00738	HPT_HUMAN	Haptoglobin	Homo sapiens	9606	HP	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_VTSIQDWVQK_	-40	TRUE	_VTSIQDWVQK_	VTSIQDWVQK	39.73143	0.9102522	39.839462|39.662376	P00738	TRUE	_VTSIQDWVQK_	_VTSIQDWVQK_	602.3220451	0.000163706	907266.44	noloss	8	y	1	1003.520721	100	FALSE	sp	P00738	P00738	HPT_HUMAN	Haptoglobin	Homo sapiens	9606	HP	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_VTSIQDWVQK_	-40	TRUE	_VTSIQDWVQK_	VTSIQDWVQK	39.73143	0.9102522	39.839462|39.662376	P00738	TRUE	_VTSIQDWVQK_	_VTSIQDWVQK_	602.3220451	0.000163706	907266.44	noloss	9	y	1	1104.5684	49.551434	TRUE	sp	P00738	P00738	HPT_HUMAN	Haptoglobin	Homo sapiens	9606	HP	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_VTSIQDWVQK_	-40	TRUE	_VTSIQDWVQK_	VTSIQDWVQK	39.73143	0.9102522	39.839462|39.662376	P00738	TRUE	_VTSIQDWVQK_	_VTSIQDWVQK_	602.3220451	0.000163706	907266.44	H2O	3	b	1	270.1448324	95.16555	FALSE	sp	P00738	P00738	HPT_HUMAN	Haptoglobin	Homo sapiens	9606	HP	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_VTSIQDWVQK_	-40	TRUE	_VTSIQDWVQK_	VTSIQDWVQK	39.73143	0.9102522	39.839462|39.662376	P00738	TRUE	_VTSIQDWVQK_	_VTSIQDWVQK_	602.3220451	0.000163706	907266.44	H2O	6	y	1	785.3940643	24.796286	TRUE	sp	P00738	P00738	HPT_HUMAN	Haptoglobin	Homo sapiens	9606	HP	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_VTSIQDWVQK_	-40	TRUE	_VTSIQDWVQK_	VTSIQDWVQK	39.73143	0.9102522	39.839462|39.662376	P00738	TRUE	_VTSIQDWVQK_	_VTSIQDWVQK_	602.3220451	0.000163706	907266.44	H2O	6	y	2	393.2006704	13.1079035	TRUE	sp	P00738	P00738	HPT_HUMAN	Haptoglobin	Homo sapiens	9606	HP	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_VTSIQDWVQK_	-40	TRUE	_VTSIQDWVQK_	VTSIQDWVQK	39.73143	0.9102522	39.839462|39.662376	P00738	TRUE	_VTSIQDWVQK_	_VTSIQDWVQK_	602.3220451	0.000163706	907266.44	NH3	6	y	1	786.3780807	46.45572	TRUE	sp	P00738	P00738	HPT_HUMAN	Haptoglobin	Homo sapiens	9606	HP	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_VTSIQDWVQK_	-40	TRUE	_VTSIQDWVQK_	VTSIQDWVQK	39.73143	0.9102522	39.839462|39.662376	P00738	TRUE	_VTSIQDWVQK_	_VTSIQDWVQK_	602.3220451	0.000163706	907266.44	H2O	8	y	1	985.5101566	7.3205233	TRUE	sp	P00738	P00738	HPT_HUMAN	Haptoglobin	Homo sapiens	9606	HP	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_VTSIQDWVQK_	-40	TRUE	_VTSIQDWVQK_	VTSIQDWVQK	39.73143	0.9102522	39.839462|39.662376	P00738	TRUE	_VTSIQDWVQK_	_VTSIQDWVQK_	602.3220451	0.000163706	907266.44	H2O	8	y	2	493.2587166	4.255348	TRUE	sp	P00738	P00738	HPT_HUMAN	Haptoglobin	Homo sapiens	9606	HP	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_VTSIQDWVQK_	-40	TRUE	_VTSIQDWVQK_	VTSIQDWVQK	39.73143	0.9102522	39.839462|39.662376	P00738	TRUE	_VTSIQDWVQK_	_VTSIQDWVQK_	602.3220451	0.000163706	907266.44	H2O	9	y	1	1086.557835	7.42398	TRUE	sp	P00738	P00738	HPT_HUMAN	Haptoglobin	Homo sapiens	9606	HP	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_VTSIQDWVQK_	-40	TRUE	_VTSIQDWVQK_	VTSIQDWVQK	39.73143	0.9102522	39.839462|39.662376	P00738	TRUE	_VTSIQDWVQK_	_VTSIQDWVQK_	602.3220451	0.000163706	907266.44	noloss	4	b	2	201.1233688	88.65463	FALSE	sp	P00738	P00738	HPT_HUMAN	Haptoglobin	Homo sapiens	9606	HP	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_VTSIQDWVQK_	-40	TRUE	_VTSIQDWVQK_	VTSIQDWVQK	39.73143	0.9102522	39.839462|39.662376	P00738	TRUE	_VTSIQDWVQK_	_VTSIQDWVQK_	602.3220451	0.000163706	907266.44	noloss	4	b	1	401.2394612	4.14027	TRUE	sp	P00738	P00738	HPT_HUMAN	Haptoglobin	Homo sapiens	9606	HP	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_VTSIQDWVQK_	-40	TRUE	_VTSIQDWVQK_	VTSIQDWVQK	39.73143	0.9102522	39.839462|39.662376	P00738	TRUE	_VTSIQDWVQK_	_VTSIQDWVQK_	602.3220451	0.000163706	907266.44	noloss	9	y	2	552.7878382	4.422511	TRUE	sp	P00738	P00738	HPT_HUMAN	Haptoglobin	Homo sapiens	9606	HP	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_VTSIQDWVQK_	-40	TRUE	_VTSIQDWVQK_	VTSIQDWVQK	39.73143	0.9102522	39.839462|39.662376	P00738	TRUE	_VTSIQDWVQK_	_VTSIQDWVQK_	602.3220451	0.000163706	907266.44	H2O	4	b	1	383.2288964	12.326735	TRUE	sp	P00738	P00738	HPT_HUMAN	Haptoglobin	Homo sapiens	9606	HP	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_VTSIQDWVQK_	-40	TRUE	_VTSIQDWVQK_	VTSIQDWVQK	39.73143	0.9102522	39.839462|39.662376	P00738	TRUE	_VTSIQDWVQK_	_VTSIQDWVQK_	602.3220451	0.000163706	907266.44	NH3	8	y	1	986.4941731	24.063145	TRUE	sp	P00738	P00738	HPT_HUMAN	Haptoglobin	Homo sapiens	9606	HP	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_VTSIQDWVQK_	-40	TRUE	_VTSIQDWVQK_	VTSIQDWVQK	39.73143	0.9102522	39.839462|39.662376	P00738	TRUE	_VTSIQDWVQK_	_VTSIQDWVQK_	602.3220451	0.000163706	907266.44	NH3	3	y	1	357.2132472	60.34427	FALSE	sp	P00738	P00738	HPT_HUMAN	Haptoglobin	Homo sapiens	9606	HP	1	1	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_QELSEAEQATR_	-40	TRUE	_QELSEAEQATR_	QELSEAEQATR	-14.638051	0.92576075	-14.638051|-17.592121	P01024	TRUE	_QELSEAEQATR_	_QELSEAEQATR_	631.3045807	0	1612831.6	noloss	3	b	1	371.192511	3.1688333	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_QELSEAEQATR_	-40	TRUE	_QELSEAEQATR_	QELSEAEQATR	-14.638051	0.92576075	-14.638051|-17.592121	P01024	TRUE	_QELSEAEQATR_	_QELSEAEQATR_	631.3045807	0	1612831.6	noloss	3	y	1	347.2037444	10.465045	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_QELSEAEQATR_	-40	TRUE	_QELSEAEQATR_	QELSEAEQATR	-14.638051	0.92576075	-14.638051|-17.592121	P01024	TRUE	_QELSEAEQATR_	_QELSEAEQATR_	631.3045807	0	1612831.6	noloss	4	y	1	475.2623219	23.276728	FALSE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_QELSEAEQATR_	-40	TRUE	_QELSEAEQATR_	QELSEAEQATR	-14.638051	0.92576075	-14.638051|-17.592121	P01024	TRUE	_QELSEAEQATR_	_QELSEAEQATR_	631.3045807	0	1612831.6	noloss	5	y	1	604.304915	37.09764	FALSE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_QELSEAEQATR_	-40	TRUE	_QELSEAEQATR_	QELSEAEQATR	-14.638051	0.92576075	-14.638051|-17.592121	P01024	TRUE	_QELSEAEQATR_	_QELSEAEQATR_	631.3045807	0	1612831.6	noloss	6	y	1	675.3420288	64.2233	FALSE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_QELSEAEQATR_	-40	TRUE	_QELSEAEQATR_	QELSEAEQATR	-14.638051	0.92576075	-14.638051|-17.592121	P01024	TRUE	_QELSEAEQATR_	_QELSEAEQATR_	631.3045807	0	1612831.6	noloss	7	y	1	804.3846219	37.812637	FALSE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_QELSEAEQATR_	-40	TRUE	_QELSEAEQATR_	QELSEAEQATR	-14.638051	0.92576075	-14.638051|-17.592121	P01024	TRUE	_QELSEAEQATR_	_QELSEAEQATR_	631.3045807	0	1612831.6	noloss	8	y	1	891.4166503	96.74726	FALSE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_QELSEAEQATR_	-40	TRUE	_QELSEAEQATR_	QELSEAEQATR	-14.638051	0.92576075	-14.638051|-17.592121	P01024	TRUE	_QELSEAEQATR_	_QELSEAEQATR_	631.3045807	0	1612831.6	noloss	9	y	1	1004.500714	100	FALSE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_QELSEAEQATR_	-40	TRUE	_QELSEAEQATR_	QELSEAEQATR	-14.638051	0.92576075	-14.638051|-17.592121	P01024	TRUE	_QELSEAEQATR_	_QELSEAEQATR_	631.3045807	0	1612831.6	H2O	3	b	1	353.1819462	4.8028316	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_QELSEAEQATR_	-40	TRUE	_QELSEAEQATR_	QELSEAEQATR	-14.638051	0.92576075	-14.638051|-17.592121	P01024	TRUE	_QELSEAEQATR_	_QELSEAEQATR_	631.3045807	0	1612831.6	NH3	3	b	1	354.1659627	5.46505	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_QELSEAEQATR_	-40	TRUE	_QELSEAEQATR_	QELSEAEQATR	-14.638051	0.92576075	-14.638051|-17.592121	P01024	TRUE	_QELSEAEQATR_	_QELSEAEQATR_	631.3045807	0	1612831.6	NH3	4	y	1	458.2357735	1.8879279	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_QELSEAEQATR_	-40	TRUE	_QELSEAEQATR_	QELSEAEQATR	-14.638051	0.92576075	-14.638051|-17.592121	P01024	TRUE	_QELSEAEQATR_	_QELSEAEQATR_	631.3045807	0	1612831.6	NH3	6	y	1	658.3154804	1.120537	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_QELSEAEQATR_	-40	TRUE	_QELSEAEQATR_	QELSEAEQATR	-14.638051	0.92576075	-14.638051|-17.592121	P01024	TRUE	_QELSEAEQATR_	_QELSEAEQATR_	631.3045807	0	1612831.6	H2O	7	y	1	786.3740571	7.649042	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_QELSEAEQATR_	-40	TRUE	_QELSEAEQATR_	QELSEAEQATR	-14.638051	0.92576075	-14.638051|-17.592121	P01024	TRUE	_QELSEAEQATR_	_QELSEAEQATR_	631.3045807	0	1612831.6	H2O	8	y	1	873.4060855	4.228747	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_QELSEAEQATR_	-40	TRUE	_QELSEAEQATR_	QELSEAEQATR	-14.638051	0.92576075	-14.638051|-17.592121	P01024	TRUE	_QELSEAEQATR_	_QELSEAEQATR_	631.3045807	0	1612831.6	H2O	9	y	1	986.4901495	1.3095266	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_EEGTDLEVTANR_	-40	TRUE	_EEGTDLEVTANR_	EEGTDLEVTANR	5.3526073	0.94296074	5.3526073|3.901706	P20742	TRUE	_EEGTDLEVTANR_	_EEGTDLEVTANR_	667.3151454	0.000139519	230759.89	noloss	3	y	1	360.1989934	20.52395	TRUE	sp	P20742	P20742	PZP_HUMAN	Pregnancy zone protein	Homo sapiens	9606	PZP	1	4	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_EEGTDLEVTANR_	-40	TRUE	_EEGTDLEVTANR_	EEGTDLEVTANR	5.3526073	0.94296074	5.3526073|3.901706	P20742	TRUE	_EEGTDLEVTANR_	_EEGTDLEVTANR_	667.3151454	0.000139519	230759.89	noloss	4	y	1	461.2466719	85.759026	FALSE	sp	P20742	P20742	PZP_HUMAN	Pregnancy zone protein	Homo sapiens	9606	PZP	1	4	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_EEGTDLEVTANR_	-40	TRUE	_EEGTDLEVTANR_	EEGTDLEVTANR	5.3526073	0.94296074	5.3526073|3.901706	P20742	TRUE	_EEGTDLEVTANR_	_EEGTDLEVTANR_	667.3151454	0.000139519	230759.89	noloss	5	y	1	560.3150858	82.32931	FALSE	sp	P20742	P20742	PZP_HUMAN	Pregnancy zone protein	Homo sapiens	9606	PZP	1	4	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_EEGTDLEVTANR_	-40	TRUE	_EEGTDLEVTANR_	EEGTDLEVTANR	5.3526073	0.94296074	5.3526073|3.901706	P20742	TRUE	_EEGTDLEVTANR_	_EEGTDLEVTANR_	667.3151454	0.000139519	230759.89	noloss	6	y	1	689.3576789	100	FALSE	sp	P20742	P20742	PZP_HUMAN	Pregnancy zone protein	Homo sapiens	9606	PZP	1	4	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_EEGTDLEVTANR_	-40	TRUE	_EEGTDLEVTANR_	EEGTDLEVTANR	5.3526073	0.94296074	5.3526073|3.901706	P20742	TRUE	_EEGTDLEVTANR_	_EEGTDLEVTANR_	667.3151454	0.000139519	230759.89	noloss	7	y	1	802.4417428	79.06382	FALSE	sp	P20742	P20742	PZP_HUMAN	Pregnancy zone protein	Homo sapiens	9606	PZP	1	4	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_EEGTDLEVTANR_	-40	TRUE	_EEGTDLEVTANR_	EEGTDLEVTANR	5.3526073	0.94296074	5.3526073|3.901706	P20742	TRUE	_EEGTDLEVTANR_	_EEGTDLEVTANR_	667.3151454	0.000139519	230759.89	noloss	8	y	1	917.4686859	72.09271	FALSE	sp	P20742	P20742	PZP_HUMAN	Pregnancy zone protein	Homo sapiens	9606	PZP	1	4	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_EEGTDLEVTANR_	-40	TRUE	_EEGTDLEVTANR_	EEGTDLEVTANR	5.3526073	0.94296074	5.3526073|3.901706	P20742	TRUE	_EEGTDLEVTANR_	_EEGTDLEVTANR_	667.3151454	0.000139519	230759.89	noloss	9	y	1	1018.516364	21.396385	TRUE	sp	P20742	P20742	PZP_HUMAN	Pregnancy zone protein	Homo sapiens	9606	PZP	1	4	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_EEGTDLEVTANR_	-40	TRUE	_EEGTDLEVTANR_	EEGTDLEVTANR	5.3526073	0.94296074	5.3526073|3.901706	P20742	TRUE	_EEGTDLEVTANR_	_EEGTDLEVTANR_	667.3151454	0.000139519	230759.89	noloss	10	y	1	1075.537828	83.90465	FALSE	sp	P20742	P20742	PZP_HUMAN	Pregnancy zone protein	Homo sapiens	9606	PZP	1	4	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_EEGTDLEVTANR_	-40	TRUE	_EEGTDLEVTANR_	EEGTDLEVTANR	5.3526073	0.94296074	5.3526073|3.901706	P20742	TRUE	_EEGTDLEVTANR_	_EEGTDLEVTANR_	667.3151454	0.000139519	230759.89	H2O	6	y	1	671.3471141	8.983159	TRUE	sp	P20742	P20742	PZP_HUMAN	Pregnancy zone protein	Homo sapiens	9606	PZP	1	4	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_C[+57]EEDEEFTC[+57]R_	-40	TRUE	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	CEEDEEFTCR	-1.2823446	0.9011946	-1.2823446|-4.0177083	P00747	TRUE	_C[+57]EEDEEFTC[+57]R_	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	687.7504667	0.00036315	385101.16	noloss	3	b	1	419.1231111	14.944622	TRUE	sp	P00747	P00747	PLMN_HUMAN	Plasminogen	Homo sapiens	9606	PLG	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_C[+57]EEDEEFTC[+57]R_	-40	TRUE	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	CEEDEEFTCR	-1.2823446	0.9011946	-1.2823446|-4.0177083	P00747	TRUE	_C[+57]EEDEEFTC[+57]R_	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	687.7504667	0.00036315	385101.16	noloss	3	y	1	436.1972791	30.738253	FALSE	sp	P00747	P00747	PLMN_HUMAN	Plasminogen	Homo sapiens	9606	PLG	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_C[+57]EEDEEFTC[+57]R_	-40	TRUE	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	CEEDEEFTCR	-1.2823446	0.9011946	-1.2823446|-4.0177083	P00747	TRUE	_C[+57]EEDEEFTC[+57]R_	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	687.7504667	0.00036315	385101.16	noloss	4	b	1	534.1500542	9.713619	TRUE	sp	P00747	P00747	PLMN_HUMAN	Plasminogen	Homo sapiens	9606	PLG	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_C[+57]EEDEEFTC[+57]R_	-40	TRUE	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	CEEDEEFTCR	-1.2823446	0.9011946	-1.2823446|-4.0177083	P00747	TRUE	_C[+57]EEDEEFTC[+57]R_	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	687.7504667	0.00036315	385101.16	noloss	4	y	1	583.2656931	51.723152	FALSE	sp	P00747	P00747	PLMN_HUMAN	Plasminogen	Homo sapiens	9606	PLG	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_C[+57]EEDEEFTC[+57]R_	-40	TRUE	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	CEEDEEFTCR	-1.2823446	0.9011946	-1.2823446|-4.0177083	P00747	TRUE	_C[+57]EEDEEFTC[+57]R_	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	687.7504667	0.00036315	385101.16	noloss	5	y	1	712.3082861	50.78903	FALSE	sp	P00747	P00747	PLMN_HUMAN	Plasminogen	Homo sapiens	9606	PLG	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_C[+57]EEDEEFTC[+57]R_	-40	TRUE	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	CEEDEEFTCR	-1.2823446	0.9011946	-1.2823446|-4.0177083	P00747	TRUE	_C[+57]EEDEEFTC[+57]R_	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	687.7504667	0.00036315	385101.16	noloss	6	y	1	841.3508792	59.771255	FALSE	sp	P00747	P00747	PLMN_HUMAN	Plasminogen	Homo sapiens	9606	PLG	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_C[+57]EEDEEFTC[+57]R_	-40	TRUE	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	CEEDEEFTCR	-1.2823446	0.9011946	-1.2823446|-4.0177083	P00747	TRUE	_C[+57]EEDEEFTC[+57]R_	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	687.7504667	0.00036315	385101.16	noloss	7	y	1	956.3778223	75.695114	FALSE	sp	P00747	P00747	PLMN_HUMAN	Plasminogen	Homo sapiens	9606	PLG	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_C[+57]EEDEEFTC[+57]R_	-40	TRUE	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	CEEDEEFTCR	-1.2823446	0.9011946	-1.2823446|-4.0177083	P00747	TRUE	_C[+57]EEDEEFTC[+57]R_	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	687.7504667	0.00036315	385101.16	noloss	8	y	1	1085.420415	100	FALSE	sp	P00747	P00747	PLMN_HUMAN	Plasminogen	Homo sapiens	9606	PLG	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_C[+57]EEDEEFTC[+57]R_	-40	TRUE	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	CEEDEEFTCR	-1.2823446	0.9011946	-1.2823446|-4.0177083	P00747	TRUE	_C[+57]EEDEEFTC[+57]R_	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	687.7504667	0.00036315	385101.16	noloss	9	y	1	1214.463008	15.559877	TRUE	sp	P00747	P00747	PLMN_HUMAN	Plasminogen	Homo sapiens	9606	PLG	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_C[+57]EEDEEFTC[+57]R_	-40	TRUE	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	CEEDEEFTCR	-1.2823446	0.9011946	-1.2823446|-4.0177083	P00747	TRUE	_C[+57]EEDEEFTC[+57]R_	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	687.7504667	0.00036315	385101.16	H2O	3	b	1	401.1125463	9.39616	TRUE	sp	P00747	P00747	PLMN_HUMAN	Plasminogen	Homo sapiens	9606	PLG	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_C[+57]EEDEEFTC[+57]R_	-40	TRUE	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	CEEDEEFTCR	-1.2823446	0.9011946	-1.2823446|-4.0177083	P00747	TRUE	_C[+57]EEDEEFTC[+57]R_	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	687.7504667	0.00036315	385101.16	H2O	4	b	1	516.1394894	5.2136855	TRUE	sp	P00747	P00747	PLMN_HUMAN	Plasminogen	Homo sapiens	9606	PLG	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_C[+57]EEDEEFTC[+57]R_	-40	TRUE	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	CEEDEEFTCR	-1.2823446	0.9011946	-1.2823446|-4.0177083	P00747	TRUE	_C[+57]EEDEEFTC[+57]R_	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	687.7504667	0.00036315	385101.16	NH3	4	y	1	566.2391447	6.1200476	TRUE	sp	P00747	P00747	PLMN_HUMAN	Plasminogen	Homo sapiens	9606	PLG	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_C[+57]EEDEEFTC[+57]R_	-40	TRUE	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	CEEDEEFTCR	-1.2823446	0.9011946	-1.2823446|-4.0177083	P00747	TRUE	_C[+57]EEDEEFTC[+57]R_	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	687.7504667	0.00036315	385101.16	H2O	5	y	1	694.2977213	6.602107	TRUE	sp	P00747	P00747	PLMN_HUMAN	Plasminogen	Homo sapiens	9606	PLG	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_C[+57]EEDEEFTC[+57]R_	-40	TRUE	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	CEEDEEFTCR	-1.2823446	0.9011946	-1.2823446|-4.0177083	P00747	TRUE	_C[+57]EEDEEFTC[+57]R_	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	687.7504667	0.00036315	385101.16	H2O	6	y	1	823.3403144	10.601803	TRUE	sp	P00747	P00747	PLMN_HUMAN	Plasminogen	Homo sapiens	9606	PLG	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_C[+57]EEDEEFTC[+57]R_	-40	TRUE	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	CEEDEEFTCR	-1.2823446	0.9011946	-1.2823446|-4.0177083	P00747	TRUE	_C[+57]EEDEEFTC[+57]R_	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	687.7504667	0.00036315	385101.16	H2O	7	y	1	938.3672575	6.748549	TRUE	sp	P00747	P00747	PLMN_HUMAN	Plasminogen	Homo sapiens	9606	PLG	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_C[+57]EEDEEFTC[+57]R_	-40	TRUE	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	CEEDEEFTCR	-1.2823446	0.9011946	-1.2823446|-4.0177083	P00747	TRUE	_C[+57]EEDEEFTC[+57]R_	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	687.7504667	0.00036315	385101.16	H2O	8	y	1	1067.409851	26.568663	TRUE	sp	P00747	P00747	PLMN_HUMAN	Plasminogen	Homo sapiens	9606	PLG	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_C[+57]EEDEEFTC[+57]R_	-40	TRUE	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	CEEDEEFTCR	-1.2823446	0.9011946	-1.2823446|-4.0177083	P00747	TRUE	_C[+57]EEDEEFTC[+57]R_	_C[Carbamidomethyl (C)]EEDEEFTC[Carbamidomethyl (C)]R_	687.7504667	0.00036315	385101.16	H2O	9	y	1	1196.452444	10.719879	TRUE	sp	P00747	P00747	PLMN_HUMAN	Plasminogen	Homo sapiens	9606	PLG	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	noloss	3	b	1	386.2034101	7.7662497	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	noloss	3	y	1	347.2037444	3.2926106	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	noloss	4	b	1	499.2874741	6.734219	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	noloss	4	y	1	475.2623219	14.221682	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	noloss	5	b	1	586.3195025	1.5547751	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	noloss	5	y	1	604.304915	12.158777	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	noloss	6	b	1	715.3620955	3.230758	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	noloss	6	y	1	675.3420288	25.758886	FALSE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	noloss	7	b	1	786.3992093	1.9753934	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	noloss	7	y	1	804.3846219	22.087849	FALSE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	noloss	8	b	1	915.4418024	1.8401791	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	noloss	8	b	2	458.2245394	1.6086894	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	noloss	8	y	1	891.4166503	74.71736	FALSE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	noloss	9	y	1	1004.500714	54.28254	FALSE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	noloss	10	b	1	1114.537494	1.0495659	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	noloss	10	y	1	1133.543307	100	FALSE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	noloss	11	y	1	1261.601885	24.588951	FALSE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	noloss	11	y	2	631.3045807	2.1570513	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	H2O	3	b	1	368.1928453	3.4925315	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	NH3	3	b	1	369.1768617	2.9452903	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	H2O	4	b	1	481.2769092	1.0290943	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	NH3	4	y	1	458.2357735	1.8126523	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	H2O	8	y	1	873.4060855	2.0101676	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	H2O	10	y	1	1115.532743	6.641937	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	H2O	11	y	2	622.2992983	6.5424037	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	H2O	11	y	1	1243.59132	3.3419182	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	NH3	11	y	1	1244.575336	10.56645	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	NH3	4	b	1	482.2609257	1.1637812	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
+IK_221028_C19_lib2_01	2		_KQELSEAEQATR_	-40	TRUE	_KQELSEAEQATR_	KQELSEAEQATR	-26.474043	0.9663669	-26.300653|-26.58661	P01024	TRUE	_KQELSEAEQATR_	_KQELSEAEQATR_	695.3520622	0.000291637	1674753	NH3	3	y	1	330.177196	1.6216956	TRUE	sp	P01024	P01024	CO3_HUMAN	Complement C3	Homo sapiens	9606	C3	1	2	H_sapiens_uniprot_reviewed_cannonical_3AUP000005640_2-2022.08.12-15.33.51.77
diff --git a/implementations/python/tests/test_data/phl004_canonical_sall_pv_plasma.head.diann.tsv b/implementations/python/tests/test_data/phl004_canonical_sall_pv_plasma.head.diann.tsv
new file mode 100644
index 0000000..ea42718
--- /dev/null
+++ b/implementations/python/tests/test_data/phl004_canonical_sall_pv_plasma.head.diann.tsv
@@ -0,0 +1,147 @@
+FileName	PrecursorMz	ProductMz	Tr_recalibrated	IonMobility	transition_name	LibraryIntensity	transition_group_id	decoy	PeptideSequence	Proteotypic	QValue	PGQValue	Ms1ProfileCorr	ProteinGroup	ProteinName	Genes	FullUniModPeptideName	ModifiedPeptide	PrecursorCharge	PeptideGroupLabel	UniprotID	NTerm	CTerm	FragmentType	FragmentCharge	FragmentSeriesNumber	FragmentLossType	ExcludeFromAssay
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	778.41296	427.22995	117.9	0	AAAAAAAAAAAAAAAASAGGK2_98_1_0_6	1	AAAAAAAAAAAAAAAASAGGK2	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	2	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	b	1	6	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	778.41296	498.26706	117.9	0	AAAAAAAAAAAAAAAASAGGK2_98_1_0_7	0.88039231	AAAAAAAAAAAAAAAASAGGK2	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	2	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	b	1	7	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	778.41296	569.3042	117.9	0	AAAAAAAAAAAAAAAASAGGK2_98_1_0_8	0.66275221	AAAAAAAAAAAAAAAASAGGK2	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	2	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	b	1	8	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	778.41296	356.19284	117.9	0	AAAAAAAAAAAAAAAASAGGK2_98_1_0_5	0.58039355	AAAAAAAAAAAAAAAASAGGK2	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	2	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	b	1	5	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	778.41296	640.34131	117.9	0	AAAAAAAAAAAAAAAASAGGK2_98_1_0_9	0.44118175	AAAAAAAAAAAAAAAASAGGK2	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	2	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	b	1	9	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	778.41296	703.37335	117.9	0	AAAAAAAAAAAAAAAASAGGK2_121_1_0_12	0.40000239	AAAAAAAAAAAAAAAASAGGK2	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	2	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	y	1	9	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	778.41296	916.48468	117.9	0	AAAAAAAAAAAAAAAASAGGK2_121_1_0_9	0.40000239	AAAAAAAAAAAAAAAASAGGK2	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	2	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	y	1	12	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	778.41296	1129.5961	117.9	0	AAAAAAAAAAAAAAAASAGGK2_121_1_0_6	0.36078224	AAAAAAAAAAAAAAAASAGGK2	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	2	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	y	1	15	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	778.41296	774.41046	117.9	0	AAAAAAAAAAAAAAAASAGGK2_121_1_0_11	0.34117815	AAAAAAAAAAAAAAAASAGGK2	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	2	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	y	1	10	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	778.41296	845.44757	117.9	0	AAAAAAAAAAAAAAAASAGGK2_121_1_0_10	0.30195799	AAAAAAAAAAAAAAAASAGGK2	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	2	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	y	1	11	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	778.41296	490.26199	117.9	0	AAAAAAAAAAAAAAAASAGGK2_121_1_0_15	0.301357	AAAAAAAAAAAAAAAASAGGK2	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	2	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	y	1	6	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	778.41296	419.22488	117.9	0	AAAAAAAAAAAAAAAASAGGK2_121_1_0_16	0.29999879	AAAAAAAAAAAAAAAASAGGK2	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	2	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	y	1	5	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	778.41296	1058.5588	117.9	0	AAAAAAAAAAAAAAAASAGGK2_121_1_0_7	0.25881943	AAAAAAAAAAAAAAAASAGGK2	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	2	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	y	1	14	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	778.41296	632.33624	117.9	0	AAAAAAAAAAAAAAAASAGGK2_121_1_0_13	0.2572208	AAAAAAAAAAAAAAAASAGGK2	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	2	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	y	1	8	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	778.41296	853.45264	117.9	0	AAAAAAAAAAAAAAAASAGGK2_98_1_0_12	0.18174933	AAAAAAAAAAAAAAAASAGGK2	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	2	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	b	1	12	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	778.41296	987.52179	117.9	0	AAAAAAAAAAAAAAAASAGGK2_121_1_0_8	0.17647269	AAAAAAAAAAAAAAAASAGGK2	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	2	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	y	1	13	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	778.41296	782.41553	117.9	0	AAAAAAAAAAAAAAAASAGGK2_98_1_0_11	0.13921174	AAAAAAAAAAAAAAAASAGGK2	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	2	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	b	1	11	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	778.41296	924.48975	117.9	0	AAAAAAAAAAAAAAAASAGGK2_98_1_0_13	0.13725254	AAAAAAAAAAAAAAAASAGGK2	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	2	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	b	1	13	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	778.41296	711.37842	117.9	0	AAAAAAAAAAAAAAAASAGGK2_98_1_0_10	0.11764847	AAAAAAAAAAAAAAAASAGGK2	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	2	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	b	1	10	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	778.41296	1200.6332	117.9	0	AAAAAAAAAAAAAAAASAGGK2_121_1_0_5	0.10526822	AAAAAAAAAAAAAAAASAGGK2	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	2	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	y	1	16	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	519.27777	703.37335	117.4	0	AAAAAAAAAAAAAAAASAGGK3_121_1_0_12	1	AAAAAAAAAAAAAAAASAGGK3	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	3	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	y	1	9	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	519.27777	419.22488	117.4	0	AAAAAAAAAAAAAAAASAGGK3_121_1_0_16	0.75598669	AAAAAAAAAAAAAAAASAGGK3	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	3	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	y	1	5	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	519.27777	498.26706	117.4	0	AAAAAAAAAAAAAAAASAGGK3_98_1_0_7	0.67160469	AAAAAAAAAAAAAAAASAGGK3	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	3	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	b	1	7	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	519.27777	640.34131	117.4	0	AAAAAAAAAAAAAAAASAGGK3_98_1_0_9	0.61734796	AAAAAAAAAAAAAAAASAGGK3	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	3	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	b	1	9	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	519.27777	569.3042	117.4	0	AAAAAAAAAAAAAAAASAGGK3_98_1_0_8	0.56162828	AAAAAAAAAAAAAAAASAGGK3	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	3	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	b	1	8	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	519.27777	356.19284	117.4	0	AAAAAAAAAAAAAAAASAGGK3_98_1_0_5	0.51522595	AAAAAAAAAAAAAAAASAGGK3	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	3	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	b	1	5	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	519.27777	427.22995	117.4	0	AAAAAAAAAAAAAAAASAGGK3_98_1_0_6	0.51522595	AAAAAAAAAAAAAAAASAGGK3	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	3	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	b	1	6	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	519.27777	774.41046	117.4	0	AAAAAAAAAAAAAAAASAGGK3_121_1_0_11	0.48234525	AAAAAAAAAAAAAAAASAGGK3	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	3	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	y	1	10	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	519.27777	561.29907	117.4	0	AAAAAAAAAAAAAAAASAGGK3_121_1_0_14	0.41774848	AAAAAAAAAAAAAAAASAGGK3	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	3	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	y	1	7	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	519.27777	490.26199	117.4	0	AAAAAAAAAAAAAAAASAGGK3_121_1_0_15	0.37849048	AAAAAAAAAAAAAAAASAGGK3	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	3	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	y	1	6	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	519.27777	648.33875	117.4	0	AAAAAAAAAAAAAAAASAGGK3_98_2_0_18	0.37133196	AAAAAAAAAAAAAAAASAGGK3	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	3	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	b	2	18	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	519.27777	632.33624	117.4	0	AAAAAAAAAAAAAAAASAGGK3_121_1_0_13	0.32955998	AAAAAAAAAAAAAAAASAGGK3	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	3	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	y	1	8	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	519.27777	711.37842	117.4	0	AAAAAAAAAAAAAAAASAGGK3_98_1_0_10	0.27849901	AAAAAAAAAAAAAAAASAGGK3	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	3	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	b	1	10	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	519.27777	782.41553	117.4	0	AAAAAAAAAAAAAAAASAGGK3_98_1_0_11	0.23208249	AAAAAAAAAAAAAAAASAGGK3	0	AAAAAAAAAAAAAAAASAGGK	1	0	0	0	P0CG40	SP9_HUMAN	SP9	AAAAAAAAAAAAAAAASAGGK	AAAAAAAAAAAAAAAASAGGK	3	AAAAAAAAAAAAAAAASAGGK	P0CG40	0	0	b	1	11	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	427.22995	110	0	AAAAAAAAAAAAAAAGAGAGAK2_98_1_0_6	1	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	b	1	6	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	356.19284	110	0	AAAAAAAAAAAAAAAGAGAGAK2_98_1_0_5	0.91602999	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	b	1	5	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	498.26706	110	0	AAAAAAAAAAAAAAAGAGAGAK2_98_1_0_7	0.83227003	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	b	1	7	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	569.3042	110	0	AAAAAAAAAAAAAAAGAGAGAK2_98_1_0_8	0.79017997	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	b	1	8	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	640.34131	110	0	AAAAAAAAAAAAAAAGAGAGAK2_98_1_0_9	0.59228998	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	b	1	9	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	815.43701	110	0	AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_11	0.54980999	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	11	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	886.47412	110	0	AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_10	0.51920998	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	12	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	531.28851	110	0	AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_15	0.50638998	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	7	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	957.51123	110	0	AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_9	0.49983999	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	13	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	673.36279	110	0	AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_13	0.46117997	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	9	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	1028.5483	110	0	AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_8	0.44073999	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	14	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	711.37842	110	0	AAAAAAAAAAAAAAAGAGAGAK2_98_1_0_10	0.42937002	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	b	1	10	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	744.3999	110	0	AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_12	0.41159999	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	10	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	602.32562	110	0	AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_14	0.40491998	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	8	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	782.41553	110	0	AAAAAAAAAAAAAAAGAGAGAK2_98_1_0_11	0.38191	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	b	1	11	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	403.22995	110	0	AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_17	0.36974999	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	5	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	1099.5854	110	0	AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_7	0.34740001	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	15	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	1170.6226	110	0	AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_6	0.31896001	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	16	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	853.45264	110	0	AAAAAAAAAAAAAAAGAGAGAK2_98_1_0_12	0.27395001	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	b	1	12	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	474.26706	110	0	AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_16	0.26207	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	6	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	995.52686	110	0	AAAAAAAAAAAAAAAGAGAGAK2_98_1_0_14	0.20731001	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	b	1	14	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	1241.6597	110	0	AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_5	0.2052	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	17	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	924.48975	110	0	AAAAAAAAAAAAAAAGAGAGAK2_98_1_0_13	0.18191999	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	b	1	13	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	798.92627	1312.6968	110	0	AAAAAAAAAAAAAAAGAGAGAK2_121_1_0_4	0.114	AAAAAAAAAAAAAAAGAGAGAK2	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	2	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	18	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	532.95325	531.28851	109.8	0	AAAAAAAAAAAAAAAGAGAGAK3_121_1_0_15	1	AAAAAAAAAAAAAAAGAGAGAK3	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	3	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	7	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	532.95325	498.26706	109.8	0	AAAAAAAAAAAAAAAGAGAGAK3_98_1_0_7	0.82642001	AAAAAAAAAAAAAAAGAGAGAK3	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	3	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	b	1	7	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	532.95325	427.22995	109.8	0	AAAAAAAAAAAAAAAGAGAGAK3_98_1_0_6	0.75252002	AAAAAAAAAAAAAAAGAGAGAK3	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	3	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	b	1	6	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	532.95325	356.19284	109.8	0	AAAAAAAAAAAAAAAGAGAGAK3_98_1_0_5	0.71425998	AAAAAAAAAAAAAAAGAGAGAK3	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	3	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	b	1	5	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	532.95325	602.32562	109.8	0	AAAAAAAAAAAAAAAGAGAGAK3_121_1_0_14	0.66922998	AAAAAAAAAAAAAAAGAGAGAK3	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	3	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	8	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	532.95325	569.3042	109.8	0	AAAAAAAAAAAAAAAGAGAGAK3_98_1_0_8	0.56159002	AAAAAAAAAAAAAAAGAGAGAK3	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	3	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	b	1	8	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	532.95325	673.36279	109.8	0	AAAAAAAAAAAAAAAGAGAGAK3_121_1_0_13	0.52272004	AAAAAAAAAAAAAAAGAGAGAK3	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	3	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	9	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	532.95325	640.34131	109.8	0	AAAAAAAAAAAAAAAGAGAGAK3_98_1_0_9	0.47526002	AAAAAAAAAAAAAAAGAGAGAK3	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	3	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	b	1	9	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	532.95325	711.37842	109.8	0	AAAAAAAAAAAAAAAGAGAGAK3_98_1_0_10	0.40158999	AAAAAAAAAAAAAAAGAGAGAK3	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	3	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	b	1	10	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	532.95325	744.3999	109.8	0	AAAAAAAAAAAAAAAGAGAGAK3_121_1_0_12	0.39754999	AAAAAAAAAAAAAAAGAGAGAK3	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	3	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	10	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	532.95325	403.22995	109.8	0	AAAAAAAAAAAAAAAGAGAGAK3_121_1_0_17	0.31963	AAAAAAAAAAAAAAAGAGAGAK3	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	3	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	5	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	532.95325	462.7485	109.8	0	AAAAAAAAAAAAAAAGAGAGAK3_98_2_0_13	0.27653	AAAAAAAAAAAAAAAGAGAGAK3	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	3	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	b	2	13	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	532.95325	782.41553	109.8	0	AAAAAAAAAAAAAAAGAGAGAK3_98_1_0_11	0.27631998	AAAAAAAAAAAAAAAGAGAGAK3	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	3	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	b	1	11	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	532.95325	815.43701	109.8	0	AAAAAAAAAAAAAAAGAGAGAK3_121_1_0_11	0.24988998	AAAAAAAAAAAAAAAGAGAGAK3	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	3	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	11	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	532.95325	474.26706	109.8	0	AAAAAAAAAAAAAAAGAGAGAK3_121_1_0_16	0.24581002	AAAAAAAAAAAAAAAGAGAGAK3	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	3	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	6	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	532.95325	391.7114	109.8	0	AAAAAAAAAAAAAAAGAGAGAK3_98_2_0_11	0.18824001	AAAAAAAAAAAAAAAGAGAGAK3	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	3	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	b	2	11	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	532.95325	853.45264	109.8	0	AAAAAAAAAAAAAAAGAGAGAK3_98_1_0_12	0.17081	AAAAAAAAAAAAAAAGAGAGAK3	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	3	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	b	1	12	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	532.95325	886.47412	109.8	0	AAAAAAAAAAAAAAAGAGAGAK3_121_1_0_10	0.16329999	AAAAAAAAAAAAAAAGAGAGAK3	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	3	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	12	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	532.95325	924.48975	109.8	0	AAAAAAAAAAAAAAAGAGAGAK3_98_1_0_13	0.10528	AAAAAAAAAAAAAAAGAGAGAK3	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	3	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	b	1	13	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	532.95325	957.51123	109.8	0	AAAAAAAAAAAAAAAGAGAGAK3_121_1_0_9	0.091109999	AAAAAAAAAAAAAAAGAGAGAK3	0	AAAAAAAAAAAAAAAGAGAGAK	1	0	0	0	P55011	S12A2_HUMAN	SLC12A2	AAAAAAAAAAAAAAAGAGAGAK	AAAAAAAAAAAAAAAGAGAGAK	3	AAAAAAAAAAAAAAAGAGAGAK	P55011	0	0	y	1	13	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	746.36969	646.31549	151.3	0	AAAAAAAAAAAAAAASGFAYPGTSER3_121_1_0_20	1	AAAAAAAAAAAAAAASGFAYPGTSER3	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	3	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	y	1	6	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	746.36969	880.41589	151.3	0	AAAAAAAAAAAAAAASGFAYPGTSER3_121_1_0_18	0.72346348	AAAAAAAAAAAAAAASGFAYPGTSER3	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	3	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	y	1	8	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	746.36969	498.26706	151.3	0	AAAAAAAAAAAAAAASGFAYPGTSER3_98_1_0_7	0.66272509	AAAAAAAAAAAAAAASGFAYPGTSER3	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	3	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	b	1	7	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	746.36969	569.3042	151.3	0	AAAAAAAAAAAAAAASGFAYPGTSER3_98_1_0_8	0.60243261	AAAAAAAAAAAAAAASGFAYPGTSER3	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	3	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	b	1	8	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	746.36969	356.19284	151.3	0	AAAAAAAAAAAAAAASGFAYPGTSER3_98_1_0_5	0.58793229	AAAAAAAAAAAAAAASGFAYPGTSER3	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	3	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	b	1	5	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	746.36969	809.37878	151.3	0	AAAAAAAAAAAAAAASGFAYPGTSER3_121_1_0_19	0.48742947	AAAAAAAAAAAAAAASGFAYPGTSER3	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	3	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	y	1	7	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	746.36969	711.37842	151.3	0	AAAAAAAAAAAAAAASGFAYPGTSER3_98_1_0_10	0.48370829	AAAAAAAAAAAAAAASGFAYPGTSER3	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	3	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	b	1	10	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	746.36969	427.22995	151.3	0	AAAAAAAAAAAAAAASGFAYPGTSER3_98_1_0_6	0.48304707	AAAAAAAAAAAAAAASGFAYPGTSER3	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	3	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	b	1	6	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	746.36969	1027.4844	151.3	0	AAAAAAAAAAAAAAASGFAYPGTSER3_121_1_0_17	0.38692665	AAAAAAAAAAAAAAASGFAYPGTSER3	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	3	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	y	1	9	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	746.36969	853.45264	151.3	0	AAAAAAAAAAAAAAASGFAYPGTSER3_98_1_0_12	0.32992482	AAAAAAAAAAAAAAASGFAYPGTSER3	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	3	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	b	1	12	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	746.36969	1171.5378	151.3	0	AAAAAAAAAAAAAAASGFAYPGTSER3_121_1_0_15	0.31720817	AAAAAAAAAAAAAAASGFAYPGTSER3	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	3	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	y	1	11	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	746.36969	782.41553	151.3	0	AAAAAAAAAAAAAAASGFAYPGTSER3_98_1_0_11	0.21106207	AAAAAAAAAAAAAAASGFAYPGTSER3	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	3	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	b	1	11	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	746.36969	640.34131	151.3	0	AAAAAAAAAAAAAAASGFAYPGTSER3_98_1_0_9	0.21104671	AAAAAAAAAAAAAAASGFAYPGTSER3	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	3	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	b	1	9	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	560.02911	646.31549	151.10001	0	AAAAAAAAAAAAAAASGFAYPGTSER4_121_1_0_20	1	AAAAAAAAAAAAAAASGFAYPGTSER4	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	4	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	y	1	6	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	560.02911	356.19284	151.10001	0	AAAAAAAAAAAAAAASGFAYPGTSER4_98_1_0_5	0.66913706	AAAAAAAAAAAAAAASGFAYPGTSER4	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	4	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	b	1	5	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	560.02911	492.24124	151.10001	0	AAAAAAAAAAAAAAASGFAYPGTSER4_121_1_0_22	0.59304374	AAAAAAAAAAAAAAASGFAYPGTSER4	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	4	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	y	1	4	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	560.02911	498.26706	151.10001	0	AAAAAAAAAAAAAAASGFAYPGTSER4_98_1_0_7	0.4210082	AAAAAAAAAAAAAAASGFAYPGTSER4	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	4	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	b	1	7	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	560.02911	809.37878	151.10001	0	AAAAAAAAAAAAAAASGFAYPGTSER4_121_1_0_19	0.4210082	AAAAAAAAAAAAAAASGFAYPGTSER4	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	4	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	y	1	7	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	560.02911	569.3042	151.10001	0	AAAAAAAAAAAAAAASGFAYPGTSER4_98_1_0_8	0.36934987	AAAAAAAAAAAAAAASGFAYPGTSER4	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	4	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	b	1	8	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	560.02911	711.37842	151.10001	0	AAAAAAAAAAAAAAASGFAYPGTSER4_98_1_0_10	0.344071	AAAAAAAAAAAAAAASGFAYPGTSER4	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	4	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	b	1	10	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	560.02911	514.24579	151.10001	0	AAAAAAAAAAAAAAASGFAYPGTSER4_121_2_0_17	0.33137658	AAAAAAAAAAAAAAASGFAYPGTSER4	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	4	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	y	2	9	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	560.02911	427.22995	151.10001	0	AAAAAAAAAAAAAAASGFAYPGTSER4_98_1_0_6	0.33042264	AAAAAAAAAAAAAAASGFAYPGTSER4	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	4	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	b	1	6	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	560.02911	640.34131	151.10001	0	AAAAAAAAAAAAAAASGFAYPGTSER4_98_1_0_9	0.31578368	AAAAAAAAAAAAAAASGFAYPGTSER4	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	4	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	b	1	9	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	560.02911	586.27252	151.10001	0	AAAAAAAAAAAAAAASGFAYPGTSER4_121_2_0_15	0.27729672	AAAAAAAAAAAAAAASGFAYPGTSER4	0	AAAAAAAAAAAAAAASGFAYPGTSER	1	0	0	0	P35453	HXD13_HUMAN	HOXD13	AAAAAAAAAAAAAAASGFAYPGTSER	AAAAAAAAAAAAAAASGFAYPGTSER	4	AAAAAAAAAAAAAAASGFAYPGTSER	P35453	0	0	y	2	11	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	429.24561	573.33551	88.5	0	AAAAAAAAAAK2_121_1_0_4	1	AAAAAAAAAAK2	0	AAAAAAAAAAK	1	0	0	0	P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341			AAAAAAAAAAK	AAAAAAAAAAK	2	AAAAAAAAAAK	P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341	0	0	y	1	7	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	429.24561	502.29837	88.5	0	AAAAAAAAAAK2_121_1_0_5	0.84049195	AAAAAAAAAAK2	0	AAAAAAAAAAK	1	0	0	0	P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341			AAAAAAAAAAK	AAAAAAAAAAK	2	AAAAAAAAAAK	P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341	0	0	y	1	6	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	429.24561	644.37262	88.5	0	AAAAAAAAAAK2_121_1_0_3	0.62802154	AAAAAAAAAAK2	0	AAAAAAAAAAK	1	0	0	0	P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341			AAAAAAAAAAK	AAAAAAAAAAK	2	AAAAAAAAAAK	P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341	0	0	y	1	8	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	429.24561	715.40973	88.5	0	AAAAAAAAAAK2_121_1_0_2	0.53955686	AAAAAAAAAAK2	0	AAAAAAAAAAK	1	0	0	0	P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341			AAAAAAAAAAK	AAAAAAAAAAK	2	AAAAAAAAAAK	P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341	0	0	y	1	9	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	429.24561	356.19284	88.5	0	AAAAAAAAAAK2_98_1_0_5	0.52399796	AAAAAAAAAAK2	0	AAAAAAAAAAK	1	0	0	0	P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341			AAAAAAAAAAK	AAAAAAAAAAK	2	AAAAAAAAAAK	P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341	0	0	b	1	5	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	429.24561	431.26126	88.5	0	AAAAAAAAAAK2_121_1_0_6	0.46901798	AAAAAAAAAAK2	0	AAAAAAAAAAK	1	0	0	0	P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341			AAAAAAAAAAK	AAAAAAAAAAK	2	AAAAAAAAAAK	P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341	0	0	y	1	5	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	429.24561	427.22995	88.5	0	AAAAAAAAAAK2_98_1_0_6	0.32801768	AAAAAAAAAAK2	0	AAAAAAAAAAK	1	0	0	0	P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341			AAAAAAAAAAK	AAAAAAAAAAK	2	AAAAAAAAAAK	P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341	0	0	b	1	6	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	429.24561	360.22415	88.5	0	AAAAAAAAAAK2_121_1_0_7	0.30400032	AAAAAAAAAAK2	0	AAAAAAAAAAK	1	0	0	0	P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341			AAAAAAAAAAK	AAAAAAAAAAK	2	AAAAAAAAAAK	P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341	0	0	y	1	4	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	429.24561	498.26706	88.5	0	AAAAAAAAAAK2_98_1_0_7	0.22399409	AAAAAAAAAAK2	0	AAAAAAAAAAK	1	0	0	0	P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341			AAAAAAAAAAK	AAAAAAAAAAK	2	AAAAAAAAAAK	P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341	0	0	b	1	7	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	429.24561	786.44684	88.5	0	AAAAAAAAAAK2_121_1_0_1	0.19198385	AAAAAAAAAAK2	0	AAAAAAAAAAK	1	0	0	0	P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341			AAAAAAAAAAK	AAAAAAAAAAK	2	AAAAAAAAAAK	P50914,P50458,A6NHT5,P15502,DECOY_Q9Y651,DECOY_O60341	0	0	y	1	10	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	443.24869	601.34161	38.400002	0	AAAAAAAAAAR2_121_1_0_4	1	AAAAAAAAAAR2	0	AAAAAAAAAAR	1	0	0	0	P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40			AAAAAAAAAAR	AAAAAAAAAAR	2	AAAAAAAAAAR	P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40	0	0	y	1	7	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	443.24869	530.3045	38.400002	0	AAAAAAAAAAR2_121_1_0_5	0.90602791	AAAAAAAAAAR2	0	AAAAAAAAAAR	1	0	0	0	P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40			AAAAAAAAAAR	AAAAAAAAAAR	2	AAAAAAAAAAR	P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40	0	0	y	1	6	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	443.24869	672.37872	38.400002	0	AAAAAAAAAAR2_121_1_0_3	0.83756346	AAAAAAAAAAR2	0	AAAAAAAAAAR	1	0	0	0	P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40			AAAAAAAAAAR	AAAAAAAAAAR	2	AAAAAAAAAAR	P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40	0	0	y	1	8	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	443.24869	356.19284	38.400002	0	AAAAAAAAAAR2_98_1_0_5	0.69194162	AAAAAAAAAAR2	0	AAAAAAAAAAR	1	0	0	0	P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40			AAAAAAAAAAR	AAAAAAAAAAR	2	AAAAAAAAAAR	P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40	0	0	b	1	5	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	443.24869	388.23029	38.400002	0	AAAAAAAAAAR2_121_1_0_7	0.52842641	AAAAAAAAAAR2	0	AAAAAAAAAAR	1	0	0	0	P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40			AAAAAAAAAAR	AAAAAAAAAAR	2	AAAAAAAAAAR	P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40	0	0	y	1	4	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	443.24869	459.2674	38.400002	0	AAAAAAAAAAR2_121_1_0_6	0.47519037	AAAAAAAAAAR2	0	AAAAAAAAAAR	1	0	0	0	P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40			AAAAAAAAAAR	AAAAAAAAAAR	2	AAAAAAAAAAR	P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40	0	0	y	1	5	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	443.24869	743.41583	38.400002	0	AAAAAAAAAAR2_121_1_0_2	0.39619291	AAAAAAAAAAR2	0	AAAAAAAAAAR	1	0	0	0	P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40			AAAAAAAAAAR	AAAAAAAAAAR	2	AAAAAAAAAAR	P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40	0	0	y	1	9	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	443.24869	427.22995	38.400002	0	AAAAAAAAAAR2_98_1_0_6	0.36180204	AAAAAAAAAAR2	0	AAAAAAAAAAR	1	0	0	0	P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40			AAAAAAAAAAR	AAAAAAAAAAR	2	AAAAAAAAAAR	P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40	0	0	b	1	6	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	443.24869	569.3042	38.400002	0	AAAAAAAAAAR2_98_1_0_8	0.20031726	AAAAAAAAAAR2	0	AAAAAAAAAAR	1	0	0	0	P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40			AAAAAAAAAAR	AAAAAAAAAAR	2	AAAAAAAAAAR	P47928,Q9Y651,DECOY_Q76L83,DECOY_Q8WXD9,DECOY_Q5VZB9,DECOY_P35453,DECOY_O14654,DECOY_P55011,DECOY_P0CG40	0	0	b	1	8	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	789.49811	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_121_1_0_15	1	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	y	1	7	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	369.22449	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_121_1_0_19	0.58921003	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	y	1	3	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	427.22995	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_98_1_0_6	0.31046999	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	b	1	6	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	498.26706	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_98_1_0_7	0.30821002	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	b	1	7	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	569.3042	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_98_1_0_8	0.26800001	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	b	1	8	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	356.19284	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_98_1_0_5	0.25783998	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	b	1	5	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	640.34131	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_98_1_0_9	0.20063001	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	b	1	9	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	1101.6779	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_121_1_0_11	0.15594999	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	y	1	11	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	1188.71	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_121_1_0_10	0.15020999	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	y	1	12	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	902.58221	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_121_1_0_14	0.13153	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	y	1	8	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	1259.7471	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_121_1_0_9	0.1284	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	y	1	13	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	973.61932	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_121_1_0_13	0.11756	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	y	1	9	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	997.50616	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_98_1_0_14	0.11383001	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	b	1	14	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	711.37842	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_98_1_0_10	0.10726	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	b	1	10	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	1330.7842	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_121_1_0_8	0.10523	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	y	1	14	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	1110.5902	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_98_1_0_15	0.10514	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	b	1	15	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	926.46899	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_98_1_0_13	0.10461	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	b	1	13	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	1401.8213	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_121_1_0_7	0.080120005	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	y	1	15	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	855.43188	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_98_1_0_12	0.074140005	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	b	1	12	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	1044.6564	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_121_1_0_12	0.072379999	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	y	1	10	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	482.30853	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_121_1_0_18	0.067829996	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	y	1	4	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	798.41046	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_98_1_0_11	0.05923	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	b	1	11	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	1472.8584	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_121_1_0_6	0.055300001	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	y	1	16	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	692.44537	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_121_1_0_16	0.044360001	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	y	1	6	noloss	False
+/home/andrew/hc-storage/diabetes_study/speclib/phl004_canonical_sall_pv.csv	950.04419	1543.8955	132.2	0	AAAAAAAAAASGAAIPPLIPPR2_121_1_0_5	0.036620002	AAAAAAAAAASGAAIPPLIPPR2	0	AAAAAAAAAASGAAIPPLIPPR	1	0	0	0	O14654	IRS4_HUMAN	IRS4	AAAAAAAAAASGAAIPPLIPPR	AAAAAAAAAASGAAIPPLIPPR	2	AAAAAAAAAASGAAIPPLIPPR	O14654	0	0	y	1	17	noloss	False
diff --git a/implementations/python/tests/test_library_backend.py b/implementations/python/tests/test_library_backend.py
index 456c7b7..1560066 100644
--- a/implementations/python/tests/test_library_backend.py
+++ b/implementations/python/tests/test_library_backend.py
@@ -1,9 +1,9 @@
-from mzlib.spectrum import Spectrum
+import math
 import os
 import unittest
-import tempfile
 
-from mzlib.backends import MSPSpectralLibrary, TextSpectralLibrary, JSONSpectralLibrary
+from mzlib.spectrum import Spectrum
+from mzlib.backends import (MSPSpectralLibrary, TextSpectralLibrary, JSONSpectralLibrary, SpectronautTSVSpectralLibrary, DIANNTSVSpectralLibrary)
 from mzlib.analyte import ANALYTE_MIXTURE_TERM
 
 from .common import datafile
@@ -62,3 +62,37 @@ class TestJSONLibrary(unittest.TestCase, MzSpecLibLibraryBehaviorBase):
     test_file = datafile("chinese_hamster_hcd_selected_head.mzlb.json")
     library_cls = JSONSpectralLibrary
     test_interpretation_file = datafile("complex_interpretations_with_members.mzlb.json")
+
+
+class TestSpectronautLibrary(unittest.TestCase, LibraryBehaviorBase):
+    test_file = datafile("human_serum.head.spectronaut.tsv")
+    library_cls = SpectronautTSVSpectralLibrary
+
+    def test_sequence_behavior(self):
+        lib = self._open_library()
+        assert len(lib) == 10
+
+        spec: Spectrum = lib[0]
+        assert spec.name == 'AQIPILR/2'
+        assert math.isclose(spec.precursor_mz, 405.7634379)
+        assert spec.precursor_charge == 2
+
+        spec = lib[5]
+        assert spec.name == 'QELSEAEQATR/2'
+        assert math.isclose(spec.precursor_mz, 631.3045807)
+        assert spec.get_analyte(1).proteins[0].name == 'CO3_HUMAN'
+
+
+class TestDIANNTSVLibrary(unittest.TestCase, LibraryBehaviorBase):
+    test_file = datafile("phl004_canonical_sall_pv_plasma.head.diann.tsv")
+    library_cls = DIANNTSVSpectralLibrary
+
+    def test_sequence_behavior(self):
+        lib = self._open_library()
+        assert len(lib) == 10
+
+        spec: Spectrum = lib[0]
+        analyte = spec.get_analyte(1)
+        assert analyte.peptide == 'AAAAAAAAAAAAAAAASAGGK'
+        assert spec.name == 'AAAAAAAAAAAAAAAASAGGK2'
+

From a59847a79e81c18744bcfcd816851960246500e1 Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Mon, 15 May 2023 23:19:15 -0400
Subject: [PATCH 09/24] Fix cluster indexing

---
 implementations/python/mzlib/backends/text.py |  2 +-
 implementations/python/mzlib/index/memory.py  | 25 +++++++++++++++----
 implementations/python/tests/test_cluster.py  | 21 ++++++++++++++++
 3 files changed, 42 insertions(+), 6 deletions(-)
 create mode 100644 implementations/python/tests/test_cluster.py

diff --git a/implementations/python/mzlib/backends/text.py b/implementations/python/mzlib/backends/text.py
index 04bad89..e0e0317 100644
--- a/implementations/python/mzlib/backends/text.py
+++ b/implementations/python/mzlib/backends/text.py
@@ -286,7 +286,7 @@ def create_index(self) -> int:
                                     logger.info(
                                         f"Processed {file_offset} bytes, {n_spectra} spectra read, {n_clusters} read")
                             else:
-                                self.index.add_cluster(number=n_clusters, offset=spectrum_file_offset)
+                                self.index.add_cluster(number=current_key, offset=spectrum_file_offset)
                                 if n_clusters % 10000 == 0:
                                     self.index.commit()
                                     logger.info(
diff --git a/implementations/python/mzlib/index/memory.py b/implementations/python/mzlib/index/memory.py
index e401a62..a4b0d3f 100644
--- a/implementations/python/mzlib/index/memory.py
+++ b/implementations/python/mzlib/index/memory.py
@@ -158,6 +158,7 @@ class MemoryIndex(IndexBase):
 
     _dirty: bool
     _by_key: Dict[int, IndexRecord]
+    _by_key_cluster: Dict[int, ClusterIndexRecord]
     _by_name: DefaultDict[str, List[IndexRecord]]
     _by_attr: DefaultDict[str, DefaultDict[Any, List[IndexRecord]]]
 
@@ -171,6 +172,7 @@ def __init__(self, records=None, cluster_records=None, metadata=None):
         self.cluster_records = list(cluster_records or [])
         self._by_name = defaultdict(list)
         self._by_key = {}
+        self._by_key_cluster = {}
         self._by_attr = defaultdict(lambda: defaultdict(list))
         self.metadata = metadata or {}
         self._dirty = True
@@ -217,16 +219,22 @@ def search(self, i=None, **kwargs):
     def search_clusters(self, i=None, **kwargs):
         if self._dirty:
             self._update_index()
+        # Clusters are looked up by key (record.number), not by list position.
         if i is None and kwargs:
             # Executing attribute query
             raise NotImplementedError()
+
         if isinstance(i, Integral):
-            try:
-                return self.cluster_records[i]
-            except IndexError as err:
-                raise KeyError(i) from err
+            return self._by_key_cluster[i]
+        # A slice selects a range of keys (stop exclusive); absent keys are skipped.
         elif isinstance(i, slice):
-            return self.cluster_records[i]
+            keys = self._by_key_cluster
+            # Open bounds span every known key; range() excludes its stop, so the
+            # default stop is max key + 1 or the last cluster would be dropped.
+            start = i.start if i.start is not None else min(keys, default=0)
+            stop = i.stop if i.stop is not None else max(keys, default=-1) + 1
+            # NOTE: i.step is not applied; keys are always walked in steps of 1.
+            return [keys[k] for k in range(start, stop) if k in keys]
 
     def __getitem__(self, i):
         return self._get_by_index(i)
@@ -237,10 +245,17 @@ def _get_by_index(self, i: Union[int, slice]) -> Union[IndexRecord, List[IndexRe
     def _update_index(self):
         self.records.sort(key=lambda x: x.number)
 
+        self._by_key.clear()
         self._by_name = defaultdict(list)
         for record in self:
             self._by_key[record.number] = record
             self._by_name[record.name].append(record)
+
+        self.cluster_records.sort(key=lambda x: x.number)
+        self._by_key_cluster.clear()
+        for record in self.cluster_records:
+            self._by_key_cluster[record.number] = record
+
         self._dirty = False
 
     def add(self, number: int, offset: int, name: str, analyte: Any, attributes=None):
diff --git a/implementations/python/tests/test_cluster.py b/implementations/python/tests/test_cluster.py
new file mode 100644
index 0000000..a4a3047
--- /dev/null
+++ b/implementations/python/tests/test_cluster.py
@@ -0,0 +1,21 @@
+import os
+import unittest
+
+from mzlib.backends import TextSpectralLibrary
+from mzlib.cluster import SpectrumCluster
+
+from .common import datafile
+
+
+class TestSpectrumCluster(unittest.TestCase):
+
+    def get_library(self):
+        test_file = datafile("clusters_example.mzlb")
+        return TextSpectralLibrary(test_file)
+
+    def test_text_cluster_parsing(self):
+        lib = self.get_library()
+        cluster: SpectrumCluster = lib.get_cluster(1)
+
+        assert cluster.key == 1
+        assert cluster.size == 6
\ No newline at end of file

From 890d8faf07f394523994e4d0011fd1c5fece4861 Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Thu, 25 May 2023 22:20:06 -0400
Subject: [PATCH 10/24] Make enums internal

---
 implementations/python/mzlib/backends/text.py | 33 ++++++++++---------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/implementations/python/mzlib/backends/text.py b/implementations/python/mzlib/backends/text.py
index e0e0317..d9873ff 100644
--- a/implementations/python/mzlib/backends/text.py
+++ b/implementations/python/mzlib/backends/text.py
@@ -36,7 +36,7 @@
     r"^\d+(.\d+)?")
 
 
-class SpectrumParserStateEnum(enum.Enum):
+class _SpectrumParserStateEnum(enum.Enum):
     unknown = 0
     header = 1
     analyte = 2
@@ -47,7 +47,7 @@ class SpectrumParserStateEnum(enum.Enum):
     cluster = 7
 
 
-class LibraryParserStateEnum(enum.Enum):
+class _LibraryParserStateEnum(enum.Enum):
     unknown = 0
     header = 1
     attribute_sets = 2
@@ -71,7 +71,8 @@ class LibraryParserStateEnum(enum.Enum):
 attribute_set_types = {
     "spectrum": AttributeSetTypes.spectrum,
     "analyte": AttributeSetTypes.analyte,
-    "interpretation": AttributeSetTypes.interpretation
+    "interpretation": AttributeSetTypes.interpretation,
+    "cluster": AttributeSetTypes.cluster,
 }
 
 
@@ -104,7 +105,7 @@ def _parse_header_from_stream(self, stream: io.TextIOBase) -> Tuple[bool, int]:
         first_line = stream.readline()
         nbytes += len(first_line)
 
-        state = LibraryParserStateEnum.unknown
+        state = _LibraryParserStateEnum.unknown
 
         current_attribute_set = None
         current_attribute_set_type = None
@@ -112,7 +113,7 @@ def _parse_header_from_stream(self, stream: io.TextIOBase) -> Tuple[bool, int]:
         if not _is_header_line(first_line):
             return True, 0
         elif START_OF_LIBRARY_MARKER.match(first_line):
-            state = LibraryParserStateEnum.header
+            state = _LibraryParserStateEnum.header
             match = START_OF_LIBRARY_MARKER.match(first_line)
             version = match.group(1)
             attributes = AttributeManager()
@@ -126,7 +127,7 @@ def _parse_header_from_stream(self, stream: io.TextIOBase) -> Tuple[bool, int]:
                     continue
                 match = START_OF_ATTRIBUTE_SET.match(line)
                 if match:
-                    state = LibraryParserStateEnum.attribute_sets
+                    state = _LibraryParserStateEnum.attribute_sets
                     if current_attribute_set is not None:
                         self._add_attribute_set(
                             current_attribute_set, current_attribute_set_type)
@@ -142,7 +143,7 @@ def _parse_header_from_stream(self, stream: io.TextIOBase) -> Tuple[bool, int]:
                     if match is not None:
                         d = match.groupdict()
                         # If we're in an attribute set, store it in the attribute set
-                        if state == LibraryParserStateEnum.attribute_sets:
+                        if state == _LibraryParserStateEnum.attribute_sets:
                             current_attribute_set.add_attribute(
                                 d['term'], try_cast(d['value']))
                         else: # Otherwise store it in the library level attributes
@@ -160,7 +161,7 @@ def _parse_header_from_stream(self, stream: io.TextIOBase) -> Tuple[bool, int]:
                             d = match.groupdict()
                             # If we're in an attribute set, store it in the attribute
                             # set
-                            if state == LibraryParserStateEnum.attribute_sets:
+                            if state == _LibraryParserStateEnum.attribute_sets:
                                 current_attribute_set.add_attribute(
                                     d['term'], try_cast(d['value']), d['group_id'])
                                 current_attribute_set.group_counter = int(d['group_id'])
@@ -177,7 +178,7 @@ def _parse_header_from_stream(self, stream: io.TextIOBase) -> Tuple[bool, int]:
                                 f"Malformed grouped attribute {line}")
                     elif "=" in line:
                         name, value = line.split("=", 1)
-                        if state == LibraryParserStateEnum.attribute_sets:
+                        if state == _LibraryParserStateEnum.attribute_sets:
                             current_attribute_set.add_attribute(name, value)
                         else:
                             attributes.add_attribute(name, value)
@@ -355,19 +356,19 @@ def _prepare_attribute_dict(self, match):
 
     def _parse_attribute_into(self, line: str, store: Attributed,
                               line_number_message=lambda:'',
-                              state: SpectrumParserStateEnum=None) -> bool:
+                              state: _SpectrumParserStateEnum=None) -> bool:
         match = key_value_term_pattern.match(line)
         if match is not None:
             d = match.groupdict()
             self._prepare_attribute_dict(d)
             if d['term'] == ATTRIBUTE_SET_NAME:
-                if SpectrumParserStateEnum.header == state:
+                if _SpectrumParserStateEnum.header == state:
                     attr_set = self.entry_attribute_sets[d['value']]
-                elif SpectrumParserStateEnum.analyte == state:
+                elif _SpectrumParserStateEnum.analyte == state:
                     attr_set = self.analyte_attribute_sets[d['value']]
-                elif SpectrumParserStateEnum.interpretation == state:
+                elif _SpectrumParserStateEnum.interpretation == state:
                     attr_set = self.interpretation_attribute_sets[d['value']]
-                elif SpectrumParserStateEnum.cluster == state:
+                elif _SpectrumParserStateEnum.cluster == state:
                     attr_set = self.cluster_attribute_sets[d['value']]
                 else:
                     raise ValueError(f"Cannot define attribute sets for {state}")
@@ -403,8 +404,8 @@ def _parse(self, buffer: Iterable[str], spectrum_index: int = None,
         interpretation_member: InterpretationMember = None
         cluster: SpectrumCluster = None
 
-        STATES = SpectrumParserStateEnum
-        state: SpectrumParserStateEnum = STATES.header
+        STATES = _SpectrumParserStateEnum
+        state: _SpectrumParserStateEnum = STATES.header
 
         peak_list = []
         line_number = -1

From 080faccc3585a137bbe5b4ef30224a465b449c2f Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Fri, 26 May 2023 12:28:39 -0400
Subject: [PATCH 11/24] Violating MAY rules is still valid

---
 implementations/python/mzlib/validate/validator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/implementations/python/mzlib/validate/validator.py b/implementations/python/mzlib/validate/validator.py
index eacfc2e..b5816d5 100644
--- a/implementations/python/mzlib/validate/validator.py
+++ b/implementations/python/mzlib/validate/validator.py
@@ -221,10 +221,10 @@ def apply_rules(self, obj: Attributed, path: str, identifier_path: Tuple) -> boo
         for rule in itertools.chain(self.semantic_rules, self.object_rules):
             if rule.path == path:
                 v = rule(obj, path, identifier_path, self)
-                result &= v
                 level = logging.DEBUG
                 if not v and rule.requirement_level > RequirementLevel.may:
                     level = logging.WARN
+                    result &= v
                 logger.log(level, f"Applied {rule.id} to {path}:{identifier_path} {v}/{result}")
         return result
 

From a784c270e8d4d3cded4bb3d2de180618bce79d11 Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Fri, 26 May 2023 12:29:15 -0400
Subject: [PATCH 12/24] Rebuild references

---
 ...chinese_hamster_hcd_selected_head.mzlb.txt | 16 ++---
 implementations/python/mzlib/analyte.py       | 29 +++++++-
 implementations/python/mzlib/attributes.py    | 19 +++--
 .../python/mzlib/backends/diann.py            |  3 +-
 implementations/python/mzlib/backends/msp.py  | 17 ++---
 .../python/mzlib/backends/spectronaut.py      |  2 +
 .../python/mzlib/validate/rules/base.json     | 22 +++++-
 ...hinese_hamster_hcd_selected_head.mzlb.json | 72 +++++++++----------
 ...chinese_hamster_hcd_selected_head.mzlb.txt | 16 ++---
 9 files changed, 126 insertions(+), 70 deletions(-)

diff --git a/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt b/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt
index 5cc3e74..4991bc3 100644
--- a/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt
+++ b/implementations/python/examples/chinese_hamster_hcd_selected_head.mzlb.txt
@@ -1,12 +1,11 @@
 <mzSpecLib 1.0>
-MS:1003188|library name=examples/chinese_hamster_hcd_selected_head.msp
+MS:1003188|library name=examples/chinese_hamster_hcd_selected_head
 <AttributeSet Spectrum=all>
 <AttributeSet Analyte=all>
 <AttributeSet Interpretation=all>
 <Spectrum=1>
 MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
-MS:1000041|charge state=2
 MS:1003208|experimental precursor monoisotopic m/z=855.4538
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=46
@@ -30,6 +29,7 @@ MS:1003059|number of peaks=87
 <Analyte=1>
 MS:1000224|molecular mass=1710.9076
 MS:1000888|stripped peptide sequence=AAAACALTPGPLADLAAR
+MS:1000041|charge state=2
 [1]MS:1001975|delta m/z=1.4
 [1]UO:0000000|unit=UO:0000169|parts per million
 MS:1003208|experimental precursor monoisotopic m/z=855.455
@@ -137,7 +137,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=4
 <Spectrum=2>
 MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_53eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
-MS:1000041|charge state=2
 MS:1003208|experimental precursor monoisotopic m/z=855.4538
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=53
@@ -161,6 +160,7 @@ MS:1003059|number of peaks=204
 <Analyte=1>
 MS:1000224|molecular mass=1710.9076
 MS:1000888|stripped peptide sequence=AAAACALTPGPLADLAAR
+MS:1000041|charge state=2
 [1]MS:1001975|delta m/z=4.2
 [1]UO:0000000|unit=UO:0000169|parts per million
 MS:1003208|experimental precursor monoisotopic m/z=855.4574
@@ -385,7 +385,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=5
 <Spectrum=3>
 MS:1003061|library spectrum name=AAAAGQTGTVPPGAPGALPLPGMAIVK/2_0_76eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
-MS:1000041|charge state=2
 MS:1003208|experimental precursor monoisotopic m/z=1207.1672
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=76
@@ -409,6 +408,7 @@ MS:1003059|number of peaks=122
 <Analyte=1>
 MS:1000224|molecular mass=2414.3344
 MS:1000888|stripped peptide sequence=AAAAGQTGTVPPGAPGALPLPGMAIVK
+MS:1000041|charge state=2
 [1]MS:1001975|delta m/z=-0.9
 [1]UO:0000000|unit=UO:0000169|parts per million
 MS:1003208|experimental precursor monoisotopic m/z=1207.1661
@@ -551,7 +551,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0
 <Spectrum=4>
 MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/2_0_44eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
-MS:1000041|charge state=2
 MS:1003208|experimental precursor monoisotopic m/z=731.9043
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=44
@@ -575,6 +574,7 @@ MS:1003059|number of peaks=111
 <Analyte=1>
 MS:1000224|molecular mass=1463.8086
 MS:1000888|stripped peptide sequence=AAAAGSTSVKPIFSR
+MS:1000041|charge state=2
 [1]MS:1001975|delta m/z=-2.7
 [1]UO:0000000|unit=UO:0000169|parts per million
 MS:1003208|experimental precursor monoisotopic m/z=731.9023
@@ -706,7 +706,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=1
 <Spectrum=5>
 MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/3_0_28eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
-MS:1000041|charge state=3
 MS:1003208|experimental precursor monoisotopic m/z=488.2719
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=28
@@ -730,6 +729,7 @@ MS:1003059|number of peaks=161
 <Analyte=1>
 MS:1000224|molecular mass=1464.8157
 MS:1000888|stripped peptide sequence=AAAAGSTSVKPIFSR
+MS:1000041|charge state=3
 [1]MS:1001975|delta m/z=3.8
 [1]UO:0000000|unit=UO:0000169|parts per million
 MS:1003208|experimental precursor monoisotopic m/z=488.2738
@@ -911,7 +911,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0
 <Spectrum=6>
 MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_50eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
-MS:1000041|charge state=2
 MS:1003208|experimental precursor monoisotopic m/z=830.8834
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=50
@@ -935,6 +934,7 @@ MS:1003059|number of peaks=68
 <Analyte=1>
 MS:1000224|molecular mass=1661.7668
 MS:1000888|stripped peptide sequence=AAAALGSHGSCSSEVEK
+MS:1000041|charge state=2
 [1]MS:1001975|delta m/z=4.1
 [1]UO:0000000|unit=UO:0000169|parts per million
 MS:1003208|experimental precursor monoisotopic m/z=830.8868
@@ -1023,7 +1023,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=6
 <Spectrum=7>
 MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_52eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
-MS:1000041|charge state=2
 MS:1003208|experimental precursor monoisotopic m/z=830.8834
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=52
@@ -1047,6 +1046,7 @@ MS:1003059|number of peaks=402
 <Analyte=1>
 MS:1000224|molecular mass=1661.7668
 MS:1000888|stripped peptide sequence=AAAALGSHGSCSSEVEK
+MS:1000041|charge state=2
 [1]MS:1001975|delta m/z=-2.0
 [1]UO:0000000|unit=UO:0000169|parts per million
 MS:1003208|experimental precursor monoisotopic m/z=830.8817
diff --git a/implementations/python/mzlib/analyte.py b/implementations/python/mzlib/analyte.py
index e787ea0..dac3cb4 100644
--- a/implementations/python/mzlib/analyte.py
+++ b/implementations/python/mzlib/analyte.py
@@ -5,8 +5,9 @@
 except ImportError:
     from collections import (MutableMapping, Mapping)
 
-import textwrap
-from typing import Iterable, KeysView, ItemsView, ValuesView, Dict
+from typing import Iterable, KeysView, ItemsView, Optional, ValuesView, Dict
+
+from pyteomics import proforma
 
 from mzlib.attributes import AttributedEntity, IdentifiedAttributeManager, AttributeManagedProperty, AttributeProxy, AttributeGroupFacet
 
@@ -15,6 +16,9 @@
 FIRST_INTERPRETATION_KEY = '1'
 
 ANALYTE_MIXTURE_TERM = "MS:1003163|analyte mixture members"
+CHARGE_STATE = "MS:1000041|charge state"
+PROFORMA_ION = "MS:1003270|proforma peptidoform ion notation"
+PROFORMA_SEQ = "MS:1000889|proforma peptidoform sequence"
 
 
 class _AnalyteMappingProxy(Mapping):
@@ -205,3 +209,24 @@ class Analyte(IdentifiedAttributeManager):
     mass = AttributeManagedProperty[float]("MS:1001117|theoretical mass")
     peptide = AttributeManagedProperty[str]("MS:1003169|proforma peptidoform sequence")
     proteins = AttributeGroupFacet[ProteinDescription](ProteinDescription)
+
+    @property
+    def charge(self) -> Optional[int]:
+        """The charge state attribute if set, else the charge parsed from the ProForma ion notation, else None."""
+        if self.has_attribute(CHARGE_STATE):
+            return self.get_attribute(CHARGE_STATE)
+        if self.has_attribute(PROFORMA_ION):
+            # No explicit charge state attribute: read it from the parsed ion notation instead.
+            parsed = proforma.ProForma.parse(self.get_attribute(PROFORMA_ION))
+            return parsed.charge_state
+        return None
+
+    @charge.setter
+    def charge(self, value):
+        # Assigning None removes the charge state attribute; any other value is stored under it.
+        if value is None:
+            self.remove_attribute(CHARGE_STATE)
+        elif self.has_attribute(CHARGE_STATE):
+            self.replace_attribute(CHARGE_STATE, value)
+        else:
+            self.add_attribute(CHARGE_STATE, value)
\ No newline at end of file
diff --git a/implementations/python/mzlib/attributes.py b/implementations/python/mzlib/attributes.py
index fc30180..1b21747 100644
--- a/implementations/python/mzlib/attributes.py
+++ b/implementations/python/mzlib/attributes.py
@@ -132,7 +132,8 @@ def get_next_group_identifier(self) -> str:
 
     #### Add an attribute to the list and update the lookup tables
     def add_attribute(self, key: str, value, group_identifier: Optional[str] = None):
-        """Add an attribute to the list and update the lookup tables
+        """
+        Add an attribute to the list and update the lookup tables
 
         Parameters
         ----------
@@ -184,7 +185,8 @@ def add_attribute_group(self, attributes: List[Union[Attribute, Tuple[str, Any]]
     def get_attribute(self, key: str, group_identifier: Optional[str] = None,
                       raw: bool = False) -> Union[Any, List[Any], Attribute,
                                                   List[Attribute]]:
-        """Get the value or values associated with a given
+        """
+        Get the value or values associated with a given
         attribute key.
 
         Parameters
@@ -193,6 +195,8 @@ def get_attribute(self, key: str, group_identifier: Optional[str] = None,
             The name of the attribute to retrieve
         group_identifier : str, optional
             The specific group identifier to return from.
+        raw : bool
+            Whether to return the :class:`Attribute` object or unwrap the value
 
         Returns
         -------
@@ -246,7 +250,8 @@ def replace_attribute(self, key, value, group_identifier=None):
             raise NotImplementedError()
 
     def get_by_name(self, name: str):
-        '''Search for an attribute by human-readable name.
+        """
+        Search for an attribute by human-readable name.
 
         Parameters
         ----------
@@ -257,7 +262,7 @@ def get_by_name(self, name: str):
         -------
         object:
             The attribute value if found or :const:`None`.
-        '''
+        """
         matches = []
         for attr in self:
             if attr.key.split("|")[-1] == name:
@@ -276,7 +281,8 @@ def clear(self):
         self._clear_attributes()
 
     def remove_attribute(self, key, group_identifier=None):
-        """Remove the value or values associated with a given
+        """
+        Remove the value or values associated with a given
         attribute key from the store.
 
         This rebuilds the entire store, which may be expensive.
@@ -329,7 +335,8 @@ def _iter_attribute_groups(self):
         yield None, acc
 
     def has_attribute(self, key):
-        """Test for the presence of a given attribute
+        """
+        Test for the presence of a given attribute
 
         Parameters
         ----------
diff --git a/implementations/python/mzlib/backends/diann.py b/implementations/python/mzlib/backends/diann.py
index f5b6755..cef1868 100644
--- a/implementations/python/mzlib/backends/diann.py
+++ b/implementations/python/mzlib/backends/diann.py
@@ -130,7 +130,7 @@ def _parse_from_buffer(self, buffer: List[Dict[str, Any]], spectrum_index: int =
 
         spec.add_attribute(SPECTRUM_NAME, descr['transition_group_id'])
         spec.add_attribute(SELECTED_ION_MZ, float(descr['PrecursorMz']))
-        spec.add_attribute(CHARGE_STATE, int(descr['PrecursorCharge']))
+
         if 'FileName' in descr:
             spec.add_attribute(SOURCE_FILE, descr['FileName'])
         spec.add_attribute(*self._spectrum_type())
@@ -150,6 +150,7 @@ def _parse_from_buffer(self, buffer: List[Dict[str, Any]], spectrum_index: int =
             analyte.add_attribute(STRIPPED_PEPTIDE_TERM, descr['PeptideSequence'])
         analyte.add_attribute(PROFORMA_PEPTIDE_TERM, pf_seq)
         analyte.add_attribute("MS:1001117|theoretical mass", peptide.mass)
+        analyte.add_attribute(CHARGE_STATE, int(descr['PrecursorCharge']))
 
         protein_group_id = analyte.get_next_group_identifier()
         if "UniprotID" in descr:
diff --git a/implementations/python/mzlib/backends/msp.py b/implementations/python/mzlib/backends/msp.py
index 0f3704f..9b289e6 100644
--- a/implementations/python/mzlib/backends/msp.py
+++ b/implementations/python/mzlib/backends/msp.py
@@ -233,6 +233,10 @@ def add(self, handler: AttributeHandler):
 
 
 analyte_terms = CaseInsensitiveDict({
+    "Charge": "MS:1000041|charge state",
+    "precursor_charge": "MS:1000041|charge state",
+    "precursorcharge": "MS:1000041|charge state",
+
     "MW": "MS:1000224|molecular mass",
     "total exact mass": "MS:1000224|molecular mass",
     "ExactMass": "MS:1000224|molecular mass",
@@ -281,10 +285,6 @@ def add(self, handler: AttributeHandler):
 
 
 other_terms = CaseInsensitiveDict({
-    "Charge": "MS:1000041|charge state",
-    "precursor_charge": "MS:1000041|charge state",
-    "precursorcharge": "MS:1000041|charge state",
-
     "Parent": "MS:1003208|experimental precursor monoisotopic m/z",
     "ObservedPrecursorMZ": "MS:1003208|experimental precursor monoisotopic m/z",
     "PrecursorMZ": "MS:1003208|experimental precursor monoisotopic m/z",
@@ -773,7 +773,7 @@ def protein_handler(key, value, container: Attributed):
                                 match.group(1), group_identifier=group_identifier)
         container.add_attribute("MS:1001113|c-terminal flanking residue",
                                 match.group(2), group_identifier=group_identifier)
-    container.add_attribute(key, re.sub(r"\(pre=(.),post=(.)\)", '', value),
+    container.add_attribute(key.strip('"').strip("'"), re.sub(r"\(pre=(.),post=(.)\)", '', value),
                             group_identifier=group_identifier)
     return True
 
@@ -1162,7 +1162,7 @@ def _parse_comment(self, value: str, attributes: Attributed):
                 new_item = new_item + " "
             new_item = new_item + item
             n_quotes = new_item.count('"')
-            if n_quotes/2 == int(n_quotes/2):
+            if n_quotes % 2 == 0:
                 fixed_comment_items.append(new_item)
                 new_item = ""
 
@@ -1264,7 +1264,7 @@ def _make_spectrum(self, peak_list: List, attributes: Mapping[str, str]):
                         analyte.add_attribute(
                             "MS:1001113|c-terminal flanking residue", match.group(3))
                         if match.group(4):
-                            spectrum.add_attribute(
+                            analyte.add_attribute(
                                 "MS:1000041|charge state", try_cast(match.group(4)))
                     else:
                         spectrum.add_attribute(
@@ -1287,7 +1287,7 @@ def _make_spectrum(self, peak_list: List, attributes: Mapping[str, str]):
                     if match:
                         analyte.add_attribute(
                             STRIPPED_PEPTIDE_TERM, match.group(1))
-                        spectrum.add_attribute(
+                        analyte.add_attribute(
                             "MS:1000041|charge state", try_cast(match.group(2)))
 
         #### Handle the uninterpretable terms
@@ -1429,6 +1429,7 @@ class MSPSpectralLibraryWriter(SpectralLibraryWriterBase):
         "MS:1003054|theoretical average m/z": "Mz_av",
         "MS:1003169|proforma peptidoform sequence": "ProForma",
         "MS:1000888|stripped peptide sequence": "Peptide",
+        "MS:1000041|charge state": "Charge",
     }
 
     for species_name, keys in species_map.items():
diff --git a/implementations/python/mzlib/backends/spectronaut.py b/implementations/python/mzlib/backends/spectronaut.py
index c797bf5..9f2735a 100644
--- a/implementations/python/mzlib/backends/spectronaut.py
+++ b/implementations/python/mzlib/backends/spectronaut.py
@@ -209,9 +209,11 @@ def _build_analyte(self, description: Dict[str, Any], analyte: Analyte) -> Analy
         analyte.add_attribute(STRIPPED_PEPTIDE_TERM, description['StrippedPeptide'])
         analyte.add_attribute(PROFORMA_PEPTIDE_TERM, pf_seq)
         analyte.add_attribute("MS:1001117|theoretical mass", peptide.mass)
+        analyte.add_attribute(CHARGE_STATE, int(description['PrecursorCharge']))
 
         protein_group_id = analyte.get_next_group_identifier()
 
+
         if 'UniProtIds' in description:
             analyte.add_attribute(
                 "MS:1000885|protein accession",
diff --git a/implementations/python/mzlib/validate/rules/base.json b/implementations/python/mzlib/validate/rules/base.json
index 223c50c..d22733c 100644
--- a/implementations/python/mzlib/validate/rules/base.json
+++ b/implementations/python/mzlib/validate/rules/base.json
@@ -92,7 +92,7 @@
       ],
       "combination_logic": "OR",
       "id": "Spectrum_has_precursor_charge",
-      "level": "SHOULD",
+      "level": "MAY",
       "path": "/Library/Spectrum"
     },
     {
@@ -148,6 +148,26 @@
       "id": "Analyte_has_any_mass",
       "level": "SHOULD",
       "path": "/Library/Spectrum/Analyte"
+    },
+    {
+      "attr": [
+        {
+          "accession": "MS:1000041",
+          "allow_children": false,
+          "name": "charge state",
+          "repeatable": false
+        },
+        {
+          "accession": "MS:1000633",
+          "allow_children": false,
+          "name": "possible charge state",
+          "repeatable": false
+        }
+      ],
+      "combination_logic": "OR",
+      "id": "Analyte_has_charge",
+      "level": "SHOULD",
+      "path": "/Library/Spectrum/Analyte"
     }
   ]
 }
\ No newline at end of file
diff --git a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json
index 06888c4..bc6d6ba 100644
--- a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json
+++ b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.json
@@ -11,7 +11,7 @@
     {
       "accession": "MS:1003188",
       "name": "library name",
-      "value": "tests/test_data/chinese_hamster_hcd_selected_head.msp"
+      "value": "tests/test_data/chinese_hamster_hcd_selected_head"
     }
   ],
   "clusters": [],
@@ -34,6 +34,11 @@
               "name": "stripped peptide sequence",
               "value": "AAAACALTPGPLADLAAR"
             },
+            {
+              "accession": "MS:1000041",
+              "name": "charge state",
+              "value": 2
+            },
             {
               "accession": "MS:1001975",
               "cv_param_group": "1",
@@ -114,11 +119,6 @@
           "value": "singleton spectrum",
           "value_accession": "MS:1003066"
         },
-        {
-          "accession": "MS:1000041",
-          "name": "charge state",
-          "value": 2
-        },
         {
           "accession": "MS:1003208",
           "name": "experimental precursor monoisotopic m/z",
@@ -541,6 +541,11 @@
               "name": "stripped peptide sequence",
               "value": "AAAACALTPGPLADLAAR"
             },
+            {
+              "accession": "MS:1000041",
+              "name": "charge state",
+              "value": 2
+            },
             {
               "accession": "MS:1001975",
               "cv_param_group": "1",
@@ -621,11 +626,6 @@
           "value": "singleton spectrum",
           "value_accession": "MS:1003066"
         },
-        {
-          "accession": "MS:1000041",
-          "name": "charge state",
-          "value": 2
-        },
         {
           "accession": "MS:1003208",
           "name": "experimental precursor monoisotopic m/z",
@@ -1399,6 +1399,11 @@
               "name": "stripped peptide sequence",
               "value": "AAAAGQTGTVPPGAPGALPLPGMAIVK"
             },
+            {
+              "accession": "MS:1000041",
+              "name": "charge state",
+              "value": 2
+            },
             {
               "accession": "MS:1001975",
               "cv_param_group": "1",
@@ -1479,11 +1484,6 @@
           "value": "singleton spectrum",
           "value_accession": "MS:1003066"
         },
-        {
-          "accession": "MS:1000041",
-          "name": "charge state",
-          "value": 2
-        },
         {
           "accession": "MS:1003208",
           "name": "experimental precursor monoisotopic m/z",
@@ -2011,6 +2011,11 @@
               "name": "stripped peptide sequence",
               "value": "AAAAGSTSVKPIFSR"
             },
+            {
+              "accession": "MS:1000041",
+              "name": "charge state",
+              "value": 2
+            },
             {
               "accession": "MS:1001975",
               "cv_param_group": "1",
@@ -2091,11 +2096,6 @@
           "value": "singleton spectrum",
           "value_accession": "MS:1003066"
         },
-        {
-          "accession": "MS:1000041",
-          "name": "charge state",
-          "value": 2
-        },
         {
           "accession": "MS:1003208",
           "name": "experimental precursor monoisotopic m/z",
@@ -2590,6 +2590,11 @@
               "name": "stripped peptide sequence",
               "value": "AAAAGSTSVKPIFSR"
             },
+            {
+              "accession": "MS:1000041",
+              "name": "charge state",
+              "value": 3
+            },
             {
               "accession": "MS:1001975",
               "cv_param_group": "1",
@@ -2670,11 +2675,6 @@
           "value": "singleton spectrum",
           "value_accession": "MS:1003066"
         },
-        {
-          "accession": "MS:1000041",
-          "name": "charge state",
-          "value": 3
-        },
         {
           "accession": "MS:1003208",
           "name": "experimental precursor monoisotopic m/z",
@@ -3319,6 +3319,11 @@
               "name": "stripped peptide sequence",
               "value": "AAAALGSHGSCSSEVEK"
             },
+            {
+              "accession": "MS:1000041",
+              "name": "charge state",
+              "value": 2
+            },
             {
               "accession": "MS:1001975",
               "cv_param_group": "1",
@@ -3399,11 +3404,6 @@
           "value": "singleton spectrum",
           "value_accession": "MS:1003066"
         },
-        {
-          "accession": "MS:1000041",
-          "name": "charge state",
-          "value": 2
-        },
         {
           "accession": "MS:1003208",
           "name": "experimental precursor monoisotopic m/z",
@@ -3769,6 +3769,11 @@
               "name": "stripped peptide sequence",
               "value": "AAAALGSHGSCSSEVEK"
             },
+            {
+              "accession": "MS:1000041",
+              "name": "charge state",
+              "value": 2
+            },
             {
               "accession": "MS:1001975",
               "cv_param_group": "1",
@@ -3849,11 +3854,6 @@
           "value": "singleton spectrum",
           "value_accession": "MS:1003066"
         },
-        {
-          "accession": "MS:1000041",
-          "name": "charge state",
-          "value": 2
-        },
         {
           "accession": "MS:1003208",
           "name": "experimental precursor monoisotopic m/z",
diff --git a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt
index aaca838..226cdf8 100644
--- a/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt
+++ b/implementations/python/tests/test_data/chinese_hamster_hcd_selected_head.mzlb.txt
@@ -1,12 +1,11 @@
 <mzSpecLib 1.0>
-MS:1003188|library name=tests/test_data/chinese_hamster_hcd_selected_head.msp
+MS:1003188|library name=tests/test_data/chinese_hamster_hcd_selected_head
 <AttributeSet Spectrum=all>
 <AttributeSet Analyte=all>
 <AttributeSet Interpretation=all>
 <Spectrum=1>
 MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_46eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
-MS:1000041|charge state=2
 MS:1003208|experimental precursor monoisotopic m/z=855.4538
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=46
@@ -30,6 +29,7 @@ MS:1003059|number of peaks=87
 <Analyte=1>
 MS:1000224|molecular mass=1710.9076
 MS:1000888|stripped peptide sequence=AAAACALTPGPLADLAAR
+MS:1000041|charge state=2
 [1]MS:1001975|delta m/z=1.4
 [1]UO:0000000|unit=UO:0000169|parts per million
 MS:1003208|experimental precursor monoisotopic m/z=855.455
@@ -137,7 +137,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=4
 <Spectrum=2>
 MS:1003061|library spectrum name=AAAACALTPGPLADLAAR/2_1(4,C,CAM)_53eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
-MS:1000041|charge state=2
 MS:1003208|experimental precursor monoisotopic m/z=855.4538
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=53
@@ -161,6 +160,7 @@ MS:1003059|number of peaks=204
 <Analyte=1>
 MS:1000224|molecular mass=1710.9076
 MS:1000888|stripped peptide sequence=AAAACALTPGPLADLAAR
+MS:1000041|charge state=2
 [1]MS:1001975|delta m/z=4.2
 [1]UO:0000000|unit=UO:0000169|parts per million
 MS:1003208|experimental precursor monoisotopic m/z=855.4574
@@ -385,7 +385,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=5
 <Spectrum=3>
 MS:1003061|library spectrum name=AAAAGQTGTVPPGAPGALPLPGMAIVK/2_0_76eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
-MS:1000041|charge state=2
 MS:1003208|experimental precursor monoisotopic m/z=1207.1672
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=76
@@ -409,6 +408,7 @@ MS:1003059|number of peaks=122
 <Analyte=1>
 MS:1000224|molecular mass=2414.3344
 MS:1000888|stripped peptide sequence=AAAAGQTGTVPPGAPGALPLPGMAIVK
+MS:1000041|charge state=2
 [1]MS:1001975|delta m/z=-0.9
 [1]UO:0000000|unit=UO:0000169|parts per million
 MS:1003208|experimental precursor monoisotopic m/z=1207.1661
@@ -551,7 +551,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0
 <Spectrum=4>
 MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/2_0_44eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
-MS:1000041|charge state=2
 MS:1003208|experimental precursor monoisotopic m/z=731.9043
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=44
@@ -575,6 +574,7 @@ MS:1003059|number of peaks=111
 <Analyte=1>
 MS:1000224|molecular mass=1463.8086
 MS:1000888|stripped peptide sequence=AAAAGSTSVKPIFSR
+MS:1000041|charge state=2
 [1]MS:1001975|delta m/z=-2.7
 [1]UO:0000000|unit=UO:0000169|parts per million
 MS:1003208|experimental precursor monoisotopic m/z=731.9023
@@ -706,7 +706,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=1
 <Spectrum=5>
 MS:1003061|library spectrum name=AAAAGSTSVKPIFSR/3_0_28eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
-MS:1000041|charge state=3
 MS:1003208|experimental precursor monoisotopic m/z=488.2719
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=28
@@ -730,6 +729,7 @@ MS:1003059|number of peaks=161
 <Analyte=1>
 MS:1000224|molecular mass=1464.8157
 MS:1000888|stripped peptide sequence=AAAAGSTSVKPIFSR
+MS:1000041|charge state=3
 [1]MS:1001975|delta m/z=3.8
 [1]UO:0000000|unit=UO:0000169|parts per million
 MS:1003208|experimental precursor monoisotopic m/z=488.2738
@@ -911,7 +911,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=0
 <Spectrum=6>
 MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_50eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
-MS:1000041|charge state=2
 MS:1003208|experimental precursor monoisotopic m/z=830.8834
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=50
@@ -935,6 +934,7 @@ MS:1003059|number of peaks=68
 <Analyte=1>
 MS:1000224|molecular mass=1661.7668
 MS:1000888|stripped peptide sequence=AAAALGSHGSCSSEVEK
+MS:1000041|charge state=2
 [1]MS:1001975|delta m/z=4.1
 [1]UO:0000000|unit=UO:0000169|parts per million
 MS:1003208|experimental precursor monoisotopic m/z=830.8868
@@ -1023,7 +1023,6 @@ MS:1003290|number of unassigned peaks among top 20 peaks=6
 <Spectrum=7>
 MS:1003061|library spectrum name=AAAALGSHGSCSSEVEK/2_1(10,C,CAM)_52eV
 MS:1003065|spectrum aggregation type=MS:1003066|singleton spectrum
-MS:1000041|charge state=2
 MS:1003208|experimental precursor monoisotopic m/z=830.8834
 MS:1000044|dissociation method=MS:1000422|beam-type collision-induced dissociation
 [1]MS:1000045|collision energy=52
@@ -1047,6 +1046,7 @@ MS:1003059|number of peaks=402
 <Analyte=1>
 MS:1000224|molecular mass=1661.7668
 MS:1000888|stripped peptide sequence=AAAALGSHGSCSSEVEK
+MS:1000041|charge state=2
 [1]MS:1001975|delta m/z=-2.0
 [1]UO:0000000|unit=UO:0000169|parts per million
 MS:1003208|experimental precursor monoisotopic m/z=830.8817

From 760df7b449cced569f89c3cbbf753bc8818afea3 Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Sat, 17 Jun 2023 21:34:53 -0400
Subject: [PATCH 13/24] Checkpoint

---
 implementations/python/examples/draw_entry.py |  27 +
 .../python/examples/first_n_entries.py        |  42 ++
 implementations/python/mzlib/analyte.py       |   5 +-
 implementations/python/mzlib/attributes.py    |  25 +-
 implementations/python/mzlib/backends/base.py |  13 +-
 implementations/python/mzlib/backends/json.py |   4 +-
 implementations/python/mzlib/backends/msp.py  |   2 +-
 implementations/python/mzlib/backends/text.py | 530 ++++++++++--------
 implementations/python/mzlib/spectrum.py      |   2 +-
 .../python/mzlib/spectrum_library.py          |  33 +-
 10 files changed, 426 insertions(+), 257 deletions(-)
 create mode 100644 implementations/python/examples/draw_entry.py
 create mode 100644 implementations/python/examples/first_n_entries.py

diff --git a/implementations/python/examples/draw_entry.py b/implementations/python/examples/draw_entry.py
new file mode 100644
index 0000000..8126d7e
--- /dev/null
+++ b/implementations/python/examples/draw_entry.py
@@ -0,0 +1,27 @@
+import sys
+import matplotlib
+matplotlib.use("agg")
+
+from matplotlib import pyplot as plt
+
+from mzlib.spectrum_library import SpectrumLibrary
+from mzlib.draw import draw_spectrum
+
+
+def main(path, spectrum_key):
+    """Draw spectrum `spectrum_key` of the library at `path` and save it as an annotated PDF."""
+    library = SpectrumLibrary(filename=path)
+    draw_spectrum(library.get_spectrum(spectrum_number=spectrum_key))
+    plt.savefig(f"{path}.{spectrum_key}.annotated.pdf", bbox_inches='tight')
+
+
+if __name__ == '__main__':
+    try:
+        # Guard only the argument parsing so failures inside main() keep their own traceback.
+        path, index = sys.argv[1], int(sys.argv[2])
+    except (IndexError, ValueError):
+        print("USAGE: <prog> <spectrum library path> <spectrum key>")
+        print("\tWrites the annotated spectrum to <spectrum library path>.<spectrum key>.annotated.pdf")
+        sys.exit(1)
+    main(path, index)
+    sys.exit(0)
diff --git a/implementations/python/examples/first_n_entries.py b/implementations/python/examples/first_n_entries.py
new file mode 100644
index 0000000..0aa4c05
--- /dev/null
+++ b/implementations/python/examples/first_n_entries.py
@@ -0,0 +1,42 @@
+import click
+
+from mzlib import SpectrumLibrary
+from mzlib.backends import SpectralLibraryBackendBase, FormatInferenceFailure, TextSpectralLibraryWriter
+from mzlib.cluster import SpectrumCluster
+from mzlib.index import MemoryIndex, SQLIndex
+from mzlib.spectrum import Spectrum
+
+@click.command('first_n_entries')
+@click.argument('inpath', type=click.Path(exists=True))
+@click.option("-i", "--input-format", type=click.Choice(sorted(SpectralLibraryBackendBase._file_extension_to_implementation)),
+              default=None)
+@click.option("-n", '--spectra-to-read', type=int, default=20)
+def main(inpath, input_format, spectra_to_read: int=20):
+    if SQLIndex.exists(inpath):
+        index_type = SQLIndex
+    else:
+        index_type = MemoryIndex
+    click.echo(f"Opening {inpath}", err=True)
+    try:
+        library = SpectrumLibrary(filename=inpath, index_type=index_type, format=input_format)
+    except FormatInferenceFailure as err:
+        click.echo(f"{err}", err=True)
+        raise click.Abort()
+
+    stream = click.get_text_stream('stdout')
+    writer = TextSpectralLibraryWriter(stream)
+    writer.write_header(library)
+
+    for i, entry in enumerate(library, 1):
+        if i > spectra_to_read:
+            break
+        if isinstance(entry, Spectrum):
+            writer.write_spectrum(entry)
+        elif isinstance(entry, SpectrumCluster):
+            writer.write_cluster(entry)
+
+    writer.close()
+
+
+if __name__ == "__main__":
+    main.main()
\ No newline at end of file
diff --git a/implementations/python/mzlib/analyte.py b/implementations/python/mzlib/analyte.py
index dac3cb4..a53df52 100644
--- a/implementations/python/mzlib/analyte.py
+++ b/implementations/python/mzlib/analyte.py
@@ -151,12 +151,13 @@ def remove_member_interpretation(self, member_id):
         del self.member_interpretations[str(member_id)]
 
     def validate(self) -> bool:
-        '''Perform validation on each component to confirm this object is well formed.
+        """
+        Perform validation on each component to confirm this object is well formed.
 
         Returns
         -------
         bool
-        '''
+        """
         analyte_ids = set(self.analytes)
         member_ids = set(self.member_interpretations)
         valid = True
diff --git a/implementations/python/mzlib/attributes.py b/implementations/python/mzlib/attributes.py
index 1b21747..097ac50 100644
--- a/implementations/python/mzlib/attributes.py
+++ b/implementations/python/mzlib/attributes.py
@@ -76,7 +76,8 @@ def __hash__(self):
 
 
 class AttributeManager(object):
-    """A key-value pair store with optional attribute grouping
+    """
+    A key-value pair store with optional attribute grouping
 
     Attributes
     ----------
@@ -91,6 +92,7 @@ class AttributeManager(object):
         The number of attribute groups assigned.
 
     """
+
     attributes: List[Attribute]
     attribute_dict: Dict
     group_dict: Dict
@@ -118,14 +120,14 @@ def __init__(self, attributes: Iterable = None):
             self._from_iterable(attributes)
 
     def get_next_group_identifier(self) -> str:
-        """Retrieve the next un-used attribute group identifier
+        """
+        Retrieve the next un-used attribute group identifier
         and increment the internal counter.
 
         Returns
         -------
         str
         """
-
         next_value = self.group_counter
         self.group_counter += 1
         return str(next_value)
@@ -275,9 +277,7 @@ def get_by_name(self, name: str):
         return None
 
     def clear(self):
-        """Remove all content from the store.
-
-        """
+        """Remove all content from the store."""
         self._clear_attributes()
 
     def remove_attribute(self, key, group_identifier=None):
@@ -600,11 +600,13 @@ def __init__(self, attributes: Iterable=None, **kwargs):
 
 
 class AttributeManagedProperty(Generic[T]):
-    __slots__ = ("attribute", )
+    __slots__ = ("attribute", "multiple")
     attribute: str
+    multiple: bool
 
-    def __init__(self, attribute: str):
+    def __init__(self, attribute: str, multiple: bool = False):
         self.attribute = attribute
+        self.multiple = multiple
 
     def _get_group_id(self, inst: AttributeManager) -> Optional[str]:
         return getattr(inst, "group_id", None)
@@ -613,7 +615,10 @@ def __get__(self, inst: AttributeManager, cls: Type) -> T:
         if inst is None:
             return self
         if inst.has_attribute(self.attribute):
-            return inst.get_attribute(self.attribute, group_identifier=self._get_group_id(inst))
+            value = inst.get_attribute(self.attribute, group_identifier=self._get_group_id(inst))
+            if self.multiple and not isinstance(value, list):
+                value = [value]
+            return value
         return None
 
     def __set__(self, inst: AttributeManager, value: T):
@@ -642,8 +647,6 @@ def __get__(self, inst: AttributeManager, cls: Type) -> T:
         if inst is None:
             return self
         key, val = self._find_key_used(inst)
-        if key is None:
-            raise KeyError(self.attributes[0])
         return val
 
     def _find_key_used(self, inst: AttributeManager) -> Optional[Tuple[str, T]]:
diff --git a/implementations/python/mzlib/backends/base.py b/implementations/python/mzlib/backends/base.py
index 8b6ad0f..d6aa4b3 100644
--- a/implementations/python/mzlib/backends/base.py
+++ b/implementations/python/mzlib/backends/base.py
@@ -100,7 +100,7 @@ class SpectralLibraryBackendBase(AttributedEntity, _VocabularyResolverMixin, _Li
 
     index: IndexBase
 
-    entry_attribute_sets: Dict[str, AttributeSet]
+    spectrum_attribute_sets: Dict[str, AttributeSet]
     analyte_attribute_sets: Dict[str, AttributeSet]
     interpretation_attribute_sets: Dict[str, AttributeSet]
     cluster_attribute_sets: Dict[str, AttributeSet]
@@ -183,7 +183,7 @@ def __init__(self, filename: Union[str, Path, io.FileIO]):
         self.filename = filename
         self.index = MemoryIndex()
 
-        self.entry_attribute_sets = {
+        self.spectrum_attribute_sets = {
             "all": AttributeSet("all", [])
         }
         self.analyte_attribute_sets = {
@@ -210,7 +210,7 @@ def read_header(self) -> bool:
 
     def _new_spectrum(self) -> Spectrum:
         spec = Spectrum()
-        attr_set = self.entry_attribute_sets.get("all")
+        attr_set = self.spectrum_attribute_sets.get("all")
         if attr_set:
             attr_set.apply(spec)
         return spec
@@ -371,7 +371,7 @@ def read(self) -> Iterator[Union[Spectrum, SpectrumCluster]]:
     def _add_attribute_set(self, attribute_set: AttributeSet,
                            attribute_set_type: AttributeSetTypes):
         if attribute_set_type == AttributeSetTypes.spectrum:
-            self.entry_attribute_sets[attribute_set.name] = attribute_set
+            self.spectrum_attribute_sets[attribute_set.name] = attribute_set
         elif attribute_set_type == AttributeSetTypes.analyte:
             self.analyte_attribute_sets[attribute_set.name] = attribute_set
         elif attribute_set_type == AttributeSetTypes.interpretation:
@@ -667,6 +667,11 @@ def close(self):
 
 
 class LibraryIterator(AttributedEntity, _LibraryViewMixin, Iterator[Spectrum]):
+    backend: SpectralLibraryBackendBase
+    attributes: Attributed
+    iter: Iterator[Spectrum]
+    _buffer: Spectrum
+
     def __init__(self, backend: SpectralLibraryBackendBase) -> None:
         self.backend = backend
         self.attributes = backend
diff --git a/implementations/python/mzlib/backends/json.py b/implementations/python/mzlib/backends/json.py
index d26a165..cd4d164 100644
--- a/implementations/python/mzlib/backends/json.py
+++ b/implementations/python/mzlib/backends/json.py
@@ -152,7 +152,7 @@ def _fill_attributes(self, attributes: List[Dict[str, Any]], store: Attributed,
                 if context_type == AttributeSetTypes.analyte:
                     self.analyte_attribute_sets[attrib['value']].apply(store)
                 elif context_type == AttributeSetTypes.spectrum:
-                    self.entry_attribute_sets[attrib['value']].apply(store)
+                    self.spectrum_attribute_sets[attrib['value']].apply(store)
                 elif context_type == AttributeSetTypes.interpretation:
                     self.interpretation_attribute_sets[attrib['value']].apply(store)
                 elif context_type == AttributeSetTypes.cluster:
@@ -309,7 +309,7 @@ def write_header(self, library: SpectralLibraryBackendBase):
         attributes = self._format_attributes(library.attributes)
         self.buffer[LIBRARY_METADATA_KEY] = attributes
         self.buffer[SPECTRUM_CLASSES] = {
-            c.name: self._format_attributes(c.attributes) for c in library.entry_attribute_sets.values()
+            c.name: self._format_attributes(c.attributes) for c in library.spectrum_attribute_sets.values()
         }
         self.buffer[ANALYTE_CLASSES] = {
             c.name: self._format_attributes(c.attributes) for c in library.analyte_attribute_sets.values()
diff --git a/implementations/python/mzlib/backends/msp.py b/implementations/python/mzlib/backends/msp.py
index 9b289e6..5fb2c39 100644
--- a/implementations/python/mzlib/backends/msp.py
+++ b/implementations/python/mzlib/backends/msp.py
@@ -773,7 +773,7 @@ def protein_handler(key, value, container: Attributed):
                                 match.group(1), group_identifier=group_identifier)
         container.add_attribute("MS:1001113|c-terminal flanking residue",
                                 match.group(2), group_identifier=group_identifier)
-    container.add_attribute(key.strip('"').strip("'"), re.sub(r"\(pre=(.),post=(.)\)", '', value),
+    container.add_attribute(key, re.sub(r"\(pre=(.),post=(.)\)", '', value.strip('"').strip("'")),
                             group_identifier=group_identifier)
     return True
 
diff --git a/implementations/python/mzlib/backends/text.py b/implementations/python/mzlib/backends/text.py
index d9873ff..a3e4afc 100644
--- a/implementations/python/mzlib/backends/text.py
+++ b/implementations/python/mzlib/backends/text.py
@@ -6,7 +6,7 @@
 import enum
 
 from collections import deque
-from typing import ClassVar, List, Tuple, Union, Iterable
+from typing import ClassVar, List, Optional, Tuple, Union, Iterable
 
 from mzlib.annotation import parse_annotation
 from mzlib.spectrum import Spectrum
@@ -55,6 +55,7 @@ class _LibraryParserStateEnum(enum.Enum):
 
 
 ATTRIBUTE_SET_NAME = "MS:1003212|library attribute set name"
+PEAK_ATTRIBUTE = "MS:1003254|peak attribute"
 
 START_OF_SPECTRUM_MARKER = re.compile(r"^<(?:Spectrum)(?:=(.+))?>")
 START_OF_INTERPRETATION_MARKER = re.compile(r"^<Interpretation(?:=(.+))>")
@@ -76,6 +77,282 @@ class _LibraryParserStateEnum(enum.Enum):
 }
 
 
+class _EntryParser:
+    """
+    Moves the complexity and state management involved in parsing
+    a full entry out of :class:`TextSpectralLibrary`, allowing it
+    to be factored into a bunch of helper methods around a single
+    piece of shared state too granular for the main parser.
+    """
+
+    library: 'TextSpectralLibrary'
+    state: _SpectrumParserStateEnum
+    spectrum: Optional[Spectrum]
+    cluster: Optional[SpectrumCluster]
+    analyte: Optional[Analyte]
+    interpretation: Optional[Interpretation]
+    interpretation_member: Optional[InterpretationMember]
+
+    aggregation_types: List[str]
+    peak_list: List[Tuple]
+
+    start_line_number: int
+    line_number: int = -1
+
+    def __init__(self, library, start_line_number: int, spectrum_index: Optional[int]) -> None:
+        self.library = library
+        self.start_line_number = start_line_number
+        self.spectrum_index = spectrum_index
+        self.state = _SpectrumParserStateEnum.header
+
+        self.aggregation_types = None
+        self.peak_list = []
+
+        self.spectrum = None
+        self.cluster = None
+        self.analyte = None
+        self.interpretation = None
+        self.interpretation_member = None
+
+    def real_line_number_or_nothing(self):
+        if self.start_line_number is None:
+            return ''
+        message = f" on line {self.line_number + self.start_line_number}"
+        if self.spectrum_index is not None:
+            message += f" in spectrum {self.spectrum_index}"
+        message += f" in state {self.state}"
+        return message
+
+    def _parse_header(self, line):
+        """Handle one line read in the header state: either a section marker or a spectrum attribute."""
+        if START_OF_SPECTRUM_MARKER.match(line):
+            self.state = _SpectrumParserStateEnum.header
+            self.spectrum = self.library._new_spectrum()
+            # Keep the index numeric (-1 when unknown) so the key fallback below cannot fail on None.
+            self.spectrum.index = self.spectrum_index if self.spectrum_index is not None else -1
+            match = START_OF_SPECTRUM_MARKER.match(line)
+            # The "=<key>" part of the marker is optional, so group(1) may be None.
+            self.spectrum.key = int(match.group(1) or 0) or self.spectrum.index - 1
+            return
+        elif START_OF_PEAKS_MARKER.match(line):
+            self.state = _SpectrumParserStateEnum.peaks
+            return
+        elif START_OF_INTERPRETATION_MARKER.match(line):
+            self.state = _SpectrumParserStateEnum.interpretation
+            match = START_OF_INTERPRETATION_MARKER.match(line)
+            if self.interpretation is not None:
+                self.spectrum.add_interpretation(self.interpretation)
+            self.interpretation = self.library._new_interpretation(match.group(1))
+            self.spectrum.add_interpretation(self.interpretation)
+            self.analyte = None
+            return
+        elif START_OF_ANALYTE_MARKER.match(line):
+            self.state = _SpectrumParserStateEnum.analyte
+            match = START_OF_ANALYTE_MARKER.match(line)
+            self.analyte = self.library._new_analyte(match.group(1))
+            self.spectrum.add_analyte(self.analyte)
+            return
+        elif START_OF_CLUSTER.match(line):
+            self.state = _SpectrumParserStateEnum.cluster
+            self.cluster = self.library._new_cluster()
+            match = START_OF_CLUSTER.match(line)
+            self.cluster.key = int(match.group(1)) or self.cluster.index - 1
+            return
+
+        # Any line that is not a section marker is parsed as an attribute of the current spectrum.
+        self.library._parse_attribute_into(
+            line, self.spectrum, self.real_line_number_or_nothing, self.state)
+
+    def _parse_interpretation(self, line):
+        if START_OF_ANALYTE_MARKER.match(line):
+            warnings.warn(
+                f"An analyte found after an interpretation was encountered, {self.real_line_number_or_nothing()}")
+            self.state = _SpectrumParserStateEnum.analyte
+            match = START_OF_ANALYTE_MARKER.match(line)
+            if self.analyte is not None:
+                self.spectrum.add_analyte(self.analyte)
+            self.analyte = self.library._new_analyte(match.group(1))
+            self.spectrum.add_analyte(self.analyte)
+            return
+        elif START_OF_INTERPRETATION_MARKER.match(line):
+            self.state = _SpectrumParserStateEnum.interpretation
+            match = START_OF_INTERPRETATION_MARKER.match(line)
+            if self.interpretation is not None:
+                self.spectrum.add_interpretation(self.interpretation)
+            self.interpretation = self.library._new_interpretation(match.group(1))
+            self.spectrum.add_interpretation(self.interpretation)
+            self.analyte = None
+            return
+        elif START_OF_PEAKS_MARKER.match(line):
+            self.state = _SpectrumParserStateEnum.peaks
+            return
+        elif START_OF_INTERPRETATION_MEMBER_MARKER.match(line):
+            self.state = _SpectrumParserStateEnum.interpretation_member
+            match = START_OF_INTERPRETATION_MEMBER_MARKER.match(line)
+
+            if self.interpretation_member is not None:
+                self.interpretation.add_member_interpretation(self.interpretation_member)
+
+            self.interpretation_member = InterpretationMember(match.group(1))
+            self.interpretation.add_member_interpretation(self.interpretation_member)
+            return
+
+        self.library._parse_attribute_into(
+            line, self.interpretation.attributes, self.real_line_number_or_nothing)
+        self.library._analyte_interpretation_link(self.spectrum, self.interpretation)
+
+    def _parse_interpretation_member(self, line):
+        if START_OF_PEAKS_MARKER.match(line):
+            self.state = _SpectrumParserStateEnum.peaks
+            self.interpretation_member = None
+            self.interpretation = None
+            return
+        elif START_OF_INTERPRETATION_MARKER.match(line):
+            self.state = _SpectrumParserStateEnum.interpretation
+            match = START_OF_INTERPRETATION_MARKER.match(line)
+            if self.interpretation is not None:
+                self.spectrum.add_interpretation(self.interpretation)
+            self.interpretation = self.library._new_interpretation(match.group(1))
+            self.spectrum.add_interpretation(self.interpretation)
+            self.interpretation_member = None
+            return
+        elif START_OF_INTERPRETATION_MEMBER_MARKER.match(line):
+            self.state = _SpectrumParserStateEnum.interpretation_member
+            match = START_OF_INTERPRETATION_MEMBER_MARKER.match(line)
+            if self.interpretation_member is not None:
+                self.interpretation.add_member_interpretation(self.interpretation_member)
+            self.interpretation_member = InterpretationMember(match.group(1))
+            self.interpretation.add_member_interpretation(self.interpretation_member)
+            return
+
+        self.library._parse_attribute_into(
+            line, self.interpretation_member, self.real_line_number_or_nothing)
+
+    def _parse_analyte(self, line):
+        if START_OF_PEAKS_MARKER.match(line):
+            self.state = _SpectrumParserStateEnum.peaks
+            if self.analyte is not None:
+                self.spectrum.add_analyte(self.analyte)
+                self.analyte = None
+            return
+
+        elif START_OF_ANALYTE_MARKER.match(line):
+            self.state = _SpectrumParserStateEnum.analyte
+            match = START_OF_ANALYTE_MARKER.match(line)
+            if self.analyte is not None:
+                self.spectrum.add_analyte(self.analyte)
+            self.analyte = self.library._new_analyte(match.group(1))
+            return
+
+        elif START_OF_INTERPRETATION_MARKER.match(line):
+            self.state = _SpectrumParserStateEnum.interpretation
+            match = START_OF_INTERPRETATION_MARKER.match(line)
+            if self.analyte is not None:
+                self.spectrum.add_analyte(self.analyte)
+                self.analyte = None
+
+            # Somehow we have an in-progress Interpretation that hasn't been cleared yet.
+            # This should probably be an error strictly speaking.
+            if self.interpretation is not None:
+                warnings.warn(
+                    f"Interleaved analytes and interpretations detected at {self.real_line_number_or_nothing()}")
+                self.spectrum.add_interpretation(self.interpretation)
+            self.interpretation = self.library._new_interpretation(match.group(1))
+            self.spectrum.add_interpretation(self.interpretation)
+            return
+
+        self.library._parse_attribute_into(line, self.analyte, self.real_line_number_or_nothing)
+
+    def _parse_peaks(self, line):
+        # TODO: When we know more about how different aggregations are formatted,
+        # look that up here once so we remember it and can use it to process the
+        # aggregation columns
+        if self.aggregation_types is None:
+            self.aggregation_types = self.spectrum.peak_aggregations
+        match = float_number.match(line)
+        if match is not None:
+            tokens = line.split("\t")
+            n_tokens = len(tokens)
+            if n_tokens == 2:
+                mz, intensity = tokens
+                annotation = parse_annotation("?")
+                self.peak_list.append([float(mz), float(intensity), annotation, []])
+            elif n_tokens == 3:
+                mz, intensity, annotation = tokens
+                if not annotation:
+                    annotation = "?"
+                annotation = parse_annotation(annotation)
+                self.peak_list.append([float(mz), float(intensity), annotation, []])
+            elif n_tokens > 3:
+                mz, intensity, annotation, *aggregation = tokens
+                if not annotation:
+                    annotation = "?"
+                annotation = parse_annotation(annotation)
+                self.peak_list.append(
+                    [float(mz), float(intensity), annotation, [try_cast(agg) for agg in aggregation]])
+            else:
+                raise ValueError(
+                    f"Malformed peak line {line} with {n_tokens} entries{self.real_line_number_or_nothing()}")
+        else:
+            raise ValueError(f"Malformed peak line {line}{self.real_line_number_or_nothing()}")
+
+    def _parse_cluster(self, line):
+        if START_OF_SPECTRUM_MARKER.match(line):
+            raise ValueError(
+                f"Clusters should not include spectrum sections {self.real_line_number_or_nothing()}")
+
+        elif START_OF_PEAKS_MARKER.match(line):
+            raise ValueError(
+                f"Clusters should not include peaks {self.real_line_number_or_nothing()}")
+
+        elif START_OF_INTERPRETATION_MARKER.match(line):
+            raise ValueError(
+                f"Clusters should not include interpretation sections {self.real_line_number_or_nothing()}")
+
+        elif START_OF_ANALYTE_MARKER.match(line):
+            raise ValueError(
+                f"Clusters should not include analyte sections {self.real_line_number_or_nothing()}")
+
+        elif START_OF_INTERPRETATION_MEMBER_MARKER.match(line):
+            raise ValueError(
+                f"Clusters should not include interpretation member sections {self.real_line_number_or_nothing()}")
+
+        self.library._parse_attribute_into(
+            line, self.cluster, self.real_line_number_or_nothing, self.state)
+
+    def parse(self, buffer: Iterable[str]):
+        line: str
+        for line_number, line in enumerate(buffer):
+            self.line_number = line_number
+            line = line.strip()
+            if not line:
+                break
+            # Skip comments for now, no round-trip
+            if line.startswith("#"):
+                continue
+            elif self.state == _SpectrumParserStateEnum.header:
+                self._parse_header(line)
+            elif self.state == _SpectrumParserStateEnum.interpretation:
+                self._parse_interpretation(line)
+            elif self.state == _SpectrumParserStateEnum.interpretation_member:
+                self._parse_interpretation_member(line)
+            elif self.state == _SpectrumParserStateEnum.analyte:
+                self._parse_analyte(line)
+            elif self.state == _SpectrumParserStateEnum.peaks:
+                self._parse_peaks(line)
+            elif self.state == _SpectrumParserStateEnum.cluster:
+                self._parse_cluster(line)
+            else:
+                raise ValueError(
+                    f"Unknown state {self.state}{self.real_line_number_or_nothing()}")
+        if self.cluster:
+            return self.cluster
+        self.spectrum.peak_list = self.peak_list
+        # Backfill analytes into interpretations that never explicitly listed them.
+        self.library._default_interpretation_to_analytes(self.spectrum)
+        return self.spectrum
+
+
 def _is_header_line(line: str) -> bool:
     if START_OF_SPECTRUM_MARKER.match(line):
         return False
@@ -363,7 +640,7 @@ def _parse_attribute_into(self, line: str, store: Attributed,
             self._prepare_attribute_dict(d)
             if d['term'] == ATTRIBUTE_SET_NAME:
                 if _SpectrumParserStateEnum.header == state:
-                    attr_set = self.entry_attribute_sets[d['value']]
+                    attr_set = self.spectrum_attribute_sets[d['value']]
                 elif _SpectrumParserStateEnum.analyte == state:
                     attr_set = self.analyte_attribute_sets[d['value']]
                 elif _SpectrumParserStateEnum.interpretation == state:
@@ -397,233 +674,8 @@ def _parse_attribute_into(self, line: str, store: Attributed,
 
     def _parse(self, buffer: Iterable[str], spectrum_index: int = None,
                start_line_number: int=None) -> Union[Spectrum, SpectrumCluster]:
-        spec: Spectrum = self._new_spectrum()
-        spec.index = spectrum_index if spectrum_index is not None else -1
-        interpretation: Interpretation = None
-        analyte: Analyte = None
-        interpretation_member: InterpretationMember = None
-        cluster: SpectrumCluster = None
-
-        STATES = _SpectrumParserStateEnum
-        state: _SpectrumParserStateEnum = STATES.header
-
-        peak_list = []
-        line_number = -1
-
-        def real_line_number_or_nothing():
-            nonlocal start_line_number
-            nonlocal line_number
-            nonlocal spectrum_index
-
-            if start_line_number is None:
-                return ''
-            message = f" on line {line_number + start_line_number}"
-            if spectrum_index is not None:
-                message += f" in spectrum {spectrum_index}"
-            message += f" in state {state}"
-            return message
-
-        line: str
-        for line_number, line in enumerate(buffer):
-            line = line.strip()
-            if not line:
-                break
-            # Skip comments for now, no round-trip
-            if line.startswith("#"):
-                continue
-            if state == STATES.header:
-                if START_OF_SPECTRUM_MARKER.match(line):
-                    match = START_OF_SPECTRUM_MARKER.match(line)
-                    spec.key = int(match.group(1)) or spec.index - 1
-                    continue
-
-                elif START_OF_PEAKS_MARKER.match(line):
-                    state = STATES.peaks
-                    continue
-
-                elif START_OF_INTERPRETATION_MARKER.match(line):
-                    state = STATES.interpretation
-                    match = START_OF_INTERPRETATION_MARKER.match(line)
-                    if interpretation is not None:
-                        spec.add_interpretation(interpretation)
-                    interpretation = self._new_interpretation(match.group(1))
-                    spec.add_interpretation(interpretation)
-                    analyte = None
-                    continue
-
-                elif START_OF_ANALYTE_MARKER.match(line):
-                    state = STATES.analyte
-                    match = START_OF_ANALYTE_MARKER.match(line)
-                    analyte = self._new_analyte(match.group(1))
-                    spec.add_analyte(analyte)
-                    continue
-
-                elif START_OF_CLUSTER.match(line):
-                    state = STATES.cluster
-                    cluster = self._new_cluster()
-                    match = START_OF_CLUSTER.match(line)
-                    cluster.key = int(match.group(1)) or cluster.index - 1
-                    continue
-
-                self._parse_attribute_into(
-                    line, spec, real_line_number_or_nothing, state)
-
-            elif state == STATES.interpretation:
-                if START_OF_ANALYTE_MARKER.match(line):
-                    warnings.warn(
-                        f"An analyte found after an interpretation was encountered, {real_line_number_or_nothing()}")
-                    state = STATES.analyte
-                    match = START_OF_ANALYTE_MARKER.match(line)
-                    if analyte is not None:
-                        spec.add_analyte(analyte)
-                    analyte = self._new_analyte(match.group(1))
-                    spec.add_analyte(analyte)
-                    continue
-                elif START_OF_INTERPRETATION_MARKER.match(line):
-                    state = STATES.interpretation
-                    match = START_OF_INTERPRETATION_MARKER.match(line)
-                    if interpretation is not None:
-                        spec.add_interpretation(interpretation)
-                    interpretation = self._new_interpretation(match.group(1))
-                    spec.add_interpretation(interpretation)
-                    analyte = None
-                    continue
-                elif START_OF_PEAKS_MARKER.match(line):
-                    state = STATES.peaks
-                    continue
-                elif START_OF_INTERPRETATION_MEMBER_MARKER.match(line):
-                    state = STATES.interpretation_member
-                    match = START_OF_INTERPRETATION_MEMBER_MARKER.match(line)
-
-                    if interpretation_member is not None:
-                        interpretation.add_member_interpretation(interpretation_member)
-
-                    interpretation_member = InterpretationMember(match.group(1))
-                    interpretation.add_member_interpretation(interpretation_member)
-                    continue
-
-                self._parse_attribute_into(
-                    line, interpretation.attributes, real_line_number_or_nothing)
-                self._analyte_interpretation_link(spec, interpretation)
-
-            elif state == STATES.interpretation_member:
-                if START_OF_PEAKS_MARKER.match(line):
-                    state = STATES.peaks
-                    interpretation_member = None
-                    interpretation = None
-                    continue
-                elif START_OF_INTERPRETATION_MARKER.match(line):
-                    state = STATES.interpretation
-                    match = START_OF_INTERPRETATION_MARKER.match(line)
-                    if interpretation is not None:
-                        spec.add_interpretation(interpretation)
-                    interpretation = self._new_interpretation(match.group(1))
-                    spec.add_interpretation(interpretation)
-                    interpretation_member = None
-                    continue
-                elif START_OF_INTERPRETATION_MEMBER_MARKER.match(line):
-                    state = STATES.interpretation_member
-                    match = START_OF_INTERPRETATION_MEMBER_MARKER.match(line)
-                    if interpretation_member is not None:
-                        interpretation.add_member_interpretation(interpretation_member)
-                    interpretation_member = InterpretationMember(match.group(1))
-                    interpretation.add_member_interpretation(interpretation_member)
-                    continue
-
-                self._parse_attribute_into(
-                    line, interpretation_member, real_line_number_or_nothing)
-
-            elif state == STATES.analyte:
-                if START_OF_PEAKS_MARKER.match(line):
-                    state = STATES.peaks
-                    if analyte is not None:
-                        spec.add_analyte(analyte)
-                        analyte = None
-                    continue
-
-                elif START_OF_ANALYTE_MARKER.match(line):
-                    state = STATES.analyte
-                    match = START_OF_ANALYTE_MARKER.match(line)
-                    if analyte is not None:
-                        spec.add_analyte(analyte)
-                    analyte = self._new_analyte(match.group(1))
-                    continue
-
-                elif START_OF_INTERPRETATION_MARKER.match(line):
-                    state = STATES.interpretation
-                    match = START_OF_INTERPRETATION_MARKER.match(line)
-                    if analyte is not None:
-                        spec.add_analyte(analyte)
-                        analyte = None
-
-                    # Somehow we have an in-progress Interpretation that hasn't been cleared yet.
-                    # This should probably be an error strictly speaking.
-                    if interpretation is not None:
-                        warnings.warn(
-                            f"Interleaved analytes and interpretations detected at {real_line_number_or_nothing()}")
-                        spec.add_interpretation(interpretation)
-                    interpretation = self._new_interpretation(match.group(1))
-                    spec.add_interpretation(interpretation)
-                    continue
-
-                self._parse_attribute_into(line, analyte, real_line_number_or_nothing)
-
-            elif state == STATES.peaks:
-                match = float_number.match(line)
-                if match is not None:
-                    tokens = line.split("\t")
-                    n_tokens = len(tokens)
-                    if n_tokens == 3:
-                        mz, intensity, annotation = tokens
-                        if not annotation:
-                            annotation = "?"
-                        annotation = parse_annotation(annotation)
-                        peak_list.append([float(mz), float(intensity), annotation, ""])
-                    elif n_tokens > 3:
-                        mz, intensity, annotation, *aggregation = tokens
-                        if not annotation:
-                            annotation = "?"
-                        annotation = parse_annotation(annotation)
-                        peak_list.append(
-                            [float(mz), float(intensity), annotation, aggregation])
-                    else:
-                        raise ValueError(
-                            f"Malformed peak line {line} with {n_tokens} entries{real_line_number_or_nothing()}")
-                else:
-                    raise ValueError(f"Malformed peak line {line}{real_line_number_or_nothing()}")
-
-            elif state == STATES.cluster:
-                if START_OF_SPECTRUM_MARKER.match(line):
-                    raise ValueError(
-                        f"Clusters should not include spectrum sections {real_line_number_or_nothing()}")
-
-                elif START_OF_PEAKS_MARKER.match(line):
-                    raise ValueError(
-                        f"Clusters should not include peaks {real_line_number_or_nothing()}")
-
-                elif START_OF_INTERPRETATION_MARKER.match(line):
-                    raise ValueError(
-                        f"Clusters should not include interpretation sections {real_line_number_or_nothing()}")
-
-                elif START_OF_ANALYTE_MARKER.match(line):
-                    raise ValueError(
-                        f"Clusters should not include analyte sections {real_line_number_or_nothing()}")
-
-                elif START_OF_INTERPRETATION_MEMBER_MARKER.match(line):
-                    raise ValueError(
-                        f"Clusters should not include interpretation member sections {real_line_number_or_nothing()}")
-
-                self._parse_attribute_into(
-                    line, cluster, real_line_number_or_nothing, state)
-            else:
-                raise ValueError(
-                    f"Unknown state {state}{real_line_number_or_nothing()}")
-        if cluster:
-            return cluster
-        spec.peak_list = peak_list
-        # Backfill analytes into interpretations that never explicitly listed them.
-        self._default_interpretation_to_analytes(spec)
-        return spec
+        parser = _EntryParser(self, start_line_number, spectrum_index)
+        return parser.parse(buffer)
 
     def get_spectrum(self, spectrum_number: int=None,
                      spectrum_name: str=None) -> Spectrum:
@@ -656,10 +708,11 @@ class TextSpectralLibraryWriter(SpectralLibraryWriterBase):
     format_name = "text"
     default_version = '1.0'
 
-    def __init__(self, filename, version=None, **kwargs):
+    def __init__(self, filename, version=None, compact_interpretations: bool=True, **kwargs):
         super(TextSpectralLibraryWriter, self).__init__(filename)
         self.version = version
         self._coerce_handle(self.filename)
+        self.compact_interpretations = compact_interpretations
 
     def _write_attributes(self, attributes: Attributed):
         for attribute in attributes:
@@ -687,7 +740,7 @@ def write_header(self, library: SpectralLibraryBackendBase):
         self._write_attributes(
             self._filter_attributes(library.attributes, lambda x: x.key != FORMAT_VERSION_TERM)
         )
-        for attr_set in library.entry_attribute_sets.values():
+        for attr_set in library.spectrum_attribute_sets.values():
             self.write_attribute_set(attr_set, AttributeSetTypes.spectrum)
 
         for attr_set in library.analyte_attribute_sets.values():
@@ -736,10 +789,17 @@ def write_spectrum(self, spectrum: Spectrum):
             self.handle.write(f"<Interpretation={interpretation.id}>\n")
             self._write_attributes(attribs_of)
 
-            for member in interpretation.member_interpretations.values():
-                member: InterpretationMember
-                self.handle.write(f"<InterpretationMember={member.id}>\n")
-                self._write_attributes(member.attributes)
+            # When there is only one interpretation and only one interpretation member,
+            # the interpretation member attributes are written out as part of the
+            # interpretation itself.
+            if _n_interps == 1 and len(interpretation.member_interpretations) == 1 and self.compact_interpretations:
+                for member in interpretation.member_interpretations.values():
+                    self._write_attributes(member.attributes)
+            else:
+                for member in interpretation.member_interpretations.values():
+                    member: InterpretationMember
+                    self.handle.write(f"<InterpretationMember={member.id}>\n")
+                    self._write_attributes(member.attributes)
         self.handle.write("<Peaks>\n")
         for peak in spectrum.peak_list:
             peak_parts = [
diff --git a/implementations/python/mzlib/spectrum.py b/implementations/python/mzlib/spectrum.py
index 9dd8097..9b3dce1 100644
--- a/implementations/python/mzlib/spectrum.py
+++ b/implementations/python/mzlib/spectrum.py
@@ -74,7 +74,7 @@ def __init__(self, attributes=None, peak_list=None, analytes=None,
     precursor_charge = AttributeManagedProperty[int](CHARGE_STATE)
 
     spectrum_aggregation = AttributeFacet[SpectrumAggregation](SpectrumAggregation)
-    peak_aggregations = AttributeManagedProperty("MS:1003254|peak attribute")
+    peak_aggregations = AttributeManagedProperty("MS:1003254|peak attribute", multiple=True)
 
     def add_analyte(self, analyte: Analyte):
         self.analytes[str(analyte.id)] = analyte
diff --git a/implementations/python/mzlib/spectrum_library.py b/implementations/python/mzlib/spectrum_library.py
index 2a9878f..bd1b0ff 100644
--- a/implementations/python/mzlib/spectrum_library.py
+++ b/implementations/python/mzlib/spectrum_library.py
@@ -102,6 +102,26 @@ def _requires_backend(self):
             raise ValueError(
                 "Cannot read library data, library parser not yet initialized")
 
+    @property
+    def spectrum_attribute_sets(self):
+        self._requires_backend()
+        return self.backend.spectrum_attribute_sets
+
+    @property
+    def analyte_attribute_sets(self):
+        self._requires_backend()
+        return self.backend.analyte_attribute_sets
+
+    @property
+    def interpretation_attribute_sets(self):
+        self._requires_backend()
+        return self.backend.interpretation_attribute_sets
+
+    @property
+    def cluster_attribute_sets(self):
+        self._requires_backend()
+        return self.backend.cluster_attribute_sets
+
     #### Define getter/setter for attribute identifier
     @property
     def identifier(self) -> Optional[str]:
@@ -157,6 +177,13 @@ def read_header(self) -> bool:
         return self.backend.read_header()
 
     def read(self):
+        """
+        Create a sequential iterator over the spectrum library entries.
+
+        Yields
+        ------
+        Spectrum or SpectrumCluster
+        """
         self._requires_backend()
         return self.backend.read()
 
@@ -234,7 +261,11 @@ def get_cluster(self, cluster_number: int) -> SpectrumCluster:
 
     def find_spectra(self, specification, **query_keys) -> List[Spectrum]:
         """
-        find_spectra - Return a list of spectra given query constraints
+        Return a list of spectra given query constraints
+
+        Returns
+        -------
+        List[Spectrum]
         """
         self._requires_backend()
         return self.backend.find_spectra(specification, **query_keys)

From 539af14710202967094f41218635630df18a44bf Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Sat, 17 Jun 2023 21:55:35 -0400
Subject: [PATCH 14/24] Fix Spectronaut spectrum origin type

---
 implementations/python/mzlib/backends/spectronaut.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/implementations/python/mzlib/backends/spectronaut.py b/implementations/python/mzlib/backends/spectronaut.py
index 9f2735a..2ecadb5 100644
--- a/implementations/python/mzlib/backends/spectronaut.py
+++ b/implementations/python/mzlib/backends/spectronaut.py
@@ -90,7 +90,7 @@ def __init__(self, filename: str, index_type=None, **kwargs):
 
     def _spectrum_type(self):
         key = "MS:1003072|spectrum origin type"
-        value = "MS:1003074|predicted spectrum"
+        value = "MS:1003073|observed spectrum"
         return key, value
 
     def read_header(self) -> bool:

From d95af648f2f7c5862d7082f72a92bd550f3c42e4 Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Sat, 17 Jun 2023 22:03:24 -0400
Subject: [PATCH 15/24] Make method public

---
 implementations/python/mzlib/ontology.py  | 2 +-
 implementations/python/mzlib/tools/cli.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/implementations/python/mzlib/ontology.py b/implementations/python/mzlib/ontology.py
index 9b3d623..8213831 100644
--- a/implementations/python/mzlib/ontology.py
+++ b/implementations/python/mzlib/ontology.py
@@ -51,7 +51,7 @@ def name_to_curie(self, name: str) -> str:
         term = self.find_term_by_name(name)
         return term.id
 
-    def _make_attribute_syntax(self, name: str) -> str:
+    def attribute_syntax(self, name: str) -> str:
         if self.is_curie(name):
             if "|" in name:
                 return name
diff --git a/implementations/python/mzlib/tools/cli.py b/implementations/python/mzlib/tools/cli.py
index 04ded01..dee3771 100644
--- a/implementations/python/mzlib/tools/cli.py
+++ b/implementations/python/mzlib/tools/cli.py
@@ -124,7 +124,7 @@ def convert(inpath, outpath, format=None, header_file=None, library_attributes=(
     if library_attributes:
         resolver = ControlledVocabularyResolver()
         for k, v in library_attributes:
-            k = resolver._make_attribute_syntax(k)
+            k = resolver.attribute_syntax(k)
             library.add_attribute(k, v)
     click.echo(f"Writing to {outpath}", err=True)
     fh = click.open_file(outpath, mode='w')

From 152c00612326e971388be5ae7a091e87cdfb64ec Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Mon, 19 Jun 2023 13:46:08 -0400
Subject: [PATCH 16/24] Emit warnings when space delimiters are found

---
 implementations/python/mzlib/backends/text.py | 40 +++++++++-----
 .../python/mzlib/validate/object_rule.py      | 11 ++++
 .../python/mzlib/validate/semantic_rule.py    |  7 ++-
 .../python/mzlib/validate/validator.py        | 53 +++++++++++++++----
 4 files changed, 85 insertions(+), 26 deletions(-)

diff --git a/implementations/python/mzlib/backends/text.py b/implementations/python/mzlib/backends/text.py
index a3e4afc..0f3d59b 100644
--- a/implementations/python/mzlib/backends/text.py
+++ b/implementations/python/mzlib/backends/text.py
@@ -13,6 +13,7 @@
 from mzlib.cluster import SpectrumCluster
 from mzlib.attributes import AttributeManager, Attributed, AttributeSet
 from mzlib.analyte import Analyte, Interpretation, InterpretationMember
+from mzlib.validate.object_rule import ValidationWarning
 
 from .base import (
     SpectralLibraryBackendBase,
@@ -29,9 +30,9 @@
 term_pattern = re.compile(
     r"^(?P<term>(?P<term_accession>\S+:(?:\d|X)+)\|(?P<term_name>[^=]+))")
 key_value_term_pattern = re.compile(
-    r"^(?P<term>(?P<term_accession>[A-Za-z0-9:.]+:(?:\d|X)+)\|(?P<term_name>[^=]+))=(?P<value>.+)")
+    r"^(?P<term>(?P<term_accession>[A-Za-z0-9:.]+:(?:\d|X)+)\|(?P<term_name>[^=]+?))\s*=\s*(?P<value>.+)")
 grouped_key_value_term_pattern = re.compile(
-    r"^\[(?P<group_id>\d+)\](?P<term>(?P<term_accession>\S+:(?:\d|X)+)\|(?P<term_name>[^=]+))=(?P<value>.+)")
+    r"^\[(?P<group_id>\d+)\](?P<term>(?P<term_accession>\S+:(?:\d|X)+)\|(?P<term_name>[^=]+?))\s*=\s*(?P<value>.+)")
 float_number = re.compile(
     r"^\d+(.\d+)?")
 
@@ -57,17 +58,20 @@ class _LibraryParserStateEnum(enum.Enum):
 ATTRIBUTE_SET_NAME = "MS:1003212|library attribute set name"
 PEAK_ATTRIBUTE = "MS:1003254|peak attribute"
 
-START_OF_SPECTRUM_MARKER = re.compile(r"^<(?:Spectrum)(?:=(.+))?>")
-START_OF_INTERPRETATION_MARKER = re.compile(r"^<Interpretation(?:=(.+))>")
-START_OF_ANALYTE_MARKER = re.compile(r"^<Analyte(?:=(.+))>")
+START_OF_SPECTRUM_MARKER = re.compile(r"^<(?:Spectrum)(?:\s*=\s*(.+))?>")
+START_OF_INTERPRETATION_MARKER = re.compile(r"^<Interpretation(?:\s*=\s*(.+))>")
+START_OF_ANALYTE_MARKER = re.compile(r"^<Analyte(?:\s*=\s*(.+))>")
 START_OF_PEAKS_MARKER = re.compile(r"^<Peaks>")
 START_OF_LIBRARY_MARKER = re.compile(r"^<mzSpecLib\s+(.+)>")
-SPECTRUM_NAME_PRESENT = re.compile(r'MS:1003061\|(?:library )?spectrum name=')
-START_OF_INTERPRETATION_MEMBER_MARKER = re.compile(r"<InterpretationMember(?:=(.+))>")
+START_OF_INTERPRETATION_MEMBER_MARKER = re.compile(r"<InterpretationMember(?:\s*=\s*(.+))>")
 START_OF_ATTRIBUTE_SET = re.compile(
-    r"<AttributeSet (Spectrum|Analyte|Interpretation|Cluster)=(.+)>")
-START_OF_CLUSTER = re.compile(r"<Cluster(?:=(.+))>")
+    r"<AttributeSet (Spectrum|Analyte|Interpretation|Cluster)\s*=\s*(.+)>")
+START_OF_CLUSTER = re.compile(r"<Cluster(?:\s*=\s*(.+))>")
 
+SPECTRUM_NAME_PRESENT = re.compile(r'MS:1003061\|(?:library )?spectrum name\s*=\s*')
+SPECTRUM_NAME_MATCH = re.compile(r'MS:1003061\|(?:library )?spectrum name\s*=\s*(.+)')
+
+FALLBACK_PEAK_LINE_PATTERN = re.compile(r'(?P<mz>\d+(?:\.\d+)?)\s+(?P<intensity>\d+(?:\.\d+)?)(?:\s+(?P<rest>.+))?')
 
 attribute_set_types = {
     "spectrum": AttributeSetTypes.spectrum,
@@ -101,7 +105,7 @@ class _EntryParser:
 
     def __init__(self, library, start_line_number: int, spectrum_index: Optional[int]) -> None:
         self.library = library
-        self.start_line_number = start_line_number
+        self.start_line_number = start_line_number or 0
         self.spectrum_index = spectrum_index
         self.state = _SpectrumParserStateEnum.header
 
@@ -115,8 +119,6 @@ def __init__(self, library, start_line_number: int, spectrum_index: Optional[int
         self.interpretation_member = None
 
     def real_line_number_or_nothing(self):
-        if self.start_line_number is None:
-            return ''
         message = f" on line {self.line_number + self.start_line_number}"
         if self.spectrum_index is not None:
             message += f" in spectrum {self.spectrum_index}"
@@ -273,6 +275,14 @@ def _parse_peaks(self, line):
         if match is not None:
             tokens = line.split("\t")
             n_tokens = len(tokens)
+            if n_tokens == 1 and ' ' in line:
+                if match := FALLBACK_PEAK_LINE_PATTERN.match(line):
+                    tokens = match.groups()
+                    n_tokens = len(tokens)
+                    warnings.warn(
+                        f"Space character delimiter found in peak line{self.real_line_number_or_nothing()}",
+                        ValidationWarning
+                    )
             if n_tokens == 2:
                 mz, intensity = tokens
                 annotation = parse_annotation("?")
@@ -576,8 +586,10 @@ def create_index(self) -> int:
                         entry_is_cluster = bool(is_clus)
                         spectrum_file_offset = line_beginning_file_offset
                         spectrum_name = ''
-                    if re.match(r'MS:1003061\|(?:library )?spectrum name', line):
-                        spectrum_name = re.match(r'MS:1003061\|(?:library )?spectrum name=(.+)', line).group(1)
+
+                    if SPECTRUM_NAME_PRESENT.match(line):
+                        if match := SPECTRUM_NAME_MATCH.match(line):
+                            spectrum_name = match.group(1)
 
                     entry_buffer.append(line)
 
diff --git a/implementations/python/mzlib/validate/object_rule.py b/implementations/python/mzlib/validate/object_rule.py
index 1f80d5f..680352c 100644
--- a/implementations/python/mzlib/validate/object_rule.py
+++ b/implementations/python/mzlib/validate/object_rule.py
@@ -1,5 +1,6 @@
 
 import logging
+
 from typing import TYPE_CHECKING, List, Tuple
 
 from mzlib.attributes import Attributed
@@ -17,6 +18,16 @@
 logger.addHandler(logging.NullHandler())
 
 
+class ValidationWarning(UserWarning):
+    """
+    Indicates that something was parsed that did not halt the parser but
+    which violates the expectations of the parser.
+
+    The parser will make a best-effort attempt to interpret the value
+    correctly but when validating this will count as a violation.
+    """
+
+
 class ScopedObjectRuleBase:
     id: str
     path: str
diff --git a/implementations/python/mzlib/validate/semantic_rule.py b/implementations/python/mzlib/validate/semantic_rule.py
index a6c6b36..4d6f030 100644
--- a/implementations/python/mzlib/validate/semantic_rule.py
+++ b/implementations/python/mzlib/validate/semantic_rule.py
@@ -137,8 +137,11 @@ def __init__(self, *args, **kwargs):
         self.seen = set()
 
     def validate(self, attribute: 'AttributeSemanticRule', value: str, validator_context: "ValidatorBase"):
-        if isinstance(value, list) and attribute.repeatable:
-            return all(self.validate(attribute, v, validator_context) for v in value)
+        if isinstance(value, list):
+            if attribute.repeatable:
+                return all(self.validate(attribute, v, validator_context) for v in value)
+            else:
+                return False
         if value in self.seen:
             return False
         self.seen.add(value)
diff --git a/implementations/python/mzlib/validate/validator.py b/implementations/python/mzlib/validate/validator.py
index b5816d5..35554c1 100644
--- a/implementations/python/mzlib/validate/validator.py
+++ b/implementations/python/mzlib/validate/validator.py
@@ -1,8 +1,8 @@
 import itertools
 import logging
-
-from dataclasses import dataclass, field
+import warnings
 import re
+from dataclasses import dataclass, field
 from typing import Any, Callable, Deque, Dict, Iterator, List, Optional, Sequence, Tuple, Union
 
 from psims.controlled_vocabulary.entity import Entity, ListOfType
@@ -18,7 +18,7 @@
 
 from mzlib.validate.level import RequirementLevel
 from mzlib.validate.semantic_rule import ScopedSemanticRule, load_rule_set
-from mzlib.validate.object_rule import ScopedObjectRuleBase, SpectrumPeakAnnotationRule
+from mzlib.validate.object_rule import ScopedObjectRuleBase, SpectrumPeakAnnotationRule, ValidationWarning
 from mzlib.defaults import DEFAULT_UNITS
 
 logger = logging.getLogger(__name__)
@@ -61,6 +61,34 @@ def visited_attribute(self, attribute: Union[Tuple[str, str], Attribute]) -> boo
 
 
 
+def _warning_iterator(iterator: Iterator[Spectrum]) -> Iterator[Spectrum]:
+    while True:
+        try:
+            with warnings.catch_warnings(record=True) as w:
+                value = next(iterator)
+            vw = [a for a in w if issubclass(a.category, ValidationWarning)]
+            yield value, vw
+        except StopIteration:
+            break
+        except:
+            raise
+
+
+def _is_of_type(attrib, relation) -> bool:
+    if isinstance(relation.value_type.type_definition, type):
+        return isinstance(attrib.value, relation.value_type.type_definition)
+    else:
+        return _try_convert(attrib.value, relation.value_type.type_definition)
+
+
+def _try_convert(value, converter):
+    try:
+        converter(value)
+        return True
+    except (ValueError, TypeError):
+        return False
+
+
 class ValidatorBase(_VocabularyResolverMixin):
     error_log: List
     current_context: ValidationContext
@@ -71,7 +99,7 @@ def reset_context(self):
     def add_warning(self, obj: Attributed, path: str, identifier_path: Tuple, attrib: Any, value: Any, requirement_level: RequirementLevel, message: str):
         raise NotImplementedError()
 
-    def validate_spectrum(self, spectrum: Spectrum, path: str, library: SpectrumLibrary):
+    def validate_spectrum(self, spectrum: Spectrum, path: str, library: SpectrumLibrary, parsing_warnings: Optional[List[warnings.WarningMessage]] = None):
         raise NotImplementedError()
 
     def validate_analyte(self, analyte: Analyte, path: str, spectrum: Spectrum, library: SpectrumLibrary):
@@ -127,7 +155,7 @@ def check_attributes(self, obj: Attributed, path: str, identifer_path: Tuple) ->
                                 break
                         if hit:
                             break
-                    elif isinstance(attrib.value, rel.value_type.type_definition):
+                    elif _is_of_type(attrib, rel):
                         break
                 else:
                     self.add_warning(obj, path, identifer_path, attrib.key, attrib.value, RequirementLevel.must,
@@ -177,8 +205,8 @@ def validate_library(self, library: SpectrumLibrary, spectrum_iterator: Optional
 
         if spectrum_iterator is None:
             spectrum_iterator = library
-        for spectrum in spectrum_iterator:
-            result &= self.validate_spectrum(spectrum, path, library)
+        for spectrum, warns in _warning_iterator(spectrum_iterator):
+            result &= self.validate_spectrum(spectrum, path, library, parsing_warnings=warns)
         return result
 
     def chain(self, validator: 'ValidatorBase') -> 'ValidatorBase':
@@ -228,13 +256,18 @@ def apply_rules(self, obj: Attributed, path: str, identifier_path: Tuple) -> boo
                 logger.log(level, f"Applied {rule.id} to {path}:{identifier_path} {v}/{result}")
         return result
 
-    def validate_spectrum(self, spectrum: Spectrum, path: str, library: SpectrumLibrary):
+    def validate_spectrum(self, spectrum: Spectrum, path: str, library: SpectrumLibrary, parsing_warnings: Optional[List[warnings.WarningMessage]] = None):
         path = f"{path}/Spectrum"
         identifier_path = (spectrum.key, )
         result = self.apply_rules(spectrum, path, identifier_path)
         result &= self.check_attributes(spectrum, path, identifier_path)
         self.reset_context()
 
+        if parsing_warnings:
+            result = False
+            for parsing_warning in parsing_warnings:
+                logger.warn(str(parsing_warning.message))
+
         for _key, analyte in spectrum.analytes.items():
             result &= self.validate_analyte(analyte, path, spectrum, library)
 
@@ -287,10 +320,10 @@ def error_log(self):
             log.extend(validator.error_log)
         return log
 
-    def validate_spectrum(self, spectrum: Spectrum, path: str, library: SpectrumLibrary):
+    def validate_spectrum(self, spectrum: Spectrum, path: str, library: SpectrumLibrary, parsing_warnings: Optional[List[warnings.WarningMessage]] = None):
         result = True
         for validator in self.validators:
-            result &= validator.validate_spectrum(spectrum, path, library)
+            result &= validator.validate_spectrum(spectrum, path, library, parsing_warnings)
         return result
 
     def validate_analyte(self, analyte: Analyte, path: str, spectrum: Spectrum, library: SpectrumLibrary):

From 2588b41489bc9df856b572d71f7e9f8f6e23e406 Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Mon, 19 Jun 2023 16:10:48 -0400
Subject: [PATCH 17/24] Ensure the object is an iterator

---
 implementations/python/mzlib/validate/validator.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/implementations/python/mzlib/validate/validator.py b/implementations/python/mzlib/validate/validator.py
index 35554c1..1ec21cc 100644
--- a/implementations/python/mzlib/validate/validator.py
+++ b/implementations/python/mzlib/validate/validator.py
@@ -62,6 +62,8 @@ def visited_attribute(self, attribute: Union[Tuple[str, str], Attribute]) -> boo
 
 
 def _warning_iterator(iterator: Iterator[Spectrum]) -> Iterator[Spectrum]:
+    # coerce to an actual iterator in case we were passed only an iterable
+    iterator = iter(iterator)
     while True:
         try:
             with warnings.catch_warnings(record=True) as w:

From 832ce4b2e74df6ae184c090b55463acf97f06c40 Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Fri, 23 Jun 2023 11:48:28 -0400
Subject: [PATCH 18/24] Handle integers in aggregation

---
 implementations/python/mzlib/backends/text.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/implementations/python/mzlib/backends/text.py b/implementations/python/mzlib/backends/text.py
index 0f3d59b..24d11df 100644
--- a/implementations/python/mzlib/backends/text.py
+++ b/implementations/python/mzlib/backends/text.py
@@ -4,6 +4,7 @@
 import logging
 import warnings
 import enum
+import numbers
 
 from collections import deque
 from typing import ClassVar, List, Optional, Tuple, Union, Iterable
@@ -837,8 +838,8 @@ def close(self):
         self.handle.close()
 
 
-def format_aggregation(value: Union[float, str]) -> str:
-    if isinstance(value, float):
+def format_aggregation(value: Union[numbers.Number, str]) -> str:
+    if isinstance(value, numbers.Number):
         return "%0.4g" % value
     else:
         return value

From 9d7e64aa97d421cf9ab7c3ea4d62ca5587214b3e Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Fri, 30 Jun 2023 12:28:37 -0400
Subject: [PATCH 19/24] checkpoint

---
 .../python/mzlib/backends/bibliospec.py       |  5 +--
 .../python/mzlib/backends/encyclopedia.py     | 43 +++++++++++++++++++
 2 files changed, 44 insertions(+), 4 deletions(-)
 create mode 100644 implementations/python/mzlib/backends/encyclopedia.py

diff --git a/implementations/python/mzlib/backends/bibliospec.py b/implementations/python/mzlib/backends/bibliospec.py
index 4a0a322..23c1b4d 100644
--- a/implementations/python/mzlib/backends/bibliospec.py
+++ b/implementations/python/mzlib/backends/bibliospec.py
@@ -1,9 +1,6 @@
-from dataclasses import dataclass
-from multiprocessing import connection
-import re
-import os
 import sqlite3
 import zlib
+from dataclasses import dataclass
 
 from typing import Iterator, List, Mapping, Tuple, Iterable, Type
 
diff --git a/implementations/python/mzlib/backends/encyclopedia.py b/implementations/python/mzlib/backends/encyclopedia.py
new file mode 100644
index 0000000..513a84e
--- /dev/null
+++ b/implementations/python/mzlib/backends/encyclopedia.py
@@ -0,0 +1,43 @@
+import sqlite3
+import zlib
+from dataclasses import dataclass
+
+from typing import Iterator, List, Mapping, Tuple, Iterable, Type
+
+import numpy as np
+
+from pyteomics import proforma
+
+from mzlib import annotation
+from mzlib.analyte import FIRST_ANALYTE_KEY, FIRST_INTERPRETATION_KEY, Analyte
+from mzlib.spectrum import Spectrum, SPECTRUM_NAME, CHARGE_STATE
+from mzlib.attributes import AttributeManager, Attributed
+
+from mzlib.backends.base import SpectralLibraryBackendBase, FORMAT_VERSION_TERM, DEFAULT_VERSION
+
+from mzlib.index.base import IndexBase
+
+
+def _compress_array(array: np.ndarray, dtype: str) -> bytes:
+    """Compress the array to the EncyclopeDIA format."""
+    packed = struct.pack(">" + (dtype * len(array)), *array)
+    compressed = zlib.compress(packed, 9)
+    return compressed
+
+
+def _extract_array(byte_array: bytes, type_str="d") -> np.ndarray:
+    dtype = np.dtype(type_str)
+    decompressed = zlib.decompress(byte_array, 32)
+    decompressed_length = len(decompressed) // dtype.itemsize
+    unpacked = struct.unpack(">" + (type_str * decompressed_length), decompressed)
+    return np.array(unpacked, dtype=dtype)
+
+
+@dataclass
+class EncyclopediaIndexRecord:
+    number: int
+    precursor_mz: float
+    precursor_charge: int
+    peptide: str
+
+

From 7142e5404cea69123fb8e66aaa3526ea43a2acdb Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Fri, 30 Jun 2023 21:15:26 -0400
Subject: [PATCH 20/24] Working draft

---
 .../python/mzlib/backends/bibliospec.py       |   5 +-
 .../python/mzlib/backends/encyclopedia.py     | 142 ++++++++++++++++--
 2 files changed, 132 insertions(+), 15 deletions(-)

diff --git a/implementations/python/mzlib/backends/bibliospec.py b/implementations/python/mzlib/backends/bibliospec.py
index 23c1b4d..88c58ed 100644
--- a/implementations/python/mzlib/backends/bibliospec.py
+++ b/implementations/python/mzlib/backends/bibliospec.py
@@ -22,10 +22,11 @@ class BibliospecBase:
     connection: sqlite3.Connection
 
     def _correct_modifications_in_sequence(self, row: Mapping) -> proforma.ProForma:
-        '''Correct the modifications in Bibliospec's modified peptide sequence.
+        """
+        Correct the modifications in Bibliospec's modified peptide sequence.
 
         Bibliospec only stores modifications as delta masses.
-        '''
+        """
         mods = self.connection.execute("SELECT * FROM Modifications WHERE RefSpectraID = ?", (row['id'], )).fetchall()
         peptide = proforma.ProForma.parse(row["peptideModSeq"])
         for mod in mods:
diff --git a/implementations/python/mzlib/backends/encyclopedia.py b/implementations/python/mzlib/backends/encyclopedia.py
index 513a84e..163de90 100644
--- a/implementations/python/mzlib/backends/encyclopedia.py
+++ b/implementations/python/mzlib/backends/encyclopedia.py
@@ -18,19 +18,12 @@
 from mzlib.index.base import IndexBase
 
 
-def _compress_array(array: np.ndarray, dtype: str) -> bytes:
-    """Compress the array to the EncyclopeDIA format."""
-    packed = struct.pack(">" + (dtype * len(array)), *array)
-    compressed = zlib.compress(packed, 9)
-    return compressed
-
-
-def _extract_array(byte_array: bytes, type_str="d") -> np.ndarray:
-    dtype = np.dtype(type_str)
-    decompressed = zlib.decompress(byte_array, 32)
-    decompressed_length = len(decompressed) // dtype.itemsize
-    unpacked = struct.unpack(">" + (type_str * decompressed_length), decompressed)
-    return np.array(unpacked, dtype=dtype)
+def _decode_peaks(record: sqlite3.Row):
+    raw_data = zlib.decompress(record['MassArray'])
+    mass_array = np.frombuffer(raw_data, dtype='>d')
+    raw_data = zlib.decompress(record['IntensityArray'])
+    intensity_array = np.frombuffer(raw_data, dtype='>f')
+    return mass_array, intensity_array
 
 
 @dataclass
@@ -41,3 +34,126 @@ class EncyclopediaIndexRecord:
     peptide: str
 
 
+class EncyclopediaIndex(IndexBase):
+    connection: sqlite3.Connection
+
+    def __init__(self, connection):
+        self.connection = connection
+
+    def __getitem__(self, i):
+        if isinstance(i, int):
+            return self.search(i + 1)
+        elif isinstance(i, slice):
+            return [self.search(j + 1) for j in range(i.start or 0, i.stop or len(self), i.step or 1)]
+        else:
+            raise TypeError(f"Cannot index {self.__class__.__name__} with {i}")
+
+    def _record_from(self, row: Mapping) -> EncyclopediaIndexRecord:
+        peptide_sequence = row['PeptideModSeq']
+        return EncyclopediaIndexRecord(row['rowid'], row['PrecursorMz'], row['PrecursorCharge'], peptide_sequence)
+
+    def search(self, i):
+        if isinstance(i, int):
+            info = self.connection.execute("SELECT rowid, PrecursorMz, PrecursorCharge, PeptideModSeq FROM entries WHERE rowid = ?", (i, )).fetchone()
+            return self._record_from(info)
+        elif isinstance(i, str):
+            raise NotImplementedError()
+
+    def __iter__(self):
+        return map(self._record_from, self.connection.execute("SELECT rowid, PrecursorMz, PrecursorCharge, PeptideModSeq FROM entries ORDER BY rowid").fetchall())
+
+    def __len__(self):
+        return self.connection.execute("SELECT count(rowid) FROM entries;").fetchone()[0]
+
+
+class EncyclopediaSpectralLibrary(SpectralLibraryBackendBase):
+    """Read EncyclopeDIA SQLite3 spectral library files."""
+
+    connection: sqlite3.Connection
+
+    file_format = "dlib"
+    format_name = "encyclopedia"
+
+    @classmethod
+    def has_index_preference(cls, filename) -> Type[IndexBase]:
+        return EncyclopediaIndex
+
+    def __init__(self, filename, **kwargs):
+        super().__init__(filename)
+        self.connection = sqlite3.connect(filename)
+        self.connection.row_factory = sqlite3.Row
+        self.index = EncyclopediaIndex(self.connection)
+        self.read_header()
+
+    def read_header(self) -> bool:
+        attribs = AttributeManager()
+        attribs.add_attribute(FORMAT_VERSION_TERM, DEFAULT_VERSION)
+        attribs.add_attribute("MS:1003207|library creation software", "EncyclopeDIA")
+        self.attributes = attribs
+        return True
+
+    def _populate_analyte(self, analyte: Analyte, row: Mapping):
+        """
+        Fill an analyte with details describing a peptide sequence and inferring
+        from context its traits based upon the assumptions EncyclopeDIA makes.
+
+        EncyclopeDIA only stores modifications as delta masses.
+        """
+        peptide = proforma.ProForma.parse(row['PeptideModSeq'])
+        analyte.add_attribute("MS:1003169|proforma peptidoform sequence", str(peptide))
+        analyte.add_attribute("MS:1001117|theoretical mass", peptide.mass)
+        analyte.add_attribute("MS:1000888|stripped peptide sequence", row['PeptideSeq'])
+        analyte.add_attribute(CHARGE_STATE, row['PrecursorCharge'])
+
+    def get_spectrum(self, spectrum_number: int = None, spectrum_name: str = None):
+        """
+        Read a spectrum from the spectrum library.
+
+        EncyclopeDIA does not support alternative labeling of spectra with a
+        plain text name so looking up by `spectrum_name` is not supported.
+        """
+        if spectrum_number is None:
+            raise ValueError("Only spectrum number queries are supported. spectrum_number must have an integer value")
+
+        info = self.connection.execute("SELECT rowid, * FROM entries WHERE rowid = ?;", (spectrum_number, )).fetchone()
+        spectrum = self._new_spectrum()
+        spectrum.key = info['rowid']
+        spectrum.index = info['rowid'] - 1
+        spectrum.precursor_mz = info['PrecursorMz']
+        try:
+            spectrum.add_attribute("MS:1000894|retention time", info['RTInSeconds'] / 60.0)
+        except KeyError:
+            pass
+
+        try:
+            spectrum.add_attribute(
+                "MS:1003203|constituent spectrum file",
+                info['SourceFile']
+            )
+        except KeyError:
+            pass
+
+
+        analyte = self._new_analyte(1)
+        self._populate_analyte(analyte, info)
+
+        spectrum.add_analyte(analyte)
+
+        interp = self._new_interpretation(1)
+        interp.add_analyte(analyte)
+        spectrum.add_interpretation(interp)
+
+        mz_array, intensity_array = _decode_peaks(info)
+        n_peaks = len(mz_array)
+        spectrum.add_attribute("MS:1003059|number of peaks", n_peaks)
+
+        peak_list = []
+        for i, mz in enumerate(mz_array):
+            row = (mz, intensity_array[i], [], '')
+            peak_list.append(row)
+        spectrum.peak_list = peak_list
+        return spectrum
+
+    def read(self) -> Iterator[Spectrum]:
+        for rec in self.index:
+            yield self.get_spectrum(rec.number)

From 70e4fb3e9a7949d8fd9ad15e69a49ea690548ba5 Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Sat, 15 Jul 2023 22:06:57 -0400
Subject: [PATCH 21/24] Working draft

---
 .../python/mzlib/backends/encyclopedia.py     | 23 +++++++++++++------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/implementations/python/mzlib/backends/encyclopedia.py b/implementations/python/mzlib/backends/encyclopedia.py
index 163de90..b36ec02 100644
--- a/implementations/python/mzlib/backends/encyclopedia.py
+++ b/implementations/python/mzlib/backends/encyclopedia.py
@@ -2,16 +2,16 @@
 import zlib
 from dataclasses import dataclass
 
-from typing import Iterator, List, Mapping, Tuple, Iterable, Type
+from typing import Any, Iterator, List, Mapping, Tuple, Iterable, Type
 
 import numpy as np
 
 from pyteomics import proforma
 
 from mzlib import annotation
-from mzlib.analyte import FIRST_ANALYTE_KEY, FIRST_INTERPRETATION_KEY, Analyte
+from mzlib.analyte import FIRST_ANALYTE_KEY, FIRST_INTERPRETATION_KEY, Analyte, ProteinDescription
 from mzlib.spectrum import Spectrum, SPECTRUM_NAME, CHARGE_STATE
-from mzlib.attributes import AttributeManager, Attributed
+from mzlib.attributes import AttributeManager, Attributed, Attribute
 
 from mzlib.backends.base import SpectralLibraryBackendBase, FORMAT_VERSION_TERM, DEFAULT_VERSION
 
@@ -78,7 +78,7 @@ class EncyclopediaSpectralLibrary(SpectralLibraryBackendBase):
     def has_index_preference(cls, filename) -> Type[IndexBase]:
         return EncyclopediaIndex
 
-    def __init__(self, filename, **kwargs):
+    def __init__(self, filename: str, **kwargs):
         super().__init__(filename)
         self.connection = sqlite3.connect(filename)
         self.connection.row_factory = sqlite3.Row
@@ -92,7 +92,7 @@ def read_header(self) -> bool:
         self.attributes = attribs
         return True
 
-    def _populate_analyte(self, analyte: Analyte, row: Mapping):
+    def _populate_analyte(self, analyte: Analyte, row: Mapping[str, Any]):
         """
         Fill an analyte with details describing a peptide sequence and inferring
         from context its traits based upon the assumptions EncyclopeDIA makes.
@@ -105,6 +105,14 @@ def _populate_analyte(self, analyte: Analyte, row: Mapping):
         analyte.add_attribute("MS:1000888|stripped peptide sequence", row['PeptideSeq'])
         analyte.add_attribute(CHARGE_STATE, row['PrecursorCharge'])
 
+        cursor = self.connection.execute(
+            "SELECT ProteinAccession FROM peptidetoprotein WHERE PeptideSeq = ?;", (row['PeptideSeq'], ))
+        for protrow in cursor:
+            accession = protrow['ProteinAccession']
+            analyte.add_attribute_group([
+                Attribute('MS:1000885|protein accession', accession)
+            ])
+
     def get_spectrum(self, spectrum_number: int = None, spectrum_name: str = None):
         """
         Read a spectrum from the spectrum library.
@@ -128,7 +136,7 @@ def get_spectrum(self, spectrum_number: int = None, spectrum_name: str = None):
         try:
             spectrum.add_attribute(
                 "MS:1003203|constituent spectrum file",
-                info['SourceFile']
+                f"file://{info['SourceFile']}"
             )
         except KeyError:
             pass
@@ -148,8 +156,9 @@ def get_spectrum(self, spectrum_number: int = None, spectrum_name: str = None):
         spectrum.add_attribute("MS:1003059|number of peaks", n_peaks)
 
         peak_list = []
+        # EncyclopeDIA does not encode product ion identities
         for i, mz in enumerate(mz_array):
-            row = (mz, intensity_array[i], [], '')
+            row = (mz, intensity_array[i], [], [])
             peak_list.append(row)
         spectrum.peak_list = peak_list
         return spectrum

From ae93e948835406245ee1a2d408c100eaabb9665e Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Fri, 28 Jul 2023 12:28:47 -0400
Subject: [PATCH 22/24] Decoy label

---
 .../python/mzlib/backends/encyclopedia.py         | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/implementations/python/mzlib/backends/encyclopedia.py b/implementations/python/mzlib/backends/encyclopedia.py
index b36ec02..d05ba4b 100644
--- a/implementations/python/mzlib/backends/encyclopedia.py
+++ b/implementations/python/mzlib/backends/encyclopedia.py
@@ -18,6 +18,10 @@
 from mzlib.index.base import IndexBase
 
 
+DECOY_SPECTRUM = "MS:1003192|decoy spectrum"
+DECOY_PEPTIDE_SPECTRUM = "MS:1003195|unnatural peptidoform decoy spectrum"
+
+
 def _decode_peaks(record: sqlite3.Row):
     raw_data = zlib.decompress(record['MassArray'])
     mass_array = np.frombuffer(raw_data, dtype='>d')
@@ -106,12 +110,17 @@ def _populate_analyte(self, analyte: Analyte, row: Mapping[str, Any]):
         analyte.add_attribute(CHARGE_STATE, row['PrecursorCharge'])
 
         cursor = self.connection.execute(
-            "SELECT ProteinAccession FROM peptidetoprotein WHERE PeptideSeq = ?;", (row['PeptideSeq'], ))
+            "SELECT ProteinAccession, isDecoy FROM peptidetoprotein WHERE PeptideSeq = ?;", (row['PeptideSeq'], ))
+
+        had_decoy = False
         for protrow in cursor:
             accession = protrow['ProteinAccession']
+            is_decoy = bool(int(protrow['isDecoy']))
+            had_decoy = had_decoy or is_decoy
             analyte.add_attribute_group([
                 Attribute('MS:1000885|protein accession', accession)
             ])
+        return had_decoy
 
     def get_spectrum(self, spectrum_number: int = None, spectrum_name: str = None):
         """
@@ -143,7 +152,9 @@ def get_spectrum(self, spectrum_number: int = None, spectrum_name: str = None):
 
 
         analyte = self._new_analyte(1)
-        self._populate_analyte(analyte, info)
+        had_decoy = self._populate_analyte(analyte, info)
+        if had_decoy:
+            spectrum.add_attribute(DECOY_SPECTRUM, DECOY_PEPTIDE_SPECTRUM)
 
         spectrum.add_analyte(analyte)
 

From 0c07226482c4b3677525dc04bbcdde4b06bd2d1d Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Fri, 11 Aug 2023 11:12:46 -0400
Subject: [PATCH 23/24] Fix N-terminal modification parsing

---
 implementations/python/mzlib/backends/base.py |  7 +++++--
 .../python/mzlib/backends/spectronaut.py      | 20 ++++++++++++++++---
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/implementations/python/mzlib/backends/base.py b/implementations/python/mzlib/backends/base.py
index d6aa4b3..b93a35d 100644
--- a/implementations/python/mzlib/backends/base.py
+++ b/implementations/python/mzlib/backends/base.py
@@ -577,9 +577,12 @@ def read(self) -> Iterator[Spectrum]:
         with open_stream(self.filename, 'rt') as stream:
             i = 0
             reader = self._open_reader(stream)
+            if self._headers:
+                # Skip the header line if we've already parsed the headers
+                _ = next(reader)
             buffering_reader = self._batch_rows(reader)
             for i, buffer in enumerate(buffering_reader):
-                yield self._parse(buffer, i)
+                yield self._parse_from_buffer(buffer, i)
 
 
 class SpectralLibraryWriterBase(_VocabularyResolverMixin, metaclass=SubclassRegisteringMetaclass):
@@ -629,7 +632,7 @@ def write_library(self, library: SpectralLibraryBackendBase):
         step = max(min(n // 100, 5000), 1)
         ident = ''
         i = 0
-        for i, entry in enumerate(library):
+        for i, entry in enumerate(library.read()):
             if i % step == 0 and i:
                 if isinstance(entry, SpectrumCluster):
                     tag = "cluster "
diff --git a/implementations/python/mzlib/backends/spectronaut.py b/implementations/python/mzlib/backends/spectronaut.py
index 9b0f685..32209c3 100644
--- a/implementations/python/mzlib/backends/spectronaut.py
+++ b/implementations/python/mzlib/backends/spectronaut.py
@@ -34,9 +34,12 @@ def _rewrite_modified_peptide_as_proforma(sequence: str) -> str:
     last_paren = None
     for i, c in enumerate(sequence):
         if c == ']':
+            # Erase any text in parentheses as these indicate the modification
+            # rule and not the modification name. We could look at the modification
+            # rule to infer N-term and C-term rules, but we don't have enough examples
             if last_paren is not None:
                 k = i - last_paren
-                for j in range(k + 1):
+                for _ in range(k + 1):
                     buffer.pop()
                 last_paren = None
                 buffer.append(c)
@@ -45,7 +48,15 @@ def _rewrite_modified_peptide_as_proforma(sequence: str) -> str:
             buffer.append(c)
         else:
             buffer.append(c)
-    return ''.join(buffer)
+    pf_seq = ''.join(buffer)
+    # A peptide with an N-terminal modification will start with a square brace
+    # but needs to have a "-" added to be well-formed ProForma
+    if pf_seq.startswith("["):
+        i = pf_seq.find(']') + 1
+        if i == 0:
+            raise ValueError(f"Malformed peptide sequence {sequence}")
+        pf_seq = f"{pf_seq[:i]}-{pf_seq[i:]}"
+    return pf_seq
 
 
 def _parse_value(value: str) -> Union[float, int, str, bool]:
@@ -204,7 +215,10 @@ def _generate_peaks(self, batch: List[Dict[str, Any]]) -> List[Tuple[float, floa
 
     def _build_analyte(self, description: Dict[str, Any], analyte: Analyte) -> Analyte:
         pf_seq = _rewrite_modified_peptide_as_proforma(description['ModifiedPeptide'])
-        peptide = proforma.ProForma.parse(pf_seq)
+        try:
+            peptide = proforma.ProForma.parse(pf_seq)
+        except Exception as err:
+            breakpoint()
 
         analyte.add_attribute(STRIPPED_PEPTIDE_TERM, description['StrippedPeptide'])
         analyte.add_attribute(PROFORMA_PEPTIDE_TERM, pf_seq)

From b9e0d5a847637b762d58827a0e0100e6bd229e70 Mon Sep 17 00:00:00 2001
From: Joshua Klein <mobiusklein@gmail.com>
Date: Fri, 11 Aug 2023 11:20:58 -0400
Subject: [PATCH 24/24] Remove breakpoint

---
 implementations/python/mzlib/backends/spectronaut.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/implementations/python/mzlib/backends/spectronaut.py b/implementations/python/mzlib/backends/spectronaut.py
index 32209c3..4506992 100644
--- a/implementations/python/mzlib/backends/spectronaut.py
+++ b/implementations/python/mzlib/backends/spectronaut.py
@@ -215,11 +215,7 @@ def _generate_peaks(self, batch: List[Dict[str, Any]]) -> List[Tuple[float, floa
 
     def _build_analyte(self, description: Dict[str, Any], analyte: Analyte) -> Analyte:
         pf_seq = _rewrite_modified_peptide_as_proforma(description['ModifiedPeptide'])
-        try:
-            peptide = proforma.ProForma.parse(pf_seq)
-        except Exception as err:
-            breakpoint()
-
+        peptide = proforma.ProForma.parse(pf_seq)
         analyte.add_attribute(STRIPPED_PEPTIDE_TERM, description['StrippedPeptide'])
         analyte.add_attribute(PROFORMA_PEPTIDE_TERM, pf_seq)
         analyte.add_attribute("MS:1001117|theoretical mass", peptide.mass)