Skip to content

Commit

Permalink
Merge branch 'releases' into output-IM
Browse files Browse the repository at this point in the history
  • Loading branch information
RalfG authored Nov 20, 2024
2 parents 43a1a2a + adf3be1 commit 1906b45
Show file tree
Hide file tree
Showing 9 changed files with 118 additions and 47 deletions.
18 changes: 3 additions & 15 deletions .github/workflows/build_and_publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,16 @@ jobs:
- uses: actions/checkout@v4
with:
lfs: "true"

- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install build
- name: Build sdist
run: python -m build --sdist --outdir dist

- uses: actions/upload-artifact@v4
with:
name: dist-source
Expand All @@ -41,18 +37,10 @@ jobs:
- uses: actions/checkout@v4
with:
lfs: "true"

- uses: actions/setup-python@v5
name: Set up Python
with:
python-version: "3.11"

- name: Install cibuildwheel
run: python -m pip install cibuildwheel>=2

- name: Build wheels
run: python -m cibuildwheel --output-dir dist

uses: pypa/cibuildwheel@<tag>  # action tag was lost to e-mail obfuscation in the scraped page
with:
output-dir: dist
- uses: actions/upload-artifact@v4
with:
name: dist-${{ matrix.os }}
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@ jobs:
runs-on: ubuntu-latest
strategy:
max-parallel: 4
fail-fast: false
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v4
Expand Down
2 changes: 1 addition & 1 deletion .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ version: 2
build:
os: "ubuntu-20.04"
tools:
python: "3.8"
python: "3.11"
jobs:
post_checkout:
# Download and uncompress the binary
Expand Down
2 changes: 1 addition & 1 deletion docs/source/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Pip package
.. image:: https://flat.badgen.net/badge/install%20with/pip/green
:target: https://pypi.org/project/ms2pip/

With Python 3.8 or higher, run:
With Python 3.9 or higher, run:

.. code-block:: bash
Expand Down
8 changes: 8 additions & 0 deletions docs/source/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,14 @@ For instance:
ms2pip correlate --psm-filetype sage results.sage.tsv spectra.mgf
``correlate-single``
--------------------

Predict spectrum intensities for a single peptide and correlate them with observed intensities from
an :py:class:`ObservedSpectrum` object. This mode is only available through the Python API, not
through the command-line interface.


``get-training-data``
---------------------

Expand Down
73 changes: 58 additions & 15 deletions ms2pip/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,17 @@
from rich.progress import track

import ms2pip.exceptions as exceptions
from ms2pip import spectrum_output
from ms2pip._cython_modules import ms2pip_pyx
from ms2pip._utils.encoder import Encoder
from ms2pip._utils.feature_names import get_feature_names
from ms2pip._utils.ion_mobility import IonMobility
from ms2pip._utils.psm_input import read_psms
from ms2pip._utils.retention_time import RetentionTime
from ms2pip._utils.ion_mobility import IonMobility
from ms2pip._utils.xgb_models import get_predictions_xgb, validate_requested_xgb_model
from ms2pip.constants import MODELS
from ms2pip.result import ProcessingResult, calculate_correlations
from ms2pip.search_space import ProteomeSearchSpace
from ms2pip.spectrum import ObservedSpectrum
from ms2pip.spectrum_input import read_spectrum_file
from ms2pip.spectrum_output import SUPPORTED_FORMATS

Expand Down Expand Up @@ -291,6 +291,62 @@ def correlate(
return results


def correlate_single(
    observed_spectrum: ObservedSpectrum,
    ms2_tolerance: float = 0.02,
    model: str = "HCD",
) -> ProcessingResult:
    """
    Predict intensities for one observed spectrum and correlate them with its peaks.\f

    Parameters
    ----------
    observed_spectrum
        ObservedSpectrum instance holding the observed m/z and intensity values and the
        peptidoform to predict for.
    ms2_tolerance
        MS2 tolerance in Da for observed spectrum peak annotation. By default, 0.02 Da.
    model
        Model to use for prediction; looked up in ``MODELS``. Default: "HCD".

    Returns
    -------
    result: ProcessingResult
        Result with theoretical m/z, predicted intensity, observed intensity, and correlation.

    Raises
    ------
    ValueError
        If the peptidoform of ``observed_spectrum`` is not a ``Peptidoform`` instance.

    """
    peptidoform = observed_spectrum.peptidoform
    if not isinstance(peptidoform, Peptidoform):
        raise ValueError("Peptidoform must be set in observed spectrum to correlate.")

    # Annotate the observed spectrum to obtain the target intensities
    with Encoder.from_peptidoform(peptidoform) as encoder:
        ms2pip_pyx.ms2pip_init(*encoder.encoder_files)
        encoded_peptidoform = encoder.encode_peptidoform(peptidoform)
        observed_mz = observed_spectrum.mz.astype(np.float32)
        observed_peaks = observed_spectrum.intensity.astype(np.float32)
        targets = ms2pip_pyx.get_targets(
            encoded_peptidoform,
            observed_mz,
            observed_peaks,
            float(ms2_tolerance),
            MODELS[model]["peaks_version"],
        )

    # Key the targets by lower-cased ion type, flooring every value at log2(0.001)
    intensity_floor = np.log2(0.001)
    observed_intensity = {}
    for ion_type, target in zip(MODELS[model]["ion_types"], targets):
        target_array = np.array(target, dtype=np.float32)
        observed_intensity[ion_type.lower()] = target_array.clip(min=intensity_floor)

    # Predict the spectrum, attach the observed intensities, and compute the correlation
    result = predict_single(peptidoform, model=model)
    result.observed_intensity = observed_intensity
    calculate_correlations([result])

    return result


def get_training_data(
psms: Union[PSMList, str, Path],
spectrum_file: Union[str, Path],
Expand Down Expand Up @@ -704,19 +760,6 @@ def _add_xgboost_predictions(self, results: List[ProcessingResult]) -> List[Proc

return results

def write_predictions(
    self, all_preds: pd.DataFrame, peptides: pd.DataFrame, output_filename: str
):
    """
    Write predictions to file (not implemented yet).

    Parameters
    ----------
    all_preds
        Data frame with the predictions to write.
    peptides
        Data frame with the peptides the predictions belong to.
    output_filename
        Name of the output file to write to.

    Raises
    ------
    NotImplementedError
        Always; writing predictions has not been implemented.

    """
    # TODO: Implement. The intended flow is to construct a ``spectrum_output.SpectrumOutput``
    # from ``all_preds``, ``peptides``, ``self.params["ms2pip"]`` and ``output_filename``, and
    # then call its ``write_results`` with ``self.output_formats``.
    raise NotImplementedError


def _process_peptidoform(
psm_index: int,
Expand Down
4 changes: 2 additions & 2 deletions ms2pip/search_space.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ class ProteomeSearchSpace(BaseModel):
min_length: int = 8
max_length: int = 30
min_precursor_mz: Optional[float] = 0
max_precursor_mz: Optional[float] = np.Inf
max_precursor_mz: Optional[float] = np.inf
cleavage_rule: str = "trypsin"
missed_cleavages: int = 2
semi_specific: bool = False
Expand All @@ -184,7 +184,7 @@ def __init__(self, **data: Any):
min_precursor_mz
Minimum precursor m/z for peptides. Default is 0.
max_precursor_mz
Maximum precursor m/z for peptides. Default is np.Inf.
Maximum precursor m/z for peptides. Default is np.inf.
cleavage_rule
Cleavage rule for peptide digestion. Default is "trypsin".
missed_cleavages
Expand Down
20 changes: 9 additions & 11 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,16 @@ classifiers = [
"Topic :: Scientific/Engineering :: Bio-Informatics",
"Development Status :: 5 - Production/Stable",
]
requires-python = ">=3.8"
requires-python = ">=3.9"
dependencies = [
"numpy>=1.16,<2",
"pandas>=1,<2",
"numpy>=1.25,<3",
"pandas>=1,<3",
"pyarrow",
"pyteomics>=3.5,<5",
"tomlkit>=0.5,<1",
"sqlalchemy>=1.3,<2",
"click>=7,<9",
"xgboost>=1.3,<2",
"xgboost>=1.3",
"lxml>=4",
"rich>=13",
"pydantic>=2",
Expand Down Expand Up @@ -75,25 +75,23 @@ publication = "https://doi.org/10.1093/nar/gkad335/"
ms2pip = "ms2pip.__main__:main"

[build-system]
requires = ["setuptools", "cython", "oldest-supported-numpy"]
requires = ["setuptools", "cython", "numpy>=2.0"]
build-backend = "setuptools.build_meta"

[tool.setuptools.packages.find]
include = ["ms2pip*"]

[tool.black]
line-length = 99
target-version = ['py38']
target-version = ['py39']

[tool.ruff]
line-length = 99
target-version = 'py38'
target-version = 'py39'

[tool.cibuildwheel]
build = "cp3*-manylinux_x86_64 cp3*-win_amd64 cp3*-macosx_x86_64 cp3*-macosx_arm64"
skip = "cp36-* cp37-* cp312-*" # EOL / no Numpy wheels available yet for Python 3.12
manylinux-x86_64-image = "manylinux2014"
# test-command = "pytest {package}/tests"
build = "cp3*-manylinux_x86_64 cp3*-musllinux_x86_64 cp3*-win_amd64 cp3*-macosx_x86_64 cp3*-macosx_arm64"
skip = "cp36-* cp37-* cp38-* cp313-*" # No ms2rescore-rs for 3.13
test-command = "ms2pip --help"

[tool.cibuildwheel.macos]
Expand Down
35 changes: 34 additions & 1 deletion tests/test_core.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import numpy as np
from psm_utils import PSM, Peptidoform
import pandas as pd

from ms2pip.core import get_training_data
from ms2pip.core import get_training_data, predict_single
from ms2pip.result import ProcessingResult


def _test_get_training_data():
Expand All @@ -13,3 +16,33 @@ def _test_get_training_data():
processes=1
)
pd.testing.assert_frame_equal(expected_df, output_df)

def test_predict_single():
    """Compare the ``predict_single`` result for ``ACDE/2`` against stored reference values."""
    pep = Peptidoform("ACDE/2")
    result = predict_single(pep)

    # Reference m/z values and predicted intensities for the b- and y-ions of ACDE/2
    reference_mz = {
        "b": np.array([72.04435, 175.05354, 290.08047], dtype=np.float32),
        "y": np.array([148.0604, 263.0873, 366.0965], dtype=np.float32),
    }
    reference_intensity = {
        "b": np.array([-9.14031, -7.6102686, -7.746709], dtype=np.float32),
        "y": np.array([-5.8988147, -5.811797, -7.069088], dtype=np.float32),
    }
    expected = ProcessingResult(
        psm_index=0,
        psm=PSM(peptidoform=pep, spectrum_id=0),
        theoretical_mz=reference_mz,
        predicted_intensity=reference_intensity,
        observed_intensity=None,
        correlation=None,
        feature_vectors=None,
    )

    assert result.psm_index == expected.psm_index
    assert result.psm == expected.psm

    # Array-valued fields: compare per ion type with floating-point tolerance
    for field_name in ("theoretical_mz", "predicted_intensity"):
        actual_arrays = getattr(result, field_name)
        expected_arrays = getattr(expected, field_name)
        for ion_type in ("b", "y"):
            np.testing.assert_array_almost_equal(
                actual_arrays[ion_type], expected_arrays[ion_type]
            )

    # Fields that are not populated by a plain prediction
    for field_name in ("observed_intensity", "correlation", "feature_vectors"):
        assert getattr(result, field_name) == getattr(expected, field_name)

0 comments on commit 1906b45

Please sign in to comment.