Merge branch 'main' into refactor-parse-spectra

compomics · Dec 5, 2024 · 20585c7 · 20585c7
2 parents 41ea6c8 + f8cab91
commit 20585c7
Show file tree

Hide file tree

Showing 34 changed files with 501 additions and 464 deletions.
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -54,7 +54,7 @@ jobs:
       - name: Install package and dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install --only-binary :all: . pyinstaller
+          pip install --only-binary :all: .[ionmob] pyinstaller
 
       - name: Install Inno Setup
         uses: crazy-max/ghaction-chocolatey@v3
@@ -83,3 +83,43 @@ jobs:
           tag: ${{ github.ref }}
           file_glob: true
           file: dist/*.exe
+
+  docker-image:
+    runs-on: ubuntu-latest
+    permissions:
+      packages: write
+      contents: read
+      attestations: write
+      id-token: write
+    steps:
+      - name: Check out the repo
+        uses: actions/checkout@v4
+
+      - name: Log in to the Container registry
+        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
+        with:
+          images: ghcr.io/${{ github.repository }}
+
+      - name: Build and push Docker images
+        id: push
+        uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671
+        with:
+          context: .
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+
+      - name: Generate artifact attestation
+        uses: actions/attest-build-provenance@v1
+        with:
+          subject-name: ghcr.io/${{ github.repository }}
+          subject-digest: ${{ steps.push.outputs.digest }}
+          push-to-registry: true
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -12,7 +12,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10", "3.11"]
     steps:
       - uses: actions/checkout@v4
 
@@ -55,7 +55,7 @@ jobs:
       - name: Install package and dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install --only-binary :all: . pyinstaller
+          pip install --only-binary :all: .[ionmob] pyinstaller
 
       - name: Install Inno Setup
         uses: crazy-max/ghaction-chocolatey@v1

diff --git a/README.md b/README.md
@@ -10,11 +10,6 @@
 
 Modular and user-friendly platform for AI-assisted rescoring of peptide identifications
 
-> ⚠️ Note: This is the documentation for the fully redeveloped version 3.0 of MS²Rescore. While
-> MS²Rescore 3.0 has been drastically improved over the previous version, you might run into some
-> unforeseen issues. Please report any issues you encounter on the [issue tracker][issues] or post
-> your questions on the [GitHub Discussions][discussions] forum.
-
 ## About MS²Rescore
 
 MS²Rescore performs ultra-sensitive peptide identification rescoring with LC-MS predictors such as
@@ -52,8 +47,7 @@ timsTOF fragmentation and IM2Deep for ion mobility separation. Bruker .d and min
 files are directly supported through the [timsrust](https://github.com/MannLabs/timsrust) library.
 
 Check out our [preprint](https://doi.org/10.1101/2024.05.29.596400) for more information and the
-[TIMS²Rescore documentation](https://ms2rescore.readthedocs.io/en/stable/userguide/tims2rescore)
-to get started.
+[TIMS²Rescore documentation][tims2rescore] to get started.
 
 ## Citing
 
@@ -104,15 +98,16 @@ make a [pull request][pr]!
 [issues]: https://github.com/compomics/ms2rescore/issues/
 [discussions]: https://github.com/compomics/ms2rescore/discussions/
 [pr]: https://github.com/compomics/ms2rescore/pulls/
-[desktop]: https://ms2rescore.readthedocs.io/gui.html
+[desktop]: https://ms2rescore.readthedocs.io/en/stable/gui/
 [desktop-installer]: https://github.com/compomics/ms2rescore/releases/latest
-[cli]: https://ms2rescore.readthedocs.io/cli/cli.html
-[python-package]: https://ms2rescore.readthedocs.io/api/ms2rescore.html
-[docker]: https://ms2rescore.readthedocs.io/installation.html#docker-container
+[cli]: https://ms2rescore.readthedocs.io/en/stable/cli/
+[python-package]: https://ms2rescore.readthedocs.io/en/stable/api/ms2rescore/
+[docker]: https://ms2rescore.readthedocs.io/en/stable/installation#docker-container
 [publication-branch]: https://github.com/compomics/ms2rescore/tree/pub
 [ms2pip]: https://github.com/compomics/ms2pip
 [deeplc]: https://github.com/compomics/deeplc
 [percolator]: https://github.com/percolator/percolator/
 [mokapot]: https://mokapot.readthedocs.io/
 [psm_utils]: https://github.com/compomics/psm_utils
 [file-formats]: https://psm-utils.readthedocs.io/en/stable/#supported-file-formats
+[tims2rescore]: https://ms2rescore.readthedocs.io/en/stable/userguide/tims2rescore
diff --git a/docs/source/_static/img/ms2rescore-overview.png b/docs/source/_static/img/ms2rescore-overview.png
diff --git a/docs/source/config_schema.md b/docs/source/config_schema.md
@@ -67,6 +67,7 @@
     - **One of**
       - *string*
       - *null*
+  - **`write_flashlfq`** *(boolean)*: Write results to a FlashLFQ-compatible file. Default: `false`.
   - **`write_report`** *(boolean)*: Write an HTML report with various QC metrics and charts. Default: `false`.
   - **`profile`** *(boolean)*: Write a txt report using cProfile for profiling. Default: `false`.
 ## Definitions
@@ -93,7 +94,6 @@
   - **`train_fdr`** *(number)*: FDR threshold for training Mokapot. Minimum: `0`. Maximum: `1`. Default: `0.01`.
   - **`write_weights`** *(boolean)*: Write Mokapot weights to a text file. Default: `false`.
   - **`write_txt`** *(boolean)*: Write Mokapot results to a text file. Default: `false`.
-  - **`write_flashlfq`** *(boolean)*: Write Mokapot results to a FlashLFQ-compatible file. Default: `false`.
 - <a id="definitions/percolator"></a>**`percolator`** *(object)*: Percolator rescoring engine configuration. Can contain additional properties. Refer to *[#/definitions/rescoring_engine](#definitions/rescoring_engine)*.
   - **`init-weights`**: Weights file for scoring function. Default: `false`.
     - **One of**

diff --git a/docs/source/tutorials/in-depth-python-api.ipynb b/docs/source/tutorials/in-depth-python-api.ipynb
@@ -7,6 +7,18 @@
     "# Using the Python API "
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This tutorial shows how to use the MS²Rescore Python API for each step of the rescoring process\n",
+    "individually. This is useful if you want to customize rescoring for your own Python\n",
+    "workflow or if you want to understand how MS²Rescore works.\n",
+    "\n",
+    "Note that the full MS²Rescore workflow is also available from Python with the single function call\n",
+    "`ms2rescore.rescore()`."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
@@ -37342,7 +37354,7 @@
     "from pyteomics.mgf import read as read_mgf\n",
     "\n",
     "for spectrum in read_mgf(\"../../../examples/mgf/20161213_NGHF_DBJ_SA_Exp3A_HeLa_1ug_7min_15000_02.mgf\"):\n",
-    "    print(spectrum[\"params\"][\"title\"])\n",
+    "    print(spectrum[\"params\"][\"title\"])  # noqa T201\n",
     "    break"
    ]
   },
@@ -37362,7 +37374,7 @@
    "source": [
     "import re\n",
     "spectrum_id = re.match(r\".*scan=(\\d+)$\", spectrum[\"params\"][\"title\"]).group(1)\n",
-    "print(spectrum_id)"
+    "print(spectrum_id)  # noqa T201"
    ]
   },
   {
@@ -38120,7 +38132,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import plotly.express as px\n",
     "from ms2rescore.report.charts import (\n",
     "    calculate_feature_qvalues,\n",
     "    feature_ecdf_auc_bar,\n",

diff --git a/docs/source/userguide/configuration.rst b/docs/source/userguide/configuration.rst
@@ -244,13 +244,13 @@ expression pattern that extracts the decoy status from the protein name:
 
   .. code-block:: json
 
-    "decoy_pattern": "DECOY_"
+    "id_decoy_pattern": "DECOY_"
 
 .. tab:: TOML
 
     .. code-block:: toml
 
-      decoy_pattern = "DECOY_"
+      id_decoy_pattern = "DECOY_"
 
 
 Multi-rank rescoring

diff --git a/examples/msgfplus-ms2rescore.json b/examples/msgfplus-ms2rescore.json
@@ -7,9 +7,6 @@
         },
         "log_level": "debug",
         "processes": 16,
-        "feature_generators": {
-            "basic": {}
-        },
         "rescoring_engine": {
             "mokapot": {
                 "fasta_file": "examples/proteins/uniprot-proteome-human-contaminants.fasta",

diff --git a/examples/msgfplus-ms2rescore.toml b/examples/msgfplus-ms2rescore.toml
@@ -5,25 +5,7 @@ psm_reader_kwargs = { "score_column" = "PSMScore" }
 log_level = "debug"
 processes = 16
 
-# [ms2rescore.modification_mapping]
-
-# [ms2rescore.fixed_modifications]
-
-[ms2rescore.feature_generators.basic]
-# No options, but setting heading enables feature generator
-
-# [ms2rescore.feature_generators.ms2pip]
-# model = "HCD"
-# ms2_tolerance = 0.02
-
-# [ms2rescore.feature_generators.deeplc]
-# deeplc_retrain = false
-
-# [ms2rescore.feature_generators.maxquant]
-# No options, but setting heading enables feature generator
-
 [ms2rescore.rescoring_engine.mokapot]
 fasta_file = "examples/proteins/uniprot-proteome-human-contaminants.fasta"
 write_weights = true
 write_txt = true
-# write_flashlfq = true
diff --git a/ms2rescore.spec b/ms2rescore.spec
@@ -16,6 +16,8 @@ project = "ms2rescore"
 bundle_name = "ms2rescore"
 bundle_identifier = f"{bundle_name}.{__version__}"
 
+extra_requirements = {"ionmob"}
+
 # Requirements config
 skip_requirements_regex = r"^(?:.*\..*)"
 
@@ -28,6 +30,7 @@ requirements = {
     if "; extra ==" not in req  # Exclude optional dependencies
 }
 requirements.update([project, "xgboost"])
+requirements.update(extra_requirements)
 
 hidden_imports = set()
 datas = []

diff --git a/ms2rescore/__init__.py b/ms2rescore/__init__.py
@@ -1,6 +1,6 @@
 """MS²Rescore: Sensitive PSM rescoring with predicted MS² peak intensities and RTs."""
 
-__version__ = "3.1.0"
+__version__ = "3.1.2"
 
 from warnings import filterwarnings
 

diff --git a/ms2rescore/__main__.py b/ms2rescore/__main__.py
@@ -209,12 +209,12 @@ def main(tims=False):
     cli_args = parser.parse_args()
 
     configurations = []
-    if cli_args.config_file:
-        configurations.append(cli_args.config_file)
     if tims:
         configurations.append(
             json.load(importlib.resources.open_text(package_data, "config_default_tims.json"))
         )
+    if cli_args.config_file:
+        configurations.append(cli_args.config_file)
     configurations.append(cli_args)
 
     try:

diff --git a/ms2rescore/core.py b/ms2rescore/core.py
@@ -171,6 +171,16 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
     logger.info(f"Writing output to {output_file_root}.psms.tsv...")
     psm_utils.io.write_file(psm_list, output_file_root + ".psms.tsv", filetype="tsv")
 
+    if config["write_flashlfq"]:
+        logger.info(f"Writing output to {output_file_root}.flashlfq.tsv...")
+        psm_utils.io.write_file(
+            psm_list,
+            output_file_root + ".flashlfq.tsv",
+            filetype="flashlfq",
+            fdr_threshold=0.01,  # TODO: Make FDR threshold configurable
+            only_target=True,
+        )
+
     # Write report
     if config["write_report"]:
         try:

diff --git a/ms2rescore/feature_generators/deeplc.py b/ms2rescore/feature_generators/deeplc.py
@@ -141,11 +141,10 @@ def add_features(self, psm_list: PSMList) -> None:
                 )
 
                 # Disable wild logging to stdout by Tensorflow, unless in debug mode
-                with (
-                    contextlib.redirect_stdout(open(os.devnull, "w"))
-                    if not self._verbose
-                    else contextlib.nullcontext()
-                ):
+
+                with contextlib.redirect_stdout(
+                    open(os.devnull, "w", encoding="utf-8")
+                ) if not self._verbose else contextlib.nullcontext():
                     # Make new PSM list for this run (chain PSMs per spectrum to flat list)
                     psm_list_run = PSMList(psm_list=list(chain.from_iterable(psms.values())))
 

diff --git a/ms2rescore/feature_generators/im2deep.py b/ms2rescore/feature_generators/im2deep.py
@@ -17,7 +17,7 @@
 
 import numpy as np
 import pandas as pd
-from im2deep.calibrate import im2ccs
+from im2deep.utils import im2ccs
 from im2deep.im2deep import predict_ccs
 from psm_utils import PSMList
 
@@ -91,7 +91,7 @@ def add_features(self, psm_list: PSMList) -> None:
 
                 # Disable wild logging to stdout by TensorFlow, unless in debug mode
                 with (
-                    contextlib.redirect_stdout(open(os.devnull, "w"))
+                    contextlib.redirect_stdout(open(os.devnull, "w", encoding="utf-8"))
                     if not self._verbose
                     else contextlib.nullcontext()
                 ):