diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 18af5ca..9a3c051 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -13,7 +13,7 @@ jobs:
strategy:
max-parallel: 4
matrix:
- python-version: ['3.7', '3.8', '3.9', '3.10']
+ python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
steps:
- uses: actions/checkout@v2
diff --git a/README.md b/README.md
index 78afc6c..f29a364 100644
--- a/README.md
+++ b/README.md
@@ -325,7 +325,8 @@ next to the predictions for singly charged b- and y-ions.
| CIDch2 | CID | Linear ion trap | Tryptic digest |
| Immuno-HCD | HCD | Orbitrap | Immunopeptides |
| CID-TMT | CID | Linear ion trap | Tryptic digest, TMT-labeled |
-
+| timsTOF2023 | CID | Ion mobility quadrupole time-of-flight | Tryptic and elastase digests, immunopeptides (class I) |
+| timsTOF2024 | CID | Ion mobility quadrupole time-of-flight | Tryptic and elastase digests, immunopeptides (class I and class II) |
### Models, version numbers, and the train and test datasets used to create each model
@@ -342,6 +343,8 @@ next to the predictions for singly charged b- and y-ions.
| HCD2021 | v20210416 | [Combined dataset] (520 579) | [PXD008034](https://doi.org/10.1016/j.jprot.2017.12.006) (35 269) | 0.932361
| Immuno-HCD | v20210316 | [Combined dataset] (460 191) | [PXD005231 (HLA-I)](https://doi.org/10.1101/098780) (46 753)
[PXD020011 (HLA-II)](https://doi.org/10.3389/fimmu.2020.01981 ) (23 941) | 0.963736
0.942383
| CID-TMT | v20220104 | [in-house dataset] (72 138) | [PXD005890](https://doi.org/10.1021/acs.jproteome.7b00091) (69 768) | 0.851085
+| timsTOF2023 | v20230912 | [Combined dataset] (234 973) | PXD043026<br>PXD046535<br>PXD046543 (13 012) | 0.892540 (tryptic)<br>0.871258 (elastase)<br>0.899834 (class I)<br>0.635548 (class II)
+| timsTOF2024 | v20240105 | [Combined dataset] (480 024) | PXD043026<br>PXD046535<br>PXD046543<br>PXD038782 (25 265) | 0.883270 (tryptic)<br>0.814374 (elastase)<br>0.887192 (class I)<br>0.847951 (class II)
To train custom MS²PIP models, please refer to [Training new MS²PIP models](http://compomics.github.io/projects/ms2pip_c/wiki/Training-new-MS2PIP-models.html) on our Wiki pages.
diff --git a/ms2pip/ms2pipC.py b/ms2pip/ms2pipC.py
index 01cca33..603010f 100644
--- a/ms2pip/ms2pipC.py
+++ b/ms2pip/ms2pipC.py
@@ -180,7 +180,7 @@
"model_20220104_CID_TMT_Y.xgboost": "299539179ca55d4ac82e9aed6a4e0bd134a9a41e",
},
},
- "timsTOF": {
+ "timsTOF2023": {
"id": 12,
"ion_types": ["B", "Y"],
"peaks_version": "general",
@@ -194,8 +194,25 @@
"model_20230912_timsTOF_Y.xgboost": "8edd87e0fba5f338d0a0881b5afbcf2f48ec5268",
},
},
+ "timsTOF2024": {
+ "id": 13,
+ "ion_types": ["B", "Y"],
+ "peaks_version": "general",
+ "features_version": "normal",
+ "xgboost_model_files": {
+ "b": "model_20240105_timsTOF_B.xgboost",
+ "y": "model_20240105_timsTOF_Y.xgboost",
+ },
+ "model_hash": {
+ "model_20240105_timsTOF_B.xgboost": "d70e145c15cf2bfa30968077a68409699b2fa541",
+ "model_20240105_timsTOF_Y.xgboost": "3f0414ee1ad7cff739e0d6242e25bfc22b6ebfe5",
+ },
+ },
}
+
+
MODELS["HCD"] = MODELS["HCD2021"]
+MODELS["timsTOF"] = MODELS["timsTOF2024"]
def process_peptides(worker_num, data, afile, modfile, modfile2, PTMmap, model):
@@ -340,7 +357,9 @@ def process_spectra(
ces = specdict["ce"]
else:
specdict = (
- data[["spec_id", "peptide", "modifications", "charge"]].set_index("spec_id").to_dict()
+ data[["spec_id", "peptide", "modifications", "charge"]]
+ .set_index("spec_id")
+ .to_dict()
)
peptides = specdict["peptide"]
modifications = specdict["modifications"]
@@ -449,9 +468,7 @@ def process_spectra(
if "ce" in data.columns:
dvectors.append(
np.array(
- ms2pip_pyx.get_vector_ce(
- peptide, modpeptide, charge, colen
- ),
+ ms2pip_pyx.get_vector_ce(peptide, modpeptide, charge, colen),
dtype=np.uint16,
)
) # SD: added collision energy
@@ -825,7 +842,9 @@ def run(self):
index=True,
lineterminator="\n",
)
- except TypeError: # Pandas < 1.5 (Required for Python 3.7 support)
+ except (
+ TypeError
+ ): # Pandas < 1.5 (Required for Python 3.7 support)
correlations.to_csv(
corr_filename,
index=True,
@@ -966,9 +985,9 @@ def _write_vector_file(self, results):
# dtargets is a dict, containing targets for every ion type (keys are int)
for i, t in dtargets.items():
- df[
- "targets_{}".format(MODELS[self.model]["ion_types"][i])
- ] = np.concatenate(t, axis=None)
+ df["targets_{}".format(MODELS[self.model]["ion_types"][i])] = (
+ np.concatenate(t, axis=None)
+ )
df["psmid"] = psmids
all_results.append(df)