diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 18af5ca..9a3c051 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,7 +13,7 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: ['3.7', '3.8', '3.9', '3.10'] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v2 diff --git a/README.md b/README.md index 78afc6c..f29a364 100644 --- a/README.md +++ b/README.md @@ -325,7 +325,8 @@ next to the predictions for singly charged b- and y-ions. | CIDch2 | CID | Linear ion trap | Tryptic digest | | Immuno-HCD | HCD | Orbitrap | Immunopeptides | | CID-TMT | CID | Linear ion trap | Tryptic digest, TMT-labeled | - +| timsTOF2023 | CID | Ion mobility quadrupole time-of-flight | Tryptic-, elastase digest, immuno class 1 | +| timsTOF2024 | CID | Ion mobility quadrupole time-of-flight | Tryptic-, elastase digest, immuno class 1 & class 2 | ### Models, version numbers, and the train and test datasets used to create each model @@ -342,6 +343,8 @@ next to the predictions for singly charged b- and y-ions. | HCD2021 | v20210416 | [Combined dataset] (520 579) | [PXD008034](https://doi.org/10.1016/j.jprot.2017.12.006) (35 269) | 0.932361 | Immuno-HCD | v20210316 | [Combined dataset] (460 191) | [PXD005231 (HLA-I)](https://doi.org/10.1101/098780) (46 753)
[PXD020011 (HLA-II)](https://doi.org/10.3389/fimmu.2020.01981 ) (23 941) | 0.963736
0.942383 | CID-TMT | v20220104 | [in-house dataset] (72 138) | [PXD005890](https://doi.org/10.1021/acs.jproteome.7b00091) (69 768) | 0.851085 +| timsTOF2023 | v20230912 | [Combined dataset] (234 973) | PXD043026
PXD046535
PXD046543 (13 012) | 0.892540 (tryptic)
0.871258 (elastase)
0.899834 (class I)
0.635548 (class II) +| timsTOF2024 | v20240105 | [Combined dataset] (480 024) | PXD043026
PXD046535
PXD046543
PXD038782 (25 265) | 0.883270 (tryptic)
0.814374 (elastase)
0.887192 (class I)
0.847951 (class II) To train custom MS²PIP models, please refer to [Training new MS²PIP models](http://compomics.github.io/projects/ms2pip_c/wiki/Training-new-MS2PIP-models.html) on our Wiki pages. diff --git a/ms2pip/ms2pipC.py b/ms2pip/ms2pipC.py index 01cca33..603010f 100644 --- a/ms2pip/ms2pipC.py +++ b/ms2pip/ms2pipC.py @@ -180,7 +180,7 @@ "model_20220104_CID_TMT_Y.xgboost": "299539179ca55d4ac82e9aed6a4e0bd134a9a41e", }, }, - "timsTOF": { + "timsTOF2023": { "id": 12, "ion_types": ["B", "Y"], "peaks_version": "general", @@ -194,8 +194,25 @@ "model_20230912_timsTOF_Y.xgboost": "8edd87e0fba5f338d0a0881b5afbcf2f48ec5268", }, }, + "timsTOF2024": { + "id": 13, + "ion_types": ["B", "Y"], + "peaks_version": "general", + "features_version": "normal", + "xgboost_model_files": { + "b": "model_20240105_timsTOF_B.xgboost", + "y": "model_20240105_timsTOF_Y.xgboost", + }, + "model_hash": { + "model_20240105_timsTOF_B.xgboost": "d70e145c15cf2bfa30968077a68409699b2fa541", + "model_20240105_timsTOF_Y.xgboost": "3f0414ee1ad7cff739e0d6242e25bfc22b6ebfe5", + }, + }, } + + MODELS["HCD"] = MODELS["HCD2021"] +MODELS["timsTOF"] = MODELS["timsTOF2024"] def process_peptides(worker_num, data, afile, modfile, modfile2, PTMmap, model): @@ -340,7 +357,9 @@ def process_spectra( ces = specdict["ce"] else: specdict = ( - data[["spec_id", "peptide", "modifications", "charge"]].set_index("spec_id").to_dict() + data[["spec_id", "peptide", "modifications", "charge"]] + .set_index("spec_id") + .to_dict() ) peptides = specdict["peptide"] modifications = specdict["modifications"] @@ -449,9 +468,7 @@ def process_spectra( if "ce" in data.columns: dvectors.append( np.array( - ms2pip_pyx.get_vector_ce( - peptide, modpeptide, charge, colen - ), + ms2pip_pyx.get_vector_ce(peptide, modpeptide, charge, colen), dtype=np.uint16, ) ) # SD: added collision energy @@ -825,7 +842,9 @@ def run(self): index=True, lineterminator="\n", ) - except TypeError: # Pandas < 1.5 (Required for Python 3.7 support) + except ( + TypeError + ): # Pandas < 1.5 (Required for Python 3.7 support) correlations.to_csv( corr_filename, index=True, @@ -966,9 +985,9 @@ def _write_vector_file(self, results): # dtargets is a dict, containing targets for every ion type (keys are int) for i, t in dtargets.items(): - df[ - "targets_{}".format(MODELS[self.model]["ion_types"][i]) - ] = np.concatenate(t, axis=None) + df["targets_{}".format(MODELS[self.model]["ion_types"][i])] = ( + np.concatenate(t, axis=None) + ) df["psmid"] = psmids all_results.append(df)