Skip to content

Commit

Permalink
🚧 start extracting MQ params
Browse files Browse the repository at this point in the history
- most selected parameters are easy to get
- differences between versions 1.6 and higher and the previous version (1.5)
- fragment_mass_tolerance
   -> which fragmentation method was used?
   -> missing information in extracted data for v1.5
  • Loading branch information
Henry committed Nov 13, 2023
1 parent 030eb20 commit 682825d
Showing 1 changed file with 73 additions and 25 deletions.
98 changes: 73 additions & 25 deletions proteobench/io/params/maxquant.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@
import xml.etree.ElementTree as ET
from pathlib import Path

import numpy as np
import pandas as pd

from proteobench.io.params import ProteoBenchParameters

logger = logging.getLogger()


Expand Down Expand Up @@ -127,45 +130,90 @@ def build_Series_from_records(records, index_length=4):
return pd.Series((v for (k, v) in records), index=idx)


# %%
def _version_key(version):
    """Turn a dotted version string such as ``"1.6.3.3"`` into a tuple of ints.

    Components are reduced to their leading ASCII digits. Parsing stops at the
    first component without leading digits, so an unparseable value yields an
    empty tuple, which compares lower than any real version.
    """
    key = []
    for token in str(version).strip().split("."):
        digits = ""
        for char in token:
            if not "0" <= char <= "9":
                break
            digits += char
        if not digits:
            break
        key.append(int(digits))
    return tuple(key)


def _join_mods(mods):
    """Return the modifications as one comma-separated string."""
    # ``squeeze`` yields a plain string for a single entry and a Series for
    # several entries; only the latter needs joining.
    return mods if isinstance(mods, str) else ",".join(mods)


def extract_params(fname) -> ProteoBenchParameters:
    """Read a MaxQuant parameter file and collect the ProteoBench parameters.

    Parameters
    ----------
    fname
        Path of the MaxQuant parameter file; handed to ``read_file`` as is.

    Returns
    -------
    ProteoBenchParameters
        Parameter object filled from the file. ``ident_fdr_psm``,
        ``fragment_mass_tolerance``, ``max_peptide_length`` and
        ``min_precursor_charge`` are not extracted and are set to ``None``.
    """
    params = ProteoBenchParameters()

    # Flatten the nested records into a Series with a sorted MultiIndex so
    # that the slice-based lookups below work.
    record = read_file(fname)
    record = build_Series_from_records(record, 4).sort_index()

    params.search_engine = "Andromeda"
    params.software_version = record.loc["maxQuantVersion"].squeeze()
    params.ident_fdr_psm = None
    params.ident_fdr_peptide = record.loc["peptideFdr"].squeeze()
    params.ident_fdr_protein = record.loc["proteinFdr"].squeeze()
    params.enable_match_between_runs = record.loc["matchBetweenRuns"].squeeze()

    precursor_mass_tolerance = record.loc[
        pd.IndexSlice["parameterGroups", "parameterGroup", "mainSearchTol", :]
    ].squeeze()
    params.precursor_mass_tolerance = f"{precursor_mass_tolerance} ppm"
    # TODO: not extracted yet -- differences between version >1.6 and <=1.5
    params.fragment_mass_tolerance = None

    params.enzyme = record.loc[
        ("parameterGroups", "parameterGroup", "enzymes", "string")
    ].squeeze()
    params.allowed_miscleavages = record.loc[
        pd.IndexSlice["parameterGroups", "parameterGroup", "maxMissedCleavages", :]
    ].squeeze()
    params.min_peptide_length = record.loc["minPepLen"].squeeze()
    params.max_peptide_length = None

    # Fixed modifications are looked up inside the parameter group for
    # versions after 1.6.0.0 and at the top level otherwise. The versions are
    # compared as integer tuples: a plain string comparison is lexicographic
    # and would sort e.g. "1.10.0.0" before "1.6.0.0".
    if _version_key(params.software_version) > (1, 6, 0, 0):
        fixed_mods_key = pd.IndexSlice[
            "parameterGroups", "parameterGroup", "fixedModifications", :
        ]
    else:
        fixed_mods_key = pd.IndexSlice["fixedModifications", :]
    params.fixed_mods = _join_mods(record.loc[fixed_mods_key].squeeze())

    variable_mods = record.loc[
        pd.IndexSlice["parameterGroups", "parameterGroup", "variableModifications", :]
    ].squeeze()
    params.variable_mods = _join_mods(variable_mods)

    params.max_mods = record.loc[
        ("parameterGroups", "parameterGroup", "maxNmods")
    ].squeeze()
    params.min_precursor_charge = None
    params.max_precursor_charge = record.loc[
        pd.IndexSlice["parameterGroups", "parameterGroup", "maxCharge", :]
    ].squeeze()
    return params


# create a first version of json files to match
if __name__ == "__main__":
    from pprint import pprint

    # For every example parameter file: dump the parsed records as JSON next
    # to the input file and print the extracted parameters.
    for test_file in [
        "../../../test/params/mqpar_MQ1.6.3.3_MBR.xml",
        "../../../test/params/mqpar_MQ2.1.3.0_noMBR.xml",
        "../../../test/params/mqpar1.5.3.30_MBR.xml",
    ]:
        print(f"{test_file = }")
        record = read_file(test_file)
        # Same file name as the input, with the suffix replaced by ``.json``.
        Path(test_file).with_suffix(".json").write_text(
            json.dumps(record, indent=4)
        )
        # extract_params re-reads and flattens the file itself, so the
        # flattened Series is not rebuilt here.
        params = extract_params(test_file)
        pprint(params.__dict__)

0 comments on commit 682825d

Please sign in to comment.