Skip to content

Commit

Permalink
✨ extract ms2 parameter based on specified method
Browse files Browse the repository at this point in the history
- MaxQuant (MQ) stores several sets of MS2 settings (e.g. FTMS, ITMS, TOF) in its parameter file;
  which set is applied depends on information stored in the raw file metadata
  • Loading branch information
Henry committed Nov 19, 2023
1 parent 5a95fa0 commit f47c6fc
Show file tree
Hide file tree
Showing 2 changed files with 271 additions and 3 deletions.
21 changes: 18 additions & 3 deletions proteobench/io/params/maxquant.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,15 @@ def build_Series_from_records(records, index_length=4):
return pd.Series((v for (k, v) in records), index=idx)


def extract_params(fname) -> ProteoBenchParameters:
def extract_params(fname, ms2frac="FTMS") -> ProteoBenchParameters:
params = ProteoBenchParameters()

record = read_file(fname)
# select the MS2 fragmentation method specified by the `ms2frac` parameter
# (to our knowledge, MaxQuant itself selects it based on the binary raw file metadata)
record["msmsParamsArray"] = [
d for d in record["msmsParamsArray"] if d["msmsParams"]["Name"] == ms2frac
]
record = build_Series_from_records(record, 4).sort_index()
params.search_engine = "Andromeda"
params.software_version = record.loc["maxQuantVersion"].squeeze()
Expand All @@ -147,7 +152,17 @@ def extract_params(fname) -> ProteoBenchParameters:
pd.IndexSlice["parameterGroups", "parameterGroup", "mainSearchTol", :]
].squeeze()
params.precursor_mass_tolerance = f"{precursor_mass_tolerance} ppm"
fragment_mass_tolerance = None # ! differences between version >1.6 and <=1.5
# ! differences between version >1.6 and <=1.5
fragment_mass_tolerance = record.loc[
pd.IndexSlice["msmsParamsArray", "msmsParams", "MatchTolerance", :]
].squeeze()
in_ppm = bool(
record.loc[
pd.IndexSlice["msmsParamsArray", "msmsParams", "MatchToleranceInPpm", :]
].squeeze()
)
if in_ppm:
fragment_mass_tolerance = f"{fragment_mass_tolerance} ppm"
params.fragment_mass_tolerance = fragment_mass_tolerance
params.enzyme = record.loc[
("parameterGroups", "parameterGroup", "enzymes", "string")
Expand Down Expand Up @@ -214,5 +229,5 @@ def extract_params(fname) -> ProteoBenchParameters:
record = build_Series_from_records(record, 4)
record = record.to_frame("run_identifier")
record.to_csv(Path(test_file).with_suffix(".csv"))
params = extract_params(test_file)
params = extract_params(test_file, ms2frac="FTMS")
pprint(params.__dict__)
253 changes: 253 additions & 0 deletions test/params/mqpar1.5.3.30_noMBR.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
<?xml version="1.0" encoding="utf-8"?>
<MaxQuantParams xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" aifSilWeight="0" aifIsoWeight="0"
aifTopx="0" aifCorrelation="0" aifCorrelationFirstPass="0" aifMinMass="0" aifMsmsTol="0"
aifSecondPass="false" aifIterative="false" aifThresholdFdr="0" writeMsScansTable="true"
writeMsmsScansTable="true" writeMs3ScansTable="true" writeAllPeptidesTable="true"
writeMzRangeTable="true" disableMd5="false">
<name>Session1</name>
<maxQuantVersion>1.5.3.30</maxQuantVersion>
<tempFolder />
<numThreads>3</numThreads>
<sendEmail>false</sendEmail>
<fixedCombinedFolder />
<ionCountIntensities>false</ionCountIntensities>
<verboseColumnHeaders>false</verboseColumnHeaders>
<fullMinMz>-1.7976931348623157E+308</fullMinMz>
<fullMaxMz>1.7976931348623157E+308</fullMaxMz>
<calcPeakProperties>false</calcPeakProperties>
<showCentroidMassDifferences>false</showCentroidMassDifferences>
<showIsotopeMassDifferences>false</showIsotopeMassDifferences>
<filePaths>
<string>/users/user/EuBIC benchmarking
project\MQ15330_noMBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.raw</string>
<string>/users/user/EuBIC benchmarking
project\MQ15330_noMBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.raw</string>
<string>/users/user/EuBIC benchmarking
project\MQ15330_noMBR\LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.raw</string>
<string>/users/user/EuBIC benchmarking
project\MQ15330_noMBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.raw</string>
<string>/users/user/EuBIC benchmarking
project\MQ15330_noMBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.raw</string>
<string>/users/user/EuBIC benchmarking
project\MQ15330_noMBR\LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.raw</string>
</filePaths>
<experiments>
<string>A_Sample_Alpha_01</string>
<string>A_Sample_Alpha_02</string>
<string>A_Sample_Alpha_03</string>
<string>B_Sample_Alpha_01</string>
<string>B_Sample_Alpha_02</string>
<string>B_Sample_Alpha_03</string>
</experiments>
<fractions>
<short>32767</short>
<short>32767</short>
<short>32767</short>
<short>32767</short>
<short>32767</short>
<short>32767</short>
</fractions>
<paramGroupIndices>
<int>0</int>
<int>0</int>
<int>0</int>
<int>0</int>
<int>0</int>
<int>0</int>
</paramGroupIndices>
<parameterGroups>
<parameterGroup>
<maxCharge>7</maxCharge>
<minPeakLen>2</minPeakLen>
<useMs1Centroids>false</useMs1Centroids>
<useMs2Centroids>false</useMs2Centroids>
<cutPeaks>true</cutPeaks>
<gapScans>1</gapScans>
<minTime>NaN</minTime>
<maxTime>NaN</maxTime>
<matchType>MatchFromAndTo</matchType>
<centroidMatchTol>8</centroidMatchTol>
<centroidMatchTolInPpm>true</centroidMatchTolInPpm>
<centroidHalfWidth>35</centroidHalfWidth>
<centroidHalfWidthInPpm>true</centroidHalfWidthInPpm>
<valleyFactor>1.4</valleyFactor>
<advancedPeakSplitting>false</advancedPeakSplitting>
<intensityThreshold>500</intensityThreshold>
<msInstrument>0</msInstrument>
<intensityDetermination>0</intensityDetermination>
<labelMods>
<string />
</labelMods>
<reQuantify>false</reQuantify>
<lfqSkipNorm>false</lfqSkipNorm>
<lfqMinEdgesPerNode>3</lfqMinEdgesPerNode>
<lfqAvEdgesPerNode>6</lfqAvEdgesPerNode>
<lfqMaxFeatures>100000</lfqMaxFeatures>
<fastLfq>true</fastLfq>
<lfqRestrictFeatures>false</lfqRestrictFeatures>
<lfqMinRatioCount>2</lfqMinRatioCount>
<useNormRatiosForHybridLfq>true</useNormRatiosForHybridLfq>
<maxLabeledAa>0</maxLabeledAa>
<maxNmods>5</maxNmods>
<maxMissedCleavages>2</maxMissedCleavages>
<multiplicity>1</multiplicity>
<enzymes>
<string>Trypsin/P</string>
</enzymes>
<enzymesFirstSearch />
<useEnzymeFirstSearch>false</useEnzymeFirstSearch>
<useVariableModificationsFirstSearch>false</useVariableModificationsFirstSearch>
<variableModifications>
<string>Oxidation (M)</string>
<string>Acetyl (Protein N-term)</string>
</variableModifications>
<useMultiModification>false</useMultiModification>
<multiModifications />
<isobaricLabels />
<variableModificationsFirstSearch />
<hasAdditionalVariableModifications>false</hasAdditionalVariableModifications>
<additionalVariableModifications />
<additionalVariableModificationProteins />
<doMassFiltering>true</doMassFiltering>
<firstSearchTol>20</firstSearchTol>
<mainSearchTol>4.5</mainSearchTol>
<searchTolInPpm>true</searchTolInPpm>
<isotopeMatchTol>2</isotopeMatchTol>
<isotopeMatchTolInPpm>true</isotopeMatchTolInPpm>
<isotopeTimeCorrelation>0.6</isotopeTimeCorrelation>
<theorIsotopeCorrelation>0.6</theorIsotopeCorrelation>
<recalibrationInPpm>true</recalibrationInPpm>
<intensityDependentCalibration>false</intensityDependentCalibration>
<minScoreForCalibration>70</minScoreForCalibration>
<matchLibraryFile>false</matchLibraryFile>
<libraryFile />
<matchLibraryMassTolPpm>0</matchLibraryMassTolPpm>
<matchLibraryTimeTolMin>0</matchLibraryTimeTolMin>
<matchLabelTimeTolMin>0</matchLabelTimeTolMin>
<reporterMassTolerance>NaN</reporterMassTolerance>
<reporterPif>NaN</reporterPif>
<filterPif>false</filterPif>
<reporterFraction>NaN</reporterFraction>
<reporterBasePeakRatio>NaN</reporterBasePeakRatio>
<timsHalfWidth>0</timsHalfWidth>
<timsStep>0</timsStep>
<timsResolution>0</timsResolution>
<timsMinMsmsIntensity>0</timsMinMsmsIntensity>
<timsRemovePrecursor>true</timsRemovePrecursor>
<crosslinkSearch>false</crosslinkSearch>
<crosslinkMaxMonoUnsaturated>0</crosslinkMaxMonoUnsaturated>
<crosslinkMaxMonoSaturated>0</crosslinkMaxMonoSaturated>
<crosslinkMaxDiUnsaturated>0</crosslinkMaxDiUnsaturated>
<crosslinkMaxDiSaturated>0</crosslinkMaxDiSaturated>
<crosslinkUseSeparateFasta>false</crosslinkUseSeparateFasta>
<crosslinkFastaFiles />
<crosslinkMode>PeptidesWithCleavedLinker</crosslinkMode>
<lcmsRunType>Standard</lcmsRunType>
<lfqMode>0</lfqMode>
<enzymeMode>0</enzymeMode>
<enzymeModeFirstSearch>0</enzymeModeFirstSearch>
</parameterGroup>
</parameterGroups>
<fixedModifications>
<string>Carbamidomethyl (C)</string>
</fixedModifications>
<fastaFiles>
<string>/users/user/EuBIC benchmarking
project\MQ15330_noMBR\BenchmarkFASTAModule1_DDA_NOCONTA.fasta</string>
</fastaFiles>
<fastaFilesFirstSearch />
<fixedSearchFolder />
<advancedRatios>true</advancedRatios>
<rtShift>false</rtShift>
<separateLfq>false</separateLfq>
<lfqStabilizeLargeRatios>true</lfqStabilizeLargeRatios>
<lfqRequireMsms>true</lfqRequireMsms>
<decoyMode>revert</decoyMode>
<includeContaminants>true</includeContaminants>
<topxWindow>100</topxWindow>
<maxPeptideMass>4600</maxPeptideMass>
<epsilonMutationScore>true</epsilonMutationScore>
<mutatedPeptidesSeparately>true</mutatedPeptidesSeparately>
<minDeltaScoreUnmodifiedPeptides>0</minDeltaScoreUnmodifiedPeptides>
<minDeltaScoreModifiedPeptides>6</minDeltaScoreModifiedPeptides>
<minScoreUnmodifiedPeptides>0</minScoreUnmodifiedPeptides>
<minScoreModifiedPeptides>40</minScoreModifiedPeptides>
<secondPeptide>true</secondPeptide>
<matchBetweenRuns>false</matchBetweenRuns>
<matchUnidentifiedFeatures>false</matchUnidentifiedFeatures>
<matchBetweenRunsFdr>false</matchBetweenRunsFdr>
<dependentPeptides>false</dependentPeptides>
<dependentPeptideFdr>0</dependentPeptideFdr>
<dependentPeptideMassBin>0</dependentPeptideMassBin>
<msmsConnection>false</msmsConnection>
<ibaq>false</ibaq>
<useDeltaScore>false</useDeltaScore>
<splitProteinGroupsByTaxonomy>false</splitProteinGroupsByTaxonomy>
<taxonomyLevel>Species</taxonomyLevel>
<avalon>false</avalon>
<ibaqLogFit>false</ibaqLogFit>
<razorProteinFdr>true</razorProteinFdr>
<deNovoSequencing>false</deNovoSequencing>
<deNovoVarMods>true</deNovoVarMods>
<massDifferenceSearch>false</massDifferenceSearch>
<minPepLen>7</minPepLen>
<peptideFdr>0.01</peptideFdr>
<proteinFdr>0.01</proteinFdr>
<siteFdr>0.01</siteFdr>
<minPeptideLengthForUnspecificSearch>8</minPeptideLengthForUnspecificSearch>
<maxPeptideLengthForUnspecificSearch>25</maxPeptideLengthForUnspecificSearch>
<useNormRatiosForOccupancy>true</useNormRatiosForOccupancy>
<minPeptides>1</minPeptides>
<minRazorPeptides>1</minRazorPeptides>
<minUniquePeptides>0</minUniquePeptides>
<useCounterparts>false</useCounterparts>
<advancedSiteIntensities>true</advancedSiteIntensities>
<customProteinQuantification>false</customProteinQuantification>
<customProteinQuantificationFile />
<minRatioCount>2</minRatioCount>
<restrictProteinQuantification>true</restrictProteinQuantification>
<restrictMods>
<string>Oxidation (M)</string>
<string>Acetyl (Protein N-term)</string>
</restrictMods>
<matchingTimeWindow>0</matchingTimeWindow>
<alignmentTimeWindow>0</alignmentTimeWindow>
<numberOfCandidatesMultiplexedMsms>25</numberOfCandidatesMultiplexedMsms>
<numberOfCandidatesMsms>15</numberOfCandidatesMsms>
<massDifferenceMods />
<mainSearchMaxCombinations>200</mainSearchMaxCombinations>
<msmsParamsArray>
<msmsParams Name="FTMS" MatchToleranceInPpm="true" DeisotopeToleranceInPpm="true"
DeNovoToleranceInPpm="true" Deisotope="true" Topx="12" HigherCharges="true"
IncludeWater="true" IncludeAmmonia="true" DependentLosses="true" Recalibration="false">
<MatchTolerance>20</MatchTolerance>
<DeisotopeTolerance>7</DeisotopeTolerance>
<DeNovoTolerance>10</DeNovoTolerance>
</msmsParams>
<msmsParams Name="ITMS" MatchToleranceInPpm="false" DeisotopeToleranceInPpm="false"
DeNovoToleranceInPpm="false" Deisotope="false" Topx="8" HigherCharges="true"
IncludeWater="true" IncludeAmmonia="true" DependentLosses="true" Recalibration="false">
<MatchTolerance>0.5</MatchTolerance>
<DeisotopeTolerance>0.15</DeisotopeTolerance>
<DeNovoTolerance>0.25</DeNovoTolerance>
</msmsParams>
<msmsParams Name="TOF" MatchToleranceInPpm="true" DeisotopeToleranceInPpm="false"
DeNovoToleranceInPpm="false" Deisotope="true" Topx="10" HigherCharges="true"
IncludeWater="true" IncludeAmmonia="true" DependentLosses="true" Recalibration="false">
<MatchTolerance>40</MatchTolerance>
<DeisotopeTolerance>0.01</DeisotopeTolerance>
<DeNovoTolerance>0.02</DeNovoTolerance>
</msmsParams>
<msmsParams Name="Unknown" MatchToleranceInPpm="false" DeisotopeToleranceInPpm="false"
DeNovoToleranceInPpm="false" Deisotope="false" Topx="8" HigherCharges="true"
IncludeWater="true" IncludeAmmonia="true" DependentLosses="true" Recalibration="false">
<MatchTolerance>0.5</MatchTolerance>
<DeisotopeTolerance>0.15</DeisotopeTolerance>
<DeNovoTolerance>0.25</DeNovoTolerance>
</msmsParams>
</msmsParamsArray>
<compositionPrediction>0</compositionPrediction>
<quantMode>1</quantMode>
<variationMode>none</variationMode>
</MaxQuantParams>

0 comments on commit f47c6fc

Please sign in to comment.