Skip to content

Commit

Permalink
all comments addressed
Browse files Browse the repository at this point in the history
  • Loading branch information
VarunAnanth2003 committed Aug 23, 2024
1 parent df68c1d commit cfd39e8
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 13 deletions.
7 changes: 6 additions & 1 deletion casanovo/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ devices:
# See pyteomics.parser.expasy_rules for valid enzymes
enzyme: "trypsin"
# Digestion type for candidate peptide generation.
# Full: standard digestion. Semi: Include products of semi-specific cleavage
# full: standard digestion. semi: Include products of semi-specific cleavage
digestion: "full"
# Number of allowed missed cleavages when digesting protein
missed_cleavages: 0
Expand All @@ -55,6 +55,11 @@ missed_cleavages: 0
max_mods:
# Maximum peptide length to consider
max_peptide_len: 50
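# Comma-separated list of modifications to consider when generating candidate
# peptides. Remove an entry from the list to turn that modification off.
# Permanent fixed mod (always applied; do not list it here): C+57.021
# Allowed variable mods: M+15.995, N+0.984, Q+0.984
# Allowed N-terminal mods: +42.011, +43.006, -17.027, +43.006-17.027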
allowed_mods: "M+15.995,N+0.984,Q+0.984,+42.011,+43.006,-17.027,+43.006-17.027"


###
Expand Down
68 changes: 56 additions & 12 deletions casanovo/data/db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,6 @@
PROTON = 1.00727646677
ISOTOPE_SPACING = 1.003355

var_mods = {
"d": ["N", "Q"],
"ox": ["M"],
"ace-": True,
"carb-": True,
"nh3x-": True,
"carbnh3x-": True,
}
fixed_mods = {"carbm": ["C"]}


class ProteinDatabase:
"""
Expand All @@ -51,6 +41,8 @@ class ProteinDatabase:
The precursor mass tolerance in ppm.
isotope_error : List[int]
Isotopes to consider when comparing predicted and observed precursor m/z's.
allowed_mods : List[str]
A list of allowed modifications to consider.
"""

def __init__(
Expand All @@ -64,7 +56,11 @@ def __init__(
max_mods: int,
precursor_tolerance: float,
isotope_error: List[int],
allowed_mods: List[str],
):
self.fixed_mods, self.var_mods = self._construct_mods_dict(
allowed_mods
)
self.digest = self._digest_fasta(
fasta_path,
enzyme,
Expand Down Expand Up @@ -197,8 +193,8 @@ def _digest_fasta(
for pep, prot in peptide_list:
peptide_isoforms = parser.isoforms(
pep,
variable_mods=var_mods,
fixed_mods=fixed_mods,
variable_mods=self.var_mods,
fixed_mods=self.fixed_mods,
max_mods=max_mods,
)
peptide_isoforms = list(
Expand All @@ -218,6 +214,54 @@ def _digest_fasta(
logger.info("Digestion complete. %d peptides generated.", len(pdb_df))
return pdb_df

def _construct_mods_dict(self, allowed_mods):
"""
Constructs dictionaries of fixed and variable modifications.
Parameters
----------
allowed_mods : str
A comma-separated list of allowed modifications.
Returns
-------
fixed_mods : dict
A dictionary of fixed modifications.
var_mods : dict
A dictionary of variable modifications.
"""
fixed_mods = {"carbm": ["C"]}
var_mods = {}

if allowed_mods is "" or None:
return fixed_mods, var_mods
for mod in allowed_mods.split(","):
if mod == "M+15.995":
if "ox" not in var_mods:
var_mods["ox"] = []
var_mods["ox"].append("M")
elif mod == "N+0.984":
if "d" not in var_mods:
var_mods["d"] = []
var_mods["d"].append("N")
elif mod == "Q+0.984":
if "d" not in var_mods:
var_mods["d"] = []
var_mods["d"].append("Q")
elif mod == "+42.011":
var_mods["ace-"] = True
elif mod == "+43.006":
var_mods["carb-"] = True
elif mod == "-17.027":
var_mods["nh3x-"] = True
elif mod == "+43.006-17.027":
var_mods["carbnh3x-"] = True
else:
logger.error("Modification %s not recognized.", mod)
raise ValueError(f"Modification {mod} not recognized.")

return fixed_mods, var_mods

@jit
def _to_mz(precursor_mass, charge):
"""
Expand Down
1 change: 1 addition & 0 deletions casanovo/denovo/model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ def db_search(
self.config.max_mods,
self.config.precursor_mass_tol,
self.config.isotope_error_range,
self.config.allowed_mods,
)
self.loaders.setup(stage="test", annotated=False)
self.trainer.predict(self.model, self.loaders.db_dataloader())
Expand Down
4 changes: 4 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,10 @@ def tiny_config(tmp_path):
"-17.027": -17.026549,
"+43.006-17.027": 25.980265,
},
"allowed_mods": (
"M+15.995,N+0.984,Q+0.984,"
"+42.011,+43.006,-17.027,+43.006-17.027"
),
}

cfg_file = tmp_path / "config.yml"
Expand Down
56 changes: 56 additions & 0 deletions tests/unit_tests/test_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,10 @@ def test_digest_fasta_cleave(tiny_fasta_file):
max_mods=0,
precursor_tolerance=20,
isotope_error=[0],
allowed_mods=(
"M+15.995,N+0.984,Q+0.984,"
"+42.011,+43.006,-17.027,+43.006-17.027"
),
)
peptide_list = list(pdb.digest["peptide"])
assert peptide_list == expected
Expand Down Expand Up @@ -356,6 +360,10 @@ def test_digest_fasta_mods(tiny_fasta_file):
max_mods=1,
precursor_tolerance=20,
isotope_error=[0],
allowed_mods=(
"M+15.995,N+0.984,Q+0.984,"
"+42.011,+43.006,-17.027,+43.006-17.027"
),
)
peptide_list = list(pdb.digest["peptide"])
peptide_list = [
Expand Down Expand Up @@ -389,6 +397,10 @@ def test_length_restrictions(tiny_fasta_file):
max_mods=0,
precursor_tolerance=20,
isotope_error=[0],
allowed_mods=(
"M+15.995,N+0.984,Q+0.984,"
"+42.011,+43.006,-17.027,+43.006-17.027"
),
)
peptide_list = list(pdb.digest["peptide"])
assert peptide_list == expected_long
Expand All @@ -403,6 +415,10 @@ def test_length_restrictions(tiny_fasta_file):
max_mods=0,
precursor_tolerance=20,
isotope_error=[0],
allowed_mods=(
"M+15.995,N+0.984,Q+0.984,"
"+42.011,+43.006,-17.027,+43.006-17.027"
),
)
peptide_list = list(pdb.digest["peptide"])
assert peptide_list == expected_short
Expand Down Expand Up @@ -433,6 +449,10 @@ def test_digest_fasta_enzyme(tiny_fasta_file):
max_mods=0,
precursor_tolerance=20,
isotope_error=[0],
allowed_mods=(
"M+15.995,N+0.984,Q+0.984,"
"+42.011,+43.006,-17.027,+43.006-17.027"
),
)
peptide_list = list(pdb.digest["peptide"])
assert peptide_list == expected_argc
Expand All @@ -447,6 +467,10 @@ def test_digest_fasta_enzyme(tiny_fasta_file):
max_mods=0,
precursor_tolerance=20,
isotope_error=[0],
allowed_mods=(
"M+15.995,N+0.984,Q+0.984,"
"+42.011,+43.006,-17.027,+43.006-17.027"
),
)
peptide_list = list(pdb.digest["peptide"])
assert peptide_list == expected_aspn
Expand All @@ -472,6 +496,10 @@ def test_get_candidates(tiny_fasta_file):
max_mods=0,
precursor_tolerance=10000,
isotope_error=[0],
allowed_mods=(
"M+15.995,N+0.984,Q+0.984,"
"+42.011,+43.006,-17.027,+43.006-17.027"
),
)
candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2)
assert expected_smallwindow == candidates
Expand All @@ -486,6 +514,10 @@ def test_get_candidates(tiny_fasta_file):
max_mods=0,
precursor_tolerance=150000,
isotope_error=[0],
allowed_mods=(
"M+15.995,N+0.984,Q+0.984,"
"+42.011,+43.006,-17.027,+43.006-17.027"
),
)
candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2)
assert expected_midwindow == candidates
Expand All @@ -500,6 +532,10 @@ def test_get_candidates(tiny_fasta_file):
max_mods=0,
precursor_tolerance=600000,
isotope_error=[0],
allowed_mods=(
"M+15.995,N+0.984,Q+0.984,"
"+42.011,+43.006,-17.027,+43.006-17.027"
),
)
candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2)
assert expected_widewindow == candidates
Expand Down Expand Up @@ -563,6 +599,10 @@ def test_get_candidates_isotope_error(tiny_fasta_file):
max_mods=0,
precursor_tolerance=10000,
isotope_error=[0],
allowed_mods=(
"M+15.995,N+0.984,Q+0.984,"
"+42.011,+43.006,-17.027,+43.006-17.027"
),
)
pdb.digest = peptide_list
candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2)
Expand All @@ -578,6 +618,10 @@ def test_get_candidates_isotope_error(tiny_fasta_file):
max_mods=0,
precursor_tolerance=10000,
isotope_error=[1],
allowed_mods=(
"M+15.995,N+0.984,Q+0.984,"
"+42.011,+43.006,-17.027,+43.006-17.027"
),
)
pdb.digest = peptide_list
candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2)
Expand All @@ -593,6 +637,10 @@ def test_get_candidates_isotope_error(tiny_fasta_file):
max_mods=0,
precursor_tolerance=10000,
isotope_error=[2],
allowed_mods=(
"M+15.995,N+0.984,Q+0.984,"
"+42.011,+43.006,-17.027,+43.006-17.027"
),
)
pdb.digest = peptide_list
candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2)
Expand All @@ -608,6 +656,10 @@ def test_get_candidates_isotope_error(tiny_fasta_file):
max_mods=0,
precursor_tolerance=10000,
isotope_error=[3],
allowed_mods=(
"M+15.995,N+0.984,Q+0.984,"
"+42.011,+43.006,-17.027,+43.006-17.027"
),
)
pdb.digest = peptide_list
candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2)
Expand All @@ -623,6 +675,10 @@ def test_get_candidates_isotope_error(tiny_fasta_file):
max_mods=0,
precursor_tolerance=10000,
isotope_error=[0, 1, 2, 3],
allowed_mods=(
"M+15.995,N+0.984,Q+0.984,"
"+42.011,+43.006,-17.027,+43.006-17.027"
),
)
pdb.digest = peptide_list
candidates, _ = pdb.get_candidates(precursor_mz=496.2, charge=2)
Expand Down

0 comments on commit cfd39e8

Please sign in to comment.