Skip to content

Commit

Permalink
- Fix: Fixed the razor method of sum peptide to protein, the intensit…
Browse files Browse the repository at this point in the history
…y was not stable in different runs.
  • Loading branch information
byemaxx committed Sep 23, 2024
1 parent d4c3443 commit 3aae95b
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 8 deletions.
5 changes: 5 additions & 0 deletions Docs/ChangeLog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Version: 1.114.3
## Date: 2024-09-22
### Changes:
- Fix: Fixed the razor method for summing peptide intensities to proteins; the resulting intensities were not stable across different runs.

# Version: 1.114.2
## Date: 2024-09-22
### Changes:
Expand Down
25 changes: 19 additions & 6 deletions metax/taxafunc_analyzer/analyzer_utils/razor_sum.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ def __init__(self, df, column_map, peptide_mun_threshold=1,


def sum_protein_intensity(self, greedy_method='heap'):
# reset the results to avoid the influence of previous results
self.res_intensity_dict = {} #
self.__multi_target_count = 0
self.mini_target_set = None
self.filtered_target_to_peptides = None

self.greedy_method = greedy_method
print('Start to sum protein intensity using method: [razor]')
if self.column_map['sample_list'] is None or len(self.column_map['sample_list']) == 0:
Expand All @@ -35,7 +41,7 @@ def sum_protein_intensity(self, greedy_method='heap'):

# show summary
print(f"Total peptides count: {len(pep_to_target)}")
# calculate the mean of the multi-target peptides of each sample
# calculate the multi-target peptides count, averaged over the samples
self.__multi_target_count = self.__multi_target_count/len(self.column_map['sample_list'])
print(f"Multi-target peptides count: {self.__multi_target_count} ({self.__multi_target_count / len(pep_to_target) * 100:.2f}%)")

Expand Down Expand Up @@ -104,7 +110,8 @@ def get_mini_target_set(self, greedy_method='heap'):

self.remove_protein_less_than_threshold()

peptides = set(self.df[self.column_map['peptide']])
# peptides = set(self.df[self.column_map['peptide']])
peptides = list(dict.fromkeys(self.df[self.column_map['peptide']]))
target_to_peptides = self._create_target_to_peptides()
mini_target_set = self.find_minimum_target_set(peptides, target_to_peptides)
filtered_target_to_peptides = {target: target_to_peptides[target] for target in mini_target_set}
Expand All @@ -123,15 +130,19 @@ def _create_pep_to_target_razor(self):
"""
self.get_mini_target_set(self.greedy_method)

peptides = set(self.df[self.column_map['peptide']])
# keep the order of the peptides
peptides = list(dict.fromkeys(self.df[self.column_map['peptide']]))
filtered_target_to_peptides = self.filtered_target_to_peptides

peptide_to_target = defaultdict(list)
for peptide in tqdm(peptides, desc="Assigning peptides to targets"):
possible_targets = [target for target, peps in filtered_target_to_peptides.items() if peptide in peps]
# possible_targets = [target for target, peps in filtered_target_to_peptides.items() if peptide in peps]
possible_targets = sorted([target for target, peps in filtered_target_to_peptides.items() if peptide in peps])

if possible_targets:
max_target_count = max(len(filtered_target_to_peptides[target]) for target in possible_targets)
best_targets = [target for target in possible_targets if len(filtered_target_to_peptides[target]) == max_target_count]
# best_targets = [target for target in possible_targets if len(filtered_target_to_peptides[target]) == max_target_count]
best_targets = sorted([target for target in possible_targets if len(filtered_target_to_peptides[target]) == max_target_count])
peptide_to_target[peptide].extend(best_targets)

return peptide_to_target
Expand Down Expand Up @@ -232,6 +243,7 @@ def _update_output_dict(self, target_list, sample_name, intensity):
self.res_intensity_dict.setdefault(sample_name, {}).setdefault(target, 0)
self.res_intensity_dict[sample_name][target] += intensity
else:
target_list = sorted(target_list)
if self.share_intensity:
intensity /= len(target_list)
for target in target_list:
Expand Down Expand Up @@ -263,7 +275,8 @@ def _update_output_dict(self, target_list, sample_name, intensity):
}
sia = RazorSum(df, column_map, peptide_mun_threshold=3)

res_df = sia.sum_protein_intensity(greedy_method='heap')
res_df = sia.sum_protein_intensity(greedy_method='greedy')

# res_df.to_csv('razor_protein_intensity.tsv', sep='\t')

# or get minimum target set only
Expand Down
2 changes: 1 addition & 1 deletion metax/utils/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__version__ = '1.114.2'
__version__ = '1.114.3'
API_version = '2'
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "MetaXTools"
version = "1.114.2"
version = "1.114.3"
description = "MetaXTools is a novel tool for linking peptide sequences with taxonomic and functional information in Metaproteomics."
readme = "README_PyPi.md"
license = { text = "NorthOmics" }
Expand Down

0 comments on commit 3aae95b

Please sign in to comment.