diff --git a/Docs/ChangeLog.md b/Docs/ChangeLog.md index 942f5b3..cc089fc 100644 --- a/Docs/ChangeLog.md +++ b/Docs/ChangeLog.md @@ -1,3 +1,8 @@ +# Version: 1.114.3 +## Date: 2024-09-22 +### Changes: +- Fix: Fixed the razor method for summing peptides to proteins; the resulting intensities were not stable across different runs. + # Version: 1.114.2 ## Date: 2024-09-22 ### Changes: diff --git a/metax/taxafunc_analyzer/analyzer_utils/razor_sum.py b/metax/taxafunc_analyzer/analyzer_utils/razor_sum.py index 00e025e..7de67f0 100644 --- a/metax/taxafunc_analyzer/analyzer_utils/razor_sum.py +++ b/metax/taxafunc_analyzer/analyzer_utils/razor_sum.py @@ -20,6 +20,12 @@ def __init__(self, df, column_map, peptide_mun_threshold=1, def sum_protein_intensity(self, greedy_method='heap'): + # reset the results to avoid the influence of previous results + self.res_intensity_dict = {} # + self.__multi_target_count = 0 + self.mini_target_set = None + self.filtered_target_to_peptides = None + self.greedy_method = greedy_method print('Start to sum protein intensity using method: [razor]') if self.column_map['sample_list'] is None or len(self.column_map['sample_list']) == 0: @@ -35,7 +41,7 @@ def sum_protein_intensity(self, greedy_method='heap'): # show summary print(f"Total peptides count: {len(pep_to_target)}") - # calculate the mean of the multi-target peptides of each sample + # average the multi-target peptide count over the samples self.__multi_target_count = self.__multi_target_count/len(self.column_map['sample_list']) print(f"Multi-target peptides count: {self.__multi_target_count} ({self.__multi_target_count / len(pep_to_target) * 100:.2f}%)") @@ -104,7 +110,8 @@ def get_mini_target_set(self, greedy_method='heap'): self.remove_protein_less_than_threshold() - peptides = set(self.df[self.column_map['peptide']]) + # peptides = set(self.df[self.column_map['peptide']]) + peptides = list(dict.fromkeys(self.df[self.column_map['peptide']])) target_to_peptides = self._create_target_to_peptides() mini_target_set = 
self.find_minimum_target_set(peptides, target_to_peptides) filtered_target_to_peptides = {target: target_to_peptides[target] for target in mini_target_set} @@ -123,15 +130,19 @@ def _create_pep_to_target_razor(self): """ self.get_mini_target_set(self.greedy_method) - peptides = set(self.df[self.column_map['peptide']]) + # keep the order of the peptides + peptides = list(dict.fromkeys(self.df[self.column_map['peptide']])) filtered_target_to_peptides = self.filtered_target_to_peptides peptide_to_target = defaultdict(list) for peptide in tqdm(peptides, desc="Assigning peptides to targets"): - possible_targets = [target for target, peps in filtered_target_to_peptides.items() if peptide in peps] + # possible_targets = [target for target, peps in filtered_target_to_peptides.items() if peptide in peps] + possible_targets = sorted([target for target, peps in filtered_target_to_peptides.items() if peptide in peps]) + if possible_targets: max_target_count = max(len(filtered_target_to_peptides[target]) for target in possible_targets) - best_targets = [target for target in possible_targets if len(filtered_target_to_peptides[target]) == max_target_count] + # best_targets = [target for target in possible_targets if len(filtered_target_to_peptides[target]) == max_target_count] + best_targets = sorted([target for target in possible_targets if len(filtered_target_to_peptides[target]) == max_target_count]) peptide_to_target[peptide].extend(best_targets) return peptide_to_target @@ -232,6 +243,7 @@ def _update_output_dict(self, target_list, sample_name, intensity): self.res_intensity_dict.setdefault(sample_name, {}).setdefault(target, 0) self.res_intensity_dict[sample_name][target] += intensity else: + target_list = sorted(target_list) if self.share_intensity: intensity /= len(target_list) for target in target_list: @@ -263,7 +275,8 @@ def _update_output_dict(self, target_list, sample_name, intensity): } sia = RazorSum(df, column_map, peptide_mun_threshold=3) - res_df = 
sia.sum_protein_intensity(greedy_method='heap') + res_df = sia.sum_protein_intensity(greedy_method='greedy') + # res_df.to_csv('razor_protein_intensity.tsv', sep='\t') # or get minimum target set only diff --git a/metax/utils/version.py b/metax/utils/version.py index 43029aa..dfbae1d 100644 --- a/metax/utils/version.py +++ b/metax/utils/version.py @@ -1,2 +1,2 @@ -__version__ = '1.114.2' +__version__ = '1.114.3' API_version = '2' \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 02df8b5..f0c29c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "MetaXTools" -version = "1.114.2" +version = "1.114.3" description = "MetaXTools is a novel tool for linking peptide sequences with taxonomic and functional information in Metaproteomics." readme = "README_PyPi.md" license = { text = "NorthOmics" }