Skip to content

Commit

Permalink
Changed the method of summing peptide intensity to protein intensity…
Browse files Browse the repository at this point in the history
…, changed the method "razor" to same as MaxQuant, and added a new method "rank".
  • Loading branch information
byemaxx committed Jun 25, 2024
1 parent 7c9df6e commit a2c1b7c
Show file tree
Hide file tree
Showing 7 changed files with 131 additions and 31 deletions.
5 changes: 5 additions & 0 deletions Docs/ChangeLog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Version: 1.107.7
## Date: 2024-06-24
### Changes:
- Change: Changed the method of summing peptide intensity to protein intensity, changed the "razor" method to match MaxQuant, and added a new method "rank".

# Version: 1.107.6
## Date: 2024-06-19
### Changes:
Expand Down
13 changes: 10 additions & 3 deletions Docs/MetaX_Cookbook.md
Original file line number Diff line number Diff line change
Expand Up @@ -227,9 +227,16 @@ The Data Overview provides basic information about your data, such as the number

Click **Create Proteins Intensity Table** to sum peptides to proteins if the Protein column is in the original table.

- **Occam's Razor and Anti-Razor:** Methods available for inferring shared peptides.
1. Build the rank of proteins.
2. Choose the protein with a higher rank for the shared peptide.
- **Occam's Razor**, **Anti-Razor** and **Rank:** Methods available for inferring shared peptides.
- Razor:
1. Build a minimal set of proteins to cover all peptides (Set Cover Problem).
2. For each peptide, choose the protein that has the most peptides (if multiple proteins have the same number of peptides, the intensity is shared among them).
- Anti-Razor:
- The intensity of each peptide is shared among all of its proteins.
- Rank:
1. Build the rank of proteins.
2. Choose the protein with a higher rank for the shared peptide.


- **Methods to Build Protein Rank:**
- unique_counts: Use the counts of proteins inferred by unique peptides.
Expand Down
113 changes: 97 additions & 16 deletions utils/AnalyzerUtils/SumProteinIntensity.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,52 @@
# This file is used to sum the protein intensity for each sample
# Method: razor or anti-razor
# Method: razor, anti-razor or rank
# By sample: True or False
# Output: a dataframe with protein as index and sample as columns
##############################################
# USAGE:
# from utils.AnalyzerUtils.SumProteinIntensity import SumProteinIntensity
# out = SumProteinIntensity(sw)
# df1 = out.sum_protein_intensity(method='razor', by_sample=False, rank_method='count')
# df2 = out.sum_protein_intensity(method='razor', by_sample=False, rank_method='shared')
# df3 = out.sum_protein_intensity(method='razor', by_sample=False, rank_method='unique')
# df0 = out.sum_protein_intensity(method='razor')
# df1 = out.sum_protein_intensity(method='rank', by_sample=False, rank_method='all_counts')
# df2 = out.sum_protein_intensity(method='rank', by_sample=False, rank_method='shared_intensity')
# df3 = out.sum_protein_intensity(method='rank', by_sample=False, rank_method='unique_counts')
# df4 = out.sum_protein_intensity(method='anti-razor')
##############################################

from collections import defaultdict
import pandas as pd
from tqdm import tqdm


class SumProteinIntensity:
def __init__(self, taxa_func_analyzer):
self.tfa = taxa_func_analyzer
self.res_intensity_dict = {} #store all sample to output
self.rank_dict = {} #store the rank of protein intensity for each sample temporarily
self.rank_method = None
self.rank_method = None # only used for rank method
self.extract_col_name = [self.tfa.peptide_col_name, self.tfa.protein_col_name] + self.tfa.sample_list
self.df = self.tfa.original_df.loc[:,self.extract_col_name]
self._init_dicts()


def sum_protein_intensity(self, method='razor', by_sample=False, rank_method='unique_counts'):

if method not in ['razor', 'anti-razor']:
raise ValueError('Method must in ["razor", "anti-razor"]')
if method not in ['razor', 'anti-razor', 'rank']:
raise ValueError('Method must in ["razor", "anti-razor", "rank"]')
if rank_method not in ['shared_intensity', 'all_counts', 'unique_counts', 'unique_intensity']:
raise ValueError('Rank method must in ["shared_intensity", "all_counts", "unique_counts", "unique_intensity"]')

self.rank_method = rank_method

if method == 'razor':
if method == 'rank':
print(f"\n-------------Start to sum protein intensity using method: [{method}] by_sample: [{by_sample}] rank_method: [{rank_method}]-------------")
# make a dict to count the intensity of each protein, intensity sahred by peptides will be divided by the number of peptides
if by_sample:
for sample in self.tfa.sample_list:
# update the dict for each sample
print(f'Creating protein rank dict for [{sample}] by shared intensity', end='\r')
self._update_protein_rank_dict(sample_name = sample, rank_method = rank_method)
self._sum_protein_razor(sample, by_sample)
self._sum_protein_rank(sample, by_sample)

else: # without sample
# only need to create the dict once
Expand All @@ -51,8 +55,12 @@ def sum_protein_intensity(self, method='razor', by_sample=False, rank_method='un
self._update_protein_rank_dict(sample_name = None, rank_method = rank_method)

for sample in self.tfa.sample_list:
self._sum_protein_razor(sample, by_sample)

self._sum_protein_rank(sample, by_sample)
elif method == 'razor':
print('start to sum protein intensity using method: [razor]')
# use Set Cover Problem to get the protein list, then sum the intensity
pep_to_protein = self._create_pep_to_protein_razor()
self._sum_protein_razor(pep_to_protein)

elif method == 'anti-razor':
print(f"\n-------------Start to sum protein intensity using method: [{method}] by_sample: [True] rank_method: [Shared]-------------")
Expand All @@ -77,6 +85,82 @@ def sum_protein_intensity(self, method='razor', by_sample=False, rank_method='un
return res_df


def _create_pep_to_protein_razor(self) -> dict:
    """
    Create a dictionary mapping peptides to proteins based on a minimum protein set.

    Steps:
        1. Build a protein -> peptides mapping from the peptide and protein
           columns of ``self.df`` (proteins of one peptide are separated by ';').
        2. Greedily select proteins until every peptide is covered
           (greedy approximation of the Set Cover Problem).
        3. For each peptide, keep the selected protein(s) that contain the
           largest number of peptides; ties are all kept.

    Returns:
        dict: A dictionary mapping peptides to proteins.
            key: peptide
            value: a list of proteins
    """
    # create a function to find the minimum protein set
    def find_minimum_protein_set(peptides, protein_to_peptides):
        print('Start to create protein dict using "Set Cover Problem"')
        peptides_to_cover = set(peptides)
        selected_proteins = set()

        with tqdm(total=len(peptides_to_cover), desc="Covering peptides") as pbar:
            while peptides_to_cover:
                best_protein = None
                peptides_covered_by_best = set()
                # pick the protein covering the most still-uncovered peptides;
                # the strict '>' keeps the first protein found on ties
                for protein, covered_peptides in protein_to_peptides.items():
                    covered = peptides_to_cover & covered_peptides
                    if len(covered) > len(peptides_covered_by_best):
                        best_protein = protein
                        peptides_covered_by_best = covered

                # Compare with None explicitly: a falsy protein name such as ''
                # (e.g. from a trailing ';') is still a valid selection and must
                # not stop the loop while peptides remain uncovered.
                if best_protein is None:
                    break

                selected_proteins.add(best_protein)
                peptides_to_cover -= peptides_covered_by_best
                pbar.update(len(peptides_covered_by_best))

        return selected_proteins

    peptide_col = self.tfa.peptide_col_name
    protein_col = self.tfa.protein_col_name
    df = self.df.loc[:, [peptide_col, protein_col]]
    # Create a dictionary mapping proteins to peptides
    protein_to_peptides = defaultdict(set)
    peptides = set(df[peptide_col])

    # iterate the two columns directly instead of building a Series per row
    rows = zip(df[peptide_col], df[protein_col])
    for sequence, protein_field in tqdm(rows, total=df.shape[0], desc="Creating protein to peptides mapping"):
        for protein in protein_field.split(';'):
            protein_to_peptides[protein].add(sequence)

    mini_protein_set = find_minimum_protein_set(peptides, protein_to_peptides)

    # Only proteins in the minimum set are candidates. Build an inverted index
    # (peptide -> candidate proteins) once, so each peptide does not have to
    # scan every selected protein.
    protein_sizes = {}
    peptide_to_candidates = defaultdict(list)
    for protein in mini_protein_set:
        member_peptides = protein_to_peptides[protein]
        protein_sizes[protein] = len(member_peptides)
        for peptide in member_peptides:
            peptide_to_candidates[peptide].append(protein)

    # Assign each peptide to the protein that contains it with the highest peptide count
    print('Assigning peptides to proteins')
    peptide_to_protein = defaultdict(list)
    for peptide in tqdm(peptides, desc="Assigning peptides to proteins"):
        candidates = peptide_to_candidates.get(peptide)
        if candidates:
            # find the candidate protein(s) holding the most peptides; keep all ties
            max_protein_count = max(protein_sizes[protein] for protein in candidates)
            peptide_to_protein[peptide].extend(
                protein for protein in candidates if protein_sizes[protein] == max_protein_count
            )

    return peptide_to_protein

def _sum_protein_razor(self, peptide_to_protein: dict):
    """
    Add peptide intensities to the proteins chosen by the razor mapping.

    For every sample in ``self.tfa.sample_list``, each peptide's intensity in
    that sample is looked up and passed, together with the peptide's protein
    list, to ``self._update_output_dict``.

    Args:
        peptide_to_protein (dict): key is a peptide, value is the list of
            proteins that peptide was assigned to.
    """
    peptide_col = self.tfa.peptide_col_name
    for sample in tqdm(self.tfa.sample_list):
        print(f'Assigning protein intensity for [{sample}]')
        # peptide -> intensity lookup for the current sample
        sample_df = self.df.loc[:, [peptide_col, sample]].set_index(peptide_col)
        intensity_by_peptide = sample_df.to_dict()[sample]
        for peptide, proteins in peptide_to_protein.items():
            self._update_output_dict(proteins, sample, intensity_by_peptide[peptide])



def _init_dicts(self):
for sample in self.tfa.sample_list:
self.res_intensity_dict[sample] = {}
Expand Down Expand Up @@ -147,7 +231,7 @@ def _update_output_dict(self, protein_list: list, sample_name:str, intensity:flo
self.res_intensity_dict[sample_name][protein] = intensity


def _sum_protein_razor(self, sample_name:str, by_sample=False):
def _sum_protein_rank(self, sample_name:str, by_sample=False):
# print in one line
print(f'Asigning protein intensity for [{sample_name}]', end='\r')
df = self.df.loc[:,[ self.tfa.protein_col_name, sample_name]]
Expand Down Expand Up @@ -180,7 +264,4 @@ def _sum_protein_anti_razor(self, sample_name:str):
for row in df.itertuples():
proteins = row[1].split(';')
intensity = row[2]
self._update_output_dict(proteins, sample_name, intensity)



self._update_output_dict(proteins, sample_name, intensity)
8 changes: 4 additions & 4 deletions utils/GUI.py
Original file line number Diff line number Diff line change
Expand Up @@ -831,21 +831,21 @@ def change_theme(self, theme, silent=False):

def change_event_checkBox_create_protein_table(self):
if self.checkBox_create_protein_table.isChecked():
self.checkBox_infrence_protein_by_sample.setEnabled(True)
self.comboBox_protein_ranking_method.setEnabled(True)
# self.checkBox_infrence_protein_by_sample.setEnabled(True)
# self.comboBox_protein_ranking_method.setEnabled(True)
self.comboBox_method_of_protein_inference.setEnabled(True)
else:
self.comboBox_method_of_protein_inference.setEnabled(False)
self.checkBox_infrence_protein_by_sample.setEnabled(False)
self.comboBox_protein_ranking_method.setEnabled(False)

def update_method_of_protein_inference(self):
if self.comboBox_method_of_protein_inference.currentText() == "anti-razor":
if self.comboBox_method_of_protein_inference.currentText() in ["razor", "anti-razor"]:
# set checked
self.checkBox_infrence_protein_by_sample.setChecked(True)
self.checkBox_infrence_protein_by_sample.setEnabled(False)
self.comboBox_protein_ranking_method.setEnabled(False)
else:
else: # method is ["rank"]
self.checkBox_infrence_protein_by_sample.setEnabled(True)
self.comboBox_protein_ranking_method.setEnabled(True)
self.checkBox_infrence_protein_by_sample.setChecked(False)
Expand Down
11 changes: 8 additions & 3 deletions utils/MetaX_GUI/MainWindow.ui
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
<enum>Qt::LeftToRight</enum>
</property>
<property name="currentIndex">
<number>6</number>
<number>2</number>
</property>
<widget class="QWidget" name="tab">
<attribute name="title">
Expand Down Expand Up @@ -798,6 +798,11 @@
<string>anti-razor</string>
</property>
</item>
<item>
<property name="text">
<string>rank</string>
</property>
</item>
</widget>
</item>
<item row="1" column="2">
Expand Down Expand Up @@ -1315,7 +1320,7 @@
<item row="1" column="0">
<widget class="QTabWidget" name="tabWidget_4">
<property name="currentIndex">
<number>1</number>
<number>0</number>
</property>
<widget class="QWidget" name="tab_12">
<attribute name="title">
Expand Down Expand Up @@ -8026,7 +8031,7 @@
<x>0</x>
<y>0</y>
<width>1059</width>
<height>21</height>
<height>23</height>
</rect>
</property>
<widget class="QMenu" name="menuTools">
Expand Down
10 changes: 6 additions & 4 deletions utils/MetaX_GUI/Ui_MainWindow.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

# Form implementation generated from reading ui file 'c:\Users\Qing\OneDrive - University of Ottawa\code\TaxaFunc\MetaX\utils\MetaX_GUI\MainWindow.ui'
# Form implementation generated from reading ui file 'c:\Users\max\OneDrive - University of Ottawa\code\TaxaFunc\MetaX\utils\MetaX_GUI\MainWindow.ui'
#
# Created by: PyQt5 UI code generator 5.15.9
#
Expand Down Expand Up @@ -438,6 +438,7 @@ def setupUi(self, metaX_main):
self.comboBox_method_of_protein_inference.setObjectName("comboBox_method_of_protein_inference")
self.comboBox_method_of_protein_inference.addItem("")
self.comboBox_method_of_protein_inference.addItem("")
self.comboBox_method_of_protein_inference.addItem("")
self.gridLayout_37.addWidget(self.comboBox_method_of_protein_inference, 0, 2, 1, 1)
self.horizontalLayout_2 = QtWidgets.QHBoxLayout()
self.horizontalLayout_2.setObjectName("horizontalLayout_2")
Expand Down Expand Up @@ -4208,7 +4209,7 @@ def setupUi(self, metaX_main):
self.statusbar.setObjectName("statusbar")
metaX_main.setStatusBar(self.statusbar)
self.menuBar = QtWidgets.QMenuBar(metaX_main)
self.menuBar.setGeometry(QtCore.QRect(0, 0, 1059, 21))
self.menuBar.setGeometry(QtCore.QRect(0, 0, 1059, 23))
self.menuBar.setObjectName("menuBar")
self.menuTools = QtWidgets.QMenu(self.menuBar)
self.menuTools.setObjectName("menuTools")
Expand Down Expand Up @@ -4268,9 +4269,9 @@ def setupUi(self, metaX_main):

self.retranslateUi(metaX_main)
self.stackedWidget.setCurrentIndex(0)
self.tabWidget_TaxaFuncAnalyzer.setCurrentIndex(6)
self.tabWidget_TaxaFuncAnalyzer.setCurrentIndex(2)
self.toolBox_2.setCurrentIndex(0)
self.tabWidget_4.setCurrentIndex(1)
self.tabWidget_4.setCurrentIndex(0)
self.tabWidget_3.setCurrentIndex(0)
self.tabWidget.setCurrentIndex(1)
self.tabWidget_2.setCurrentIndex(1)
Expand Down Expand Up @@ -4369,6 +4370,7 @@ def retranslateUi(self, metaX_main):
self.checkBox_infrence_protein_by_sample.setText(_translate("metaX_main", "Inference by each Sample"))
self.comboBox_method_of_protein_inference.setItemText(0, _translate("metaX_main", "razor"))
self.comboBox_method_of_protein_inference.setItemText(1, _translate("metaX_main", "anti-razor"))
self.comboBox_method_of_protein_inference.setItemText(2, _translate("metaX_main", "rank"))
self.label_136.setText(_translate("metaX_main", "Protein Ranking Method"))
self.comboBox_protein_ranking_method.setItemText(0, _translate("metaX_main", "unique_counts"))
self.comboBox_protein_ranking_method.setItemText(1, _translate("metaX_main", "all_counts"))
Expand Down
2 changes: 1 addition & 1 deletion utils/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__version__ = '1.107.6'
__version__ = '1.107.7'
API_version = '1'

0 comments on commit a2c1b7c

Please sign in to comment.