diff --git a/Docs/ChangeLog.md b/Docs/ChangeLog.md index 3c07c5b..9e78d66 100644 --- a/Docs/ChangeLog.md +++ b/Docs/ChangeLog.md @@ -1,7 +1,8 @@ # Version: 1.115.4 ## Date: 2024-10-07 ### Changes: -- TODO: use the peptide number for 'self.peptide_num_used' after filtering the minimum peptide number +- Fix: Fixed a bug where the report raised an error when using Anydata mode. +- Change: Changed the approach for filtering proteins by the minimum peptide number threshold. (Available for the Razor and Anti-Razor methods) # Version: 1.115.3 ## Date: 2024-10-04 diff --git a/metax/gui/main_gui.py b/metax/gui/main_gui.py index 73d4479..7230595 100644 --- a/metax/gui/main_gui.py +++ b/metax/gui/main_gui.py @@ -993,10 +993,14 @@ def update_method_of_protein_inference(self): self.checkBox_infrence_protein_by_sample.setChecked(True) self.checkBox_infrence_protein_by_sample.setEnabled(False) self.comboBox_protein_ranking_method.setEnabled(False) + # enable the peptide_num_threshold + self.spinBox_peptide_num_threshold_protein.setEnabled(True) else: # method is ["rank"] self.checkBox_infrence_protein_by_sample.setEnabled(True) self.comboBox_protein_ranking_method.setEnabled(True) self.checkBox_infrence_protein_by_sample.setChecked(False) + # disable the peptide_num_threshold + self.spinBox_peptide_num_threshold_protein.setEnabled(False) @@ -1853,11 +1857,11 @@ def run_after_set_multi_tables(self): # Final message if self.tfa.any_df_mode: - num_item = self.tfa.custom_df.shape[0] + original_num_peptide = self.tfa.custom_df.shape[0] msg = f"""

Custom data is ready!

-

Number of items: [{num_item}]

+

Number of items: [{original_num_peptide}]

""" diff --git a/metax/gui/metax_gui/main_window.ui b/metax/gui/metax_gui/main_window.ui index 530ed76..818a6dc 100644 --- a/metax/gui/metax_gui/main_window.ui +++ b/metax/gui/metax_gui/main_window.ui @@ -46,7 +46,7 @@ Qt::LeftToRight - 2 + 4 false @@ -245,8 +245,8 @@ 0 0 - 391 - 80 + 528 + 534 @@ -1476,7 +1476,7 @@ 16777215 - 280 + 300 @@ -1505,7 +1505,7 @@ 0 0 - 660 + 1016 232 @@ -2759,7 +2759,7 @@ 16777215 - 280 + 300 @@ -2776,7 +2776,7 @@ 0 0 - 621 + 999 150 @@ -3750,7 +3750,7 @@ 16777215 - 240 + 280 @@ -3779,7 +3779,7 @@ 0 0 - 878 + 1020 128 @@ -4801,7 +4801,7 @@ QTabWidget::Triangular - 3 + 2 @@ -5144,7 +5144,7 @@ - + false @@ -5239,64 +5239,11 @@ - - - - Qt::Horizontal - - - - - - - false - - - Run Deseq2 - - - - - - - - - - - 0 - 0 - - - - Groups (Default all) - - - - - - - Control Group - - - - - - - - 0 - 0 - - - - Comparing in Each Condition - - - @@ -5321,8 +5268,61 @@ + + + + + 0 + 0 + + + + Comparing in Each Condition + + + + + + + + + + + 0 + 0 + + + + Groups (Default all) + + + + + + + Control Group + + + + + + + Qt::Horizontal + + + + + + + false + + + Run Deseq2 + + + @@ -6207,7 +6207,7 @@ 16777215 - 220 + 240 @@ -7437,7 +7437,7 @@ 16777215 - 220 + 240 @@ -7454,8 +7454,8 @@ 0 0 - 620 - 65 + 1016 + 105 @@ -7819,7 +7819,7 @@ QTabWidget::Triangular - 0 + 1 @@ -8146,7 +8146,7 @@ 16777215 - 220 + 240 @@ -9258,7 +9258,7 @@ 16777215 - 220 + 240 @@ -9275,8 +9275,8 @@ 0 0 - 383 - 68 + 1016 + 141 diff --git a/metax/gui/metax_gui/ui_main_window.py b/metax/gui/metax_gui/ui_main_window.py index efb045f..f1c6c9b 100644 --- a/metax/gui/metax_gui/ui_main_window.py +++ b/metax/gui/metax_gui/ui_main_window.py @@ -147,7 +147,7 @@ def setupUi(self, metaX_main): self.toolBox_2.setMaximumSize(QtCore.QSize(1677, 16777215)) self.toolBox_2.setObjectName("toolBox_2") self.page_2 = QtWidgets.QWidget() - self.page_2.setGeometry(QtCore.QRect(0, 0, 391, 80)) + self.page_2.setGeometry(QtCore.QRect(0, 0, 528, 534)) self.page_2.setObjectName("page_2") self.gridLayout_27 = 
QtWidgets.QGridLayout(self.page_2) self.gridLayout_27.setObjectName("gridLayout_27") @@ -730,7 +730,7 @@ def setupUi(self, metaX_main): self.line_7.setObjectName("line_7") self.gridLayout_26.addWidget(self.line_7, 1, 0, 1, 3) self.groupBox_basic_plot = QtWidgets.QGroupBox(self.tab_12) - self.groupBox_basic_plot.setMaximumSize(QtCore.QSize(16777215, 280)) + self.groupBox_basic_plot.setMaximumSize(QtCore.QSize(16777215, 300)) self.groupBox_basic_plot.setObjectName("groupBox_basic_plot") self.gridLayout_40 = QtWidgets.QGridLayout(self.groupBox_basic_plot) self.gridLayout_40.setObjectName("gridLayout_40") @@ -744,7 +744,7 @@ def setupUi(self, metaX_main): self.scrollArea.setWidgetResizable(True) self.scrollArea.setObjectName("scrollArea") self.scrollAreaWidgetContents = QtWidgets.QWidget() - self.scrollAreaWidgetContents.setGeometry(QtCore.QRect(0, 0, 660, 232)) + self.scrollAreaWidgetContents.setGeometry(QtCore.QRect(0, 0, 1016, 232)) self.scrollAreaWidgetContents.setObjectName("scrollAreaWidgetContents") self.gridLayout_34 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents) self.gridLayout_34.setObjectName("gridLayout_34") @@ -1400,7 +1400,7 @@ def setupUi(self, metaX_main): self.pushButton_basic_heatmap_add.setObjectName("pushButton_basic_heatmap_add") self.gridLayout_23.addWidget(self.pushButton_basic_heatmap_add, 5, 3, 1, 1) self.groupBox_basic_heatmap_plot_settings = QtWidgets.QGroupBox(self.tab_13) - self.groupBox_basic_heatmap_plot_settings.setMaximumSize(QtCore.QSize(16777215, 280)) + self.groupBox_basic_heatmap_plot_settings.setMaximumSize(QtCore.QSize(16777215, 300)) self.groupBox_basic_heatmap_plot_settings.setObjectName("groupBox_basic_heatmap_plot_settings") self.gridLayout_41 = QtWidgets.QGridLayout(self.groupBox_basic_heatmap_plot_settings) self.gridLayout_41.setObjectName("gridLayout_41") @@ -1408,7 +1408,7 @@ def setupUi(self, metaX_main): self.scrollArea_2.setWidgetResizable(True) self.scrollArea_2.setObjectName("scrollArea_2") 
self.scrollAreaWidgetContents_2 = QtWidgets.QWidget() - self.scrollAreaWidgetContents_2.setGeometry(QtCore.QRect(0, 0, 621, 150)) + self.scrollAreaWidgetContents_2.setGeometry(QtCore.QRect(0, 0, 999, 150)) self.scrollAreaWidgetContents_2.setObjectName("scrollAreaWidgetContents_2") self.gridLayout_50 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_2) self.gridLayout_50.setObjectName("gridLayout_50") @@ -1943,7 +1943,7 @@ def setupUi(self, metaX_main): self.gridLayout_46.addWidget(self.checkBox_2, 1, 0, 1, 1) self.gridLayout_75.addLayout(self.gridLayout_46, 0, 0, 1, 1) self.groupBox_cross_heatmap_settings = QtWidgets.QGroupBox(self.groupBox_cross_heatmap_plot) - self.groupBox_cross_heatmap_settings.setMaximumSize(QtCore.QSize(16777215, 240)) + self.groupBox_cross_heatmap_settings.setMaximumSize(QtCore.QSize(16777215, 280)) self.groupBox_cross_heatmap_settings.setObjectName("groupBox_cross_heatmap_settings") self.gridLayout_52 = QtWidgets.QGridLayout(self.groupBox_cross_heatmap_settings) self.gridLayout_52.setObjectName("gridLayout_52") @@ -1957,7 +1957,7 @@ def setupUi(self, metaX_main): self.scrollArea_cross_heatmap_settings.setWidgetResizable(True) self.scrollArea_cross_heatmap_settings.setObjectName("scrollArea_cross_heatmap_settings") self.scrollAreaWidgetContents_3 = QtWidgets.QWidget() - self.scrollAreaWidgetContents_3.setGeometry(QtCore.QRect(0, 0, 878, 128)) + self.scrollAreaWidgetContents_3.setGeometry(QtCore.QRect(0, 0, 1020, 128)) self.scrollAreaWidgetContents_3.setObjectName("scrollAreaWidgetContents_3") self.gridLayout_38 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_3) self.gridLayout_38.setObjectName("gridLayout_38") @@ -2591,7 +2591,7 @@ def setupUi(self, metaX_main): self.pushButton_dunnett_test = QtWidgets.QPushButton(self.tab_16) self.pushButton_dunnett_test.setEnabled(False) self.pushButton_dunnett_test.setObjectName("pushButton_dunnett_test") - self.gridLayout_33.addWidget(self.pushButton_dunnett_test, 10, 1, 1, 2) + 
self.gridLayout_33.addWidget(self.pushButton_dunnett_test, 10, 1, 1, 1) self.horizontalLayout_39 = QtWidgets.QHBoxLayout() self.horizontalLayout_39.setObjectName("horizontalLayout_39") self.label_112 = QtWidgets.QLabel(self.tab_16) @@ -2636,42 +2636,11 @@ def setupUi(self, metaX_main): self.horizontalLayout_73.addWidget(self.comboBox_group_control_condition_group) self.horizontalLayout_39.addLayout(self.horizontalLayout_73) self.gridLayout_33.addLayout(self.horizontalLayout_39, 1, 1, 1, 2) - self.line_26 = QtWidgets.QFrame(self.tab_16) - self.line_26.setFrameShape(QtWidgets.QFrame.HLine) - self.line_26.setFrameShadow(QtWidgets.QFrame.Sunken) - self.line_26.setObjectName("line_26") - self.gridLayout_33.addWidget(self.line_26, 9, 1, 1, 2) - self.pushButton_multi_deseq2 = QtWidgets.QPushButton(self.tab_16) - self.pushButton_multi_deseq2.setEnabled(False) - self.pushButton_multi_deseq2.setObjectName("pushButton_multi_deseq2") - self.gridLayout_33.addWidget(self.pushButton_multi_deseq2, 11, 1, 1, 2) self.gridLayout_72 = QtWidgets.QGridLayout() self.gridLayout_72.setObjectName("gridLayout_72") - self.horizontalLayout_dunnett_group = QtWidgets.QHBoxLayout() - self.horizontalLayout_dunnett_group.setObjectName("horizontalLayout_dunnett_group") - self.gridLayout_72.addLayout(self.horizontalLayout_dunnett_group, 1, 1, 1, 1) - self.label_114 = QtWidgets.QLabel(self.tab_16) - sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Fixed) - sizePolicy.setHorizontalStretch(0) - sizePolicy.setVerticalStretch(0) - sizePolicy.setHeightForWidth(self.label_114.sizePolicy().hasHeightForWidth()) - self.label_114.setSizePolicy(sizePolicy) - self.label_114.setObjectName("label_114") - self.gridLayout_72.addWidget(self.label_114, 0, 1, 1, 1) self.comboBox_dunnett_control_group = QtWidgets.QComboBox(self.tab_16) self.comboBox_dunnett_control_group.setObjectName("comboBox_dunnett_control_group") 
self.gridLayout_72.addWidget(self.comboBox_dunnett_control_group, 1, 0, 1, 1) - self.label_115 = QtWidgets.QLabel(self.tab_16) - self.label_115.setObjectName("label_115") - self.gridLayout_72.addWidget(self.label_115, 0, 0, 1, 1) - self.checkBox_comparing_group_control_in_condition = QtWidgets.QCheckBox(self.tab_16) - sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Fixed) - sizePolicy.setHorizontalStretch(0) - sizePolicy.setVerticalStretch(0) - sizePolicy.setHeightForWidth(self.checkBox_comparing_group_control_in_condition.sizePolicy().hasHeightForWidth()) - self.checkBox_comparing_group_control_in_condition.setSizePolicy(sizePolicy) - self.checkBox_comparing_group_control_in_condition.setObjectName("checkBox_comparing_group_control_in_condition") - self.gridLayout_72.addWidget(self.checkBox_comparing_group_control_in_condition, 2, 0, 1, 1) self.horizontalLayout_24 = QtWidgets.QHBoxLayout() self.horizontalLayout_24.setObjectName("horizontalLayout_24") self.label_140 = QtWidgets.QLabel(self.tab_16) @@ -2687,7 +2656,38 @@ def setupUi(self, metaX_main): self.comboBox_group_control_comparing_each_condition_meta.setObjectName("comboBox_group_control_comparing_each_condition_meta") self.horizontalLayout_24.addWidget(self.comboBox_group_control_comparing_each_condition_meta) self.gridLayout_72.addLayout(self.horizontalLayout_24, 2, 1, 1, 1) + self.checkBox_comparing_group_control_in_condition = QtWidgets.QCheckBox(self.tab_16) + sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Fixed) + sizePolicy.setHorizontalStretch(0) + sizePolicy.setVerticalStretch(0) + sizePolicy.setHeightForWidth(self.checkBox_comparing_group_control_in_condition.sizePolicy().hasHeightForWidth()) + self.checkBox_comparing_group_control_in_condition.setSizePolicy(sizePolicy) + self.checkBox_comparing_group_control_in_condition.setObjectName("checkBox_comparing_group_control_in_condition") + 
self.gridLayout_72.addWidget(self.checkBox_comparing_group_control_in_condition, 2, 0, 1, 1) + self.horizontalLayout_dunnett_group = QtWidgets.QHBoxLayout() + self.horizontalLayout_dunnett_group.setObjectName("horizontalLayout_dunnett_group") + self.gridLayout_72.addLayout(self.horizontalLayout_dunnett_group, 1, 1, 1, 1) + self.label_114 = QtWidgets.QLabel(self.tab_16) + sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Fixed) + sizePolicy.setHorizontalStretch(0) + sizePolicy.setVerticalStretch(0) + sizePolicy.setHeightForWidth(self.label_114.sizePolicy().hasHeightForWidth()) + self.label_114.setSizePolicy(sizePolicy) + self.label_114.setObjectName("label_114") + self.gridLayout_72.addWidget(self.label_114, 0, 1, 1, 1) + self.label_115 = QtWidgets.QLabel(self.tab_16) + self.label_115.setObjectName("label_115") + self.gridLayout_72.addWidget(self.label_115, 0, 0, 1, 1) self.gridLayout_33.addLayout(self.gridLayout_72, 4, 1, 1, 2) + self.line_26 = QtWidgets.QFrame(self.tab_16) + self.line_26.setFrameShape(QtWidgets.QFrame.HLine) + self.line_26.setFrameShadow(QtWidgets.QFrame.Sunken) + self.line_26.setObjectName("line_26") + self.gridLayout_33.addWidget(self.line_26, 9, 1, 1, 2) + self.pushButton_multi_deseq2 = QtWidgets.QPushButton(self.tab_16) + self.pushButton_multi_deseq2.setEnabled(False) + self.pushButton_multi_deseq2.setObjectName("pushButton_multi_deseq2") + self.gridLayout_33.addWidget(self.pushButton_multi_deseq2, 10, 2, 1, 1) self.tabWidget_3.addTab(self.tab_16, "") self.tab_19 = QtWidgets.QWidget() self.tab_19.setObjectName("tab_19") @@ -3184,7 +3184,7 @@ def setupUi(self, metaX_main): self.gridLayout_co_expr_sample.setObjectName("gridLayout_co_expr_sample") self.gridLayout_47.addLayout(self.gridLayout_co_expr_sample, 3, 1, 1, 3) self.groupBox_co_expression_plot_settings = QtWidgets.QGroupBox(self.tab_5) - self.groupBox_co_expression_plot_settings.setMaximumSize(QtCore.QSize(16777215, 220)) + 
self.groupBox_co_expression_plot_settings.setMaximumSize(QtCore.QSize(16777215, 240)) self.groupBox_co_expression_plot_settings.setObjectName("groupBox_co_expression_plot_settings") self.gridLayout_56 = QtWidgets.QGridLayout(self.groupBox_co_expression_plot_settings) self.gridLayout_56.setObjectName("gridLayout_56") @@ -3829,7 +3829,7 @@ def setupUi(self, metaX_main): self.label_100.setObjectName("label_100") self.gridLayout_24.addWidget(self.label_100, 5, 0, 1, 1) self.groupBox_expression_trends_plot_settings = QtWidgets.QGroupBox(self.tab_15) - self.groupBox_expression_trends_plot_settings.setMaximumSize(QtCore.QSize(16777215, 220)) + self.groupBox_expression_trends_plot_settings.setMaximumSize(QtCore.QSize(16777215, 240)) self.groupBox_expression_trends_plot_settings.setObjectName("groupBox_expression_trends_plot_settings") self.gridLayout_60 = QtWidgets.QGridLayout(self.groupBox_expression_trends_plot_settings) self.gridLayout_60.setObjectName("gridLayout_60") @@ -3837,7 +3837,7 @@ def setupUi(self, metaX_main): self.scrollArea_5.setWidgetResizable(True) self.scrollArea_5.setObjectName("scrollArea_5") self.scrollAreaWidgetContents_6 = QtWidgets.QWidget() - self.scrollAreaWidgetContents_6.setGeometry(QtCore.QRect(0, 0, 620, 65)) + self.scrollAreaWidgetContents_6.setGeometry(QtCore.QRect(0, 0, 1016, 105)) self.scrollAreaWidgetContents_6.setObjectName("scrollAreaWidgetContents_6") self.gridLayout_57 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_6) self.gridLayout_57.setObjectName("gridLayout_57") @@ -4210,7 +4210,7 @@ def setupUi(self, metaX_main): self.label_149.setObjectName("label_149") self.gridLayout_4.addWidget(self.label_149, 0, 0, 1, 1) self.groupBox_taxa_func_link_plot_settings = QtWidgets.QGroupBox(self.tab_8) - self.groupBox_taxa_func_link_plot_settings.setMaximumSize(QtCore.QSize(16777215, 220)) + self.groupBox_taxa_func_link_plot_settings.setMaximumSize(QtCore.QSize(16777215, 240)) 
self.groupBox_taxa_func_link_plot_settings.setObjectName("groupBox_taxa_func_link_plot_settings") self.gridLayout_65 = QtWidgets.QGridLayout(self.groupBox_taxa_func_link_plot_settings) self.gridLayout_65.setObjectName("gridLayout_65") @@ -4838,7 +4838,7 @@ def setupUi(self, metaX_main): self.pushButton_plot_network.setObjectName("pushButton_plot_network") self.gridLayout_6.addWidget(self.pushButton_plot_network, 10, 1, 1, 3) self.groupBox_taxa_func_link_net_plot_settings = QtWidgets.QGroupBox(self.tab_9) - self.groupBox_taxa_func_link_net_plot_settings.setMaximumSize(QtCore.QSize(16777215, 220)) + self.groupBox_taxa_func_link_net_plot_settings.setMaximumSize(QtCore.QSize(16777215, 240)) self.groupBox_taxa_func_link_net_plot_settings.setObjectName("groupBox_taxa_func_link_net_plot_settings") self.gridLayout_63 = QtWidgets.QGridLayout(self.groupBox_taxa_func_link_net_plot_settings) self.gridLayout_63.setObjectName("gridLayout_63") @@ -4846,7 +4846,7 @@ def setupUi(self, metaX_main): self.scrollArea_7.setWidgetResizable(True) self.scrollArea_7.setObjectName("scrollArea_7") self.scrollAreaWidgetContents_8 = QtWidgets.QWidget() - self.scrollAreaWidgetContents_8.setGeometry(QtCore.QRect(0, 0, 383, 68)) + self.scrollAreaWidgetContents_8.setGeometry(QtCore.QRect(0, 0, 1016, 141)) self.scrollAreaWidgetContents_8.setObjectName("scrollAreaWidgetContents_8") self.gridLayout_66 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_8) self.gridLayout_66.setObjectName("gridLayout_66") @@ -5417,12 +5417,12 @@ def setupUi(self, metaX_main): self.retranslateUi(metaX_main) self.stackedWidget.setCurrentIndex(0) - self.tabWidget_TaxaFuncAnalyzer.setCurrentIndex(2) + self.tabWidget_TaxaFuncAnalyzer.setCurrentIndex(4) self.toolBox_2.setCurrentIndex(0) self.tabWidget_4.setCurrentIndex(1) - self.tabWidget_3.setCurrentIndex(3) + self.tabWidget_3.setCurrentIndex(2) self.tabWidget.setCurrentIndex(1) - self.tabWidget_2.setCurrentIndex(0) + self.tabWidget_2.setCurrentIndex(1) 
self.tabWidget_6.setCurrentIndex(1) self.toolBox_metalab_res_anno.setCurrentIndex(0) self.tabWidget_5.setCurrentIndex(0) @@ -5839,11 +5839,11 @@ def retranslateUi(self, metaX_main): self.comboBox_table_for_dunnett.setItemText(3, _translate("metaX_main", "peptides")) self.label_113.setText(_translate("metaX_main", "Meta")) self.checkBox_group_control_in_condition.setText(_translate("metaX_main", "In Condition")) - self.pushButton_multi_deseq2.setText(_translate("metaX_main", "Run Deseq2")) + self.label_140.setText(_translate("metaX_main", " By:")) + self.checkBox_comparing_group_control_in_condition.setText(_translate("metaX_main", "Comparing in Each Condition")) self.label_114.setText(_translate("metaX_main", "Groups (Default all)")) self.label_115.setText(_translate("metaX_main", "Control Group")) - self.checkBox_comparing_group_control_in_condition.setText(_translate("metaX_main", "Comparing in Each Condition")) - self.label_140.setText(_translate("metaX_main", " By:")) + self.pushButton_multi_deseq2.setText(_translate("metaX_main", "Run Deseq2")) self.tabWidget_3.setTabText(self.tabWidget_3.indexOf(self.tab_16), _translate("metaX_main", "Group-Control TEST ")) self.label_166.setText(_translate("metaX_main", "Groups")) self.pushButton_deseq2.setText(_translate("metaX_main", "Run DESeq2")) diff --git a/metax/taxafunc_analyzer/analyzer.py b/metax/taxafunc_analyzer/analyzer.py index b21e774..540e421 100644 --- a/metax/taxafunc_analyzer/analyzer.py +++ b/metax/taxafunc_analyzer/analyzer.py @@ -54,6 +54,7 @@ def __init__( self.peptide_col_name = peptide_col_name self.protein_col_name = protein_col_name + self.protein_separator = ';' self.custom_col_name = custom_col_name self.sample_list: Optional[List[str]] = None self.meta_df: Optional[pd.DataFrame] = None @@ -78,6 +79,7 @@ def __init__( self.any_df_mode = any_df_mode # if True, the consider the TaxaFunc df as other_df self.custom_df: Optional[pd.DataFrame] = None # other df, any df that user want to add 
self.peptide_num_used = {'taxa': 0, 'func': 0, 'taxa_func': 0, 'protein': 0} + self.distinct_peptides_list: list|None = None self.split_func_status:bool = False self.split_func_sep:str = '' @@ -689,6 +691,18 @@ def run_lfq_for_taxa_func(self, df_taxa_func): return df_taxa_func + def calculate_distinct_peptides(self): #! NOT USED YET + # extract the peptide column and protein_col_name + print("Calculating distinct peptides list...") + extract_cols = [self.peptide_col_name, self.protein_col_name] + df = self.original_df[extract_cols] + separate_protein = self.protein_separator + df['protein_num'] = df[self.protein_col_name].apply(lambda x: len(x.split(separate_protein))) + df = df[df['protein_num'] == 1] + distinct_peptides = df[self.peptide_col_name].tolist() + self.distinct_peptides_list = distinct_peptides + + def update_data_preprocess_parameters(self, data_preprocess_params): normalize_method = data_preprocess_params['normalize_method'] @@ -706,13 +720,12 @@ def update_data_preprocess_parameters(self, data_preprocess_params): return data_preprocess_params - def filter_peptides_num_for_splited_func(self, df, peptide_num_threshold, df_type, distinct_threshold_mode=False): + def filter_peptides_num_for_splited_func(self, df, peptide_num_threshold, df_type): ''' Only for the splited func table or taxa_func table - df: the splited func table or taxa_func table which has been grouped, index is the func or taxa_func - peptide_num_threshold: the threshold of peptide number for each func or taxa_func - df_type: 'func' or 'taxa_func' - - distinct_threshold_mode: TODO ''' valid_df_types = ['func', 'taxa_func'] @@ -750,6 +763,49 @@ def filter_peptides_num(self, df, peptide_num_threshold, df_type, distinct_thres else: item_col = 'Taxon' if df_type == 'taxa' else self.func_name + # # if True: #! 
Need to be implemented + # if distinct_threshold_mode: + # if self.distinct_peptides_list is None: + # self.calculate_distinct_peptides() + + # peptides_in_taxa_func = defaultdict(list) + # peptides_in_taxa = defaultdict(list) + # peptides_in_func = defaultdict(list) + # skiped_peptides_list = [] + # for row in tqdm(df.itertuples(index=False), total=len(df), desc="Creating peptides_dict"): + # peptide = row[0] + # if peptide not in self.distinct_peptides_list: + # skiped_peptides_list.append(peptide) + # continue + + # if df_type == 'taxa': + # taxa = row[1] + # # Append peptide to taxa list + # peptides_in_taxa[taxa].append(peptide) + + # if self.split_func_status: + # func_list = [f.strip() for f in row[2].split(self.split_func_sep)] + # # Process each function in the func_list + # for func in func_list: + # peptides_in_func[func].append(peptide) + # taxa_func = f'{taxa}&&&&{func}' + # peptides_in_taxa_func[taxa_func].append(peptide) + # else: + # if df_type in ['func', 'taxa_func']: + # taxa = row[1] + # func = row[2] + # # Append peptide to func list + # peptides_in_func[func].append(peptide) + # # Create combined key for taxa_func + # taxa_func = f'{taxa}&&&&{func}' + # peptides_in_taxa_func[taxa_func].append(peptide) + + # peitides_dict = {'taxa': peptides_in_taxa, 'func': peptides_in_func, 'taxa_func': peptides_in_taxa_func} + # remove_list = [k for k, v in peitides_dict[df_type].items() if len(v) < peptide_num] + # skiped_peptides_list = set(skiped_peptides_list) + + + # else: # Group by item_col and filter based on peptide number dict_item_pep_num = df.groupby(item_col).size().to_dict() remove_list = [k for k, v in dict_item_pep_num.items() if v < peptide_num] @@ -761,7 +817,7 @@ def filter_peptides_num(self, df, peptide_num_threshold, df_type, distinct_thres df = df.drop('taxa_func', axis=1) self.peptide_num_used[df_type] = len(df) - print(f"Removed [{len(remove_list)} {df_type}] from [{df_original_len - len(df)} Peptides] with less than [{peptide_num}] 
peptides.") + print(f"Removed [{len(set((remove_list)))} {df_type}] from [{df_original_len - len(df)} Peptides] with less than [{peptide_num}] peptides.") return df @@ -819,7 +875,10 @@ def set_multi_tables(self, level: str = 's', func_threshold:float = 1.00, self.peptide_num_used['protein'] = 0 sum_protein_params['quant_method'] = quant_method df_peptide_for_protein = self.detect_and_handle_outliers(df=self.original_df, **outlier_params) - self.protein_df = SumProteinIntensity(taxa_func_analyzer=self, df=df_peptide_for_protein).sum_protein_intensity( **sum_protein_params) + self.protein_df = SumProteinIntensity(taxa_func_analyzer=self, df=df_peptide_for_protein, + peptide_num_threshold=sum_protein_params['peptide_num_threshold'], + protein_separator = self.protein_separator + ).sum_protein_intensity( **sum_protein_params) self.protein_df = self.data_preprocess(df=self.protein_df,df_name = 'protein', **data_preprocess_params) @@ -1105,9 +1164,10 @@ def get_df(self, table_name:str = 'taxa'): 'batch_meta': 'None', 'processing_order': ['transform', 'normalize', 'batch']}, peptide_num_threshold = {'taxa': 3, 'func': 3, 'taxa_func': 3}, - keep_unknow_func=False, sum_protein=False, - sum_protein_params = {'method': 'razor', 'by_sample': False, 'rank_method': 'unique_counts', 'greedy_method': 'heap', 'peptide_num_threshold': 3}, - split_func=True, split_func_params = {'split_by': '|', 'share_intensity': False}, + keep_unknow_func=False, + sum_protein=True, + sum_protein_params = {'method': 'anti-razor', 'by_sample': False, 'rank_method': 'unique_counts', 'greedy_method': 'heap', 'peptide_num_threshold': 3}, + split_func=False, split_func_params = {'split_by': '|', 'share_intensity': False}, taxa_and_func_only_from_otf=False, quant_method='sum' ) diff --git a/metax/taxafunc_analyzer/analyzer_utils/razor_sum.py b/metax/taxafunc_analyzer/analyzer_utils/razor_sum.py index 7b20bda..5caf2d1 100644 --- a/metax/taxafunc_analyzer/analyzer_utils/razor_sum.py +++ 
b/metax/taxafunc_analyzer/analyzer_utils/razor_sum.py @@ -130,7 +130,8 @@ def get_mini_target_set(self, greedy_method='heap'): self.greedy_method = greedy_method print('Start to get minimum target set using method: [razor]') # only extract the peptide and target columns - extract_cols = [self.column_map['peptide'], self.column_map['target']] + self.column_map['sample_list'] if self.column_map['sample_list'] else [] + extract_cols = [self.column_map['peptide'], self.column_map['target']] + extract_cols = extract_cols + self.column_map['sample_list'] if self.column_map['sample_list'] else extract_cols # if NA in target column, or '', raise error if self.df[self.column_map['target']].isna().any() or '' in self.df[self.column_map['target']].values: raise ValueError(f'NA or empty value in target column: {self.column_map["target"]}') diff --git a/metax/taxafunc_analyzer/analyzer_utils/sum_protein_intensity.py b/metax/taxafunc_analyzer/analyzer_utils/sum_protein_intensity.py index 17209aa..d6c0e26 100644 --- a/metax/taxafunc_analyzer/analyzer_utils/sum_protein_intensity.py +++ b/metax/taxafunc_analyzer/analyzer_utils/sum_protein_intensity.py @@ -69,7 +69,7 @@ def sum_protein_intensity(self, method='razor', by_sample=False, rank_method='un greedy_method: str, default 'heap'. 
only used for `razor` method options: ['greedy', 'heap'] peptide_num_threshold: int, default None - the protein must have at least 3 peptides to be considered as a target + the protein must have at least number peptides to be considered as a target quant_method: str, default 'sum' options: ['sum', 'lfq'] ''' @@ -82,23 +82,10 @@ def sum_protein_intensity(self, method='razor', by_sample=False, rank_method='un if peptide_num_threshold is not None: self.peptide_num_threshold = peptide_num_threshold - # remove the protein with less than the threshold of peptides - # use teh methood in RazorSum - razor_integrator = RazorSum(df=self.df, - column_map={ - 'peptide': self.tfa.peptide_col_name, - 'target': self.tfa.protein_col_name, - 'sample_list': self.tfa.sample_list, - }, - peptide_num_threshold=self.peptide_num_threshold, - share_intensity=self.share_intensity, - greedy_method=greedy_method, - protein_separator= self.protein_separator) - self.rank_method = rank_method self.check_protein_col() - self.df = razor_integrator.remove_protein_less_than_threshold() + #innitialize the peptide number used as the total number of peptides self.tfa.peptide_num_used['protein'] = len(self.df) if method == 'rank': @@ -121,8 +108,19 @@ def sum_protein_intensity(self, method='razor', by_sample=False, rank_method='un self._sum_protein_rank(sample, by_sample) elif method == 'razor': print('start to sum protein intensity using method: [razor]') + # use teh methood in RazorSum + razor_integrator = RazorSum(df=self.df, + column_map={ + 'peptide': self.tfa.peptide_col_name, + 'target': self.tfa.protein_col_name, + 'sample_list': self.tfa.sample_list, + }, + peptide_num_threshold=self.peptide_num_threshold, + share_intensity=self.share_intensity, + greedy_method=greedy_method, + protein_separator= self.protein_separator) if quant_method == 'sum': - razor_integrator.peptide_num_threshold = 1 # set the threshold to 1, to avoid run filter again + # razor_integrator.peptide_num_threshold = 1 # set 
the threshold to 1, to avoid run filter again res_df = razor_integrator.sum_protein_intensity(greedy_method=greedy_method) elif quant_method == 'lfq': from .lfq import run_lfq @@ -137,10 +135,13 @@ def sum_protein_intensity(self, method='razor', by_sample=False, rank_method='un # move teh 2 columns to the front res_df = res_df[['peptides', 'peptide_num'] + [col for col in res_df.columns if col not in ['peptides', 'peptide_num']]] + self.tfa.peptide_num_used['protein'] = len(razor_integrator.df) return res_df elif method == 'anti-razor': print(f"\n-------------Start to sum protein intensity using method: [{method}] by_sample: [True] rank_method: [Shared]-------------") + #calculate the peptide number for each protein + self.filter_protein_by_peptide_num() for sample in self.tfa.sample_list: self._sum_protein_anti_razor(sample) @@ -161,6 +162,30 @@ def sum_protein_intensity(self, method='razor', by_sample=False, rank_method='un return res_df + + def filter_protein_by_peptide_num(self): + if self.peptide_num_threshold < 2: + return self.df + else: + peptide_col_name = self.tfa.peptide_col_name + protein_col_name = self.tfa.protein_col_name + df= self.df.copy() + target_to_peptides = defaultdict(set) + for _, row in tqdm(df.iterrows(), total=df.shape[0], desc="Creating target to peptides mapping"): + sequence = row[peptide_col_name] + targets = row[protein_col_name].split(self.protein_separator) + for target in targets: + target_to_peptides[target].add(sequence) + proteins_less_than_threshold = [target for target, peps in target_to_peptides.items() if len(peps) < self.peptide_num_threshold] + print(f'Number of proteins with less than {self.peptide_num_threshold} peptides: {len(proteins_less_than_threshold)}') + # remove the proteins with less than 3 peptides from the protein column of the df + df[protein_col_name] = df[protein_col_name].apply(lambda x: ';'.join([protein for protein in x.split(self.protein_separator) if protein not in proteins_less_than_threshold])) + 
self.df[protein_col_name] = df[protein_col_name] + # remove the row with empty protein + self.df = self.df[self.df[protein_col_name].str.strip() != ''] + self.tfa.peptide_num_used['protein'] = len(self.df) + return self.df + # razor method def find_minimum_protein_set(self, peptides, protein_to_peptides): protein_to_peptides_copy = protein_to_peptides.copy() diff --git a/pyproject.toml b/pyproject.toml index b99ae8e..bb5194f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "MetaXTools" -version = "1.115.3" +version = "1.115.4" description = "MetaXTools is a novel tool for linking peptide sequences with taxonomic and functional information in Metaproteomics." readme = "README_PyPi.md" license = { text = "NorthOmics" }