From 3cccd4ebfeeb52fc7298f3c790af87d32fc3d5c9 Mon Sep 17 00:00:00 2001
From: Qing <44231502+byemaxx@users.noreply.github.com>
Date: Mon, 12 Aug 2024 14:19:48 -0400
Subject: [PATCH 1/2] - Fix: 1. Fixed the incorrect group order in the
 title of the volcano plot. 2. Fixed the col scale bug when plotting the
 basic heatmap. - Change: 1. Enable alpha/beta diversity for all types of
 tables. 2. Only assign each peptide to one protein rather than sharing
 the intensity when summing peptides to proteins by the razor method.

---
 Docs/ChangeLog.md                             |   7 +
 metax/gui/main_gui.py                         |  22 +-
 metax/gui/metax_gui/main_window.ui            |  34 +--
 metax/gui/metax_gui/ui_main_window.py         |  26 +--
 metax/peptide_annotator/convert_id_to_name.py |  16 +-
 metax/taxafunc_analyzer/analyzer.py           |   1 +
 .../analyzer_utils/sum_protein_intensity.py   |  25 +-
 metax/taxafunc_ploter/diversity_plot.py       |  21 +-
 metax/taxafunc_ploter/heatmap_plot.py         |   7 +-
 metax/taxafunc_ploter/volcano_plot.py         |  18 +-
 metax/taxafunc_ploter/volcano_plot_js.py      |   8 +-
 metax/utils/scripts/razor_sum.py              | 214 ++++++++++++++++++
 metax/utils/version.py                        |   2 +-
 13 files changed, 336 insertions(+), 65 deletions(-)
 create mode 100644 metax/utils/scripts/razor_sum.py

diff --git a/Docs/ChangeLog.md b/Docs/ChangeLog.md
index 6b68082..c078e15 100644
--- a/Docs/ChangeLog.md
+++ b/Docs/ChangeLog.md
@@ -1,3 +1,10 @@
+# Version: 1.110.0
+## Date: 2024-08-12
+### Changes:
+- Fix: 1. Fixed the incorrect group order in the title of the volcano plot. 2. Fixed the col scale bug when plotting the basic heatmap.
+- Change: 1. Enable alpha/beta diversity for all types of tables. 2. Only assign each peptide to one protein rather than sharing the intensity when summing peptides to proteins by the razor method.
+
+
 # Version: 1.109.12
 ## Date: 2024-08-10
 ### Changes:
diff --git a/metax/gui/main_gui.py b/metax/gui/main_gui.py
index 2d48dbe..771f677 100644
--- a/metax/gui/main_gui.py
+++ b/metax/gui/main_gui.py
@@ -592,8 +592,11 @@ def get_list_by_df_type(self, df_type:str, remove_no_linked:bool=False, silent:b
         return res_list
             
     def change_event_checkBox_basic_plot_table(self):
-        taxa_only_button_list = [self.pushButton_plot_alpha_div, self.pushButton_plot_beta_div, 
-                                 self.pushButton_plot_sunburst, self.pushButton_plot_basic_treemap]
+        taxa_only_button_list = [
+                                # self.pushButton_plot_alpha_div, 
+                                # self.pushButton_plot_beta_div, 
+                                 self.pushButton_plot_sunburst, 
+                                 self.pushButton_plot_basic_treemap]
         
         taxa_func_button_list = [self.pushButton_plot_basic_sankey]
 
@@ -2984,6 +2987,8 @@ def enable_multi_button(self, state=True):
         self.pushButton_trends_clean_list,
         self.comboBox_trends_table,
         self.pushButton_plot_pca_js,
+        self.pushButton_plot_alpha_div, 
+        self.pushButton_plot_beta_div,
         self.pushButton_trends_add_a_list,
         self.pushButton_co_expr_add_a_list,
         self.pushButton_basic_heatmap_add_a_list,
@@ -4217,8 +4222,9 @@ def get_title_by_table_name(self, table_name):
                                                              width=width, height=height, font_size=font_size, 
                                                              plot_all_samples=plot_all_samples, theme=theme,
                                                              sub_meta = sub_meta, show_fliers = show_fliers,
-                                                             legend_col_num=legend_col_num, rename_sample = rename_sample)
-                self.update_table_dict('alpha_diversity', aplha_diversity_df)
+                                                             legend_col_num=legend_col_num, rename_sample = rename_sample, 
+                                                             df_type=table_name, title_name=title_name)
+                self.update_table_dict(f'alpha_diversity({title_name})', aplha_diversity_df)
             elif method == "beta_div":
                 self.show_message('Beta diversity is running, please wait...')
                 metric = self.comboBox_beta_div_method.currentText()
@@ -4227,8 +4233,8 @@ def get_title_by_table_name(self, table_name):
                                                             rename_sample = rename_sample,
                                                             show_label = show_label, adjust_label = adjust_label, 
                                                             theme=theme,sub_meta = sub_meta, legend_col_num=legend_col_num,
-                                                            dot_size = dot_size)
-                self.update_table_dict('beta_diversity_distance_matrix', beta_diversity_distance_matrix)
+                                                            dot_size = dot_size, df_type=table_name, title_name=title_name)
+                self.update_table_dict(f'beta_diversity_distance_matrix({title_name})', beta_diversity_distance_matrix)
                                                             
 
             elif method == 'sunburst':
@@ -4926,7 +4932,7 @@ def plot_deseq2_volcano(self):
             height = self.spinBox_fc_plot_height.value()
             group1 = self.comboBox_deseq2_group1.currentText()
             group2 = self.comboBox_deseq2_group2.currentText()
-            title_name = f'{group1} vs {group2} of {table_name.split("(")[1].split(")")[0]}'
+            title_name = f'{group2} vs {group1} of {table_name.split("(")[1].split(")")[0]}'
             font_size = self.spinBox_deseq2_font_size.value()
             dot_size = self.spinBox_deseq2_dot_size.value()
             plot_js = self.checkBox_deseq2_js_volcano.isChecked()
@@ -5085,7 +5091,7 @@ def deseq2_plot_sankey(self):
             return None
         try:
             df = self.table_dict[table_name]
-            title_name = f'{group1} vs {group2} of {table_name.split("(")[1].split(")")[0]}'
+            title_name = f'{group2} vs {group1} of {table_name.split("(")[1].split(")")[0]}'
 
             pic = SankeyPlot(self.tfa, theme=self.html_theme).plot_fc_sankey(df, width=width, height=height, pvalue=pvalue, p_type = p_type,
                                                       log2fc_min=log2fc_min, log2fc_max=log2fc_max, title =title_name, font_size=font_size)
diff --git a/metax/gui/metax_gui/main_window.ui b/metax/gui/metax_gui/main_window.ui
index 2844d1d..b586e1c 100644
--- a/metax/gui/metax_gui/main_window.ui
+++ b/metax/gui/metax_gui/main_window.ui
@@ -46,7 +46,7 @@
            <enum>Qt::LeftToRight</enum>
           </property>
           <property name="currentIndex">
-           <number>4</number>
+           <number>3</number>
           </property>
           <property name="documentMode">
            <bool>false</bool>
@@ -245,8 +245,8 @@
                 <rect>
                  <x>0</x>
                  <y>0</y>
-                 <width>528</width>
-                 <height>573</height>
+                 <width>391</width>
+                 <height>80</height>
                 </rect>
                </property>
                <attribute name="label">
@@ -1400,7 +1400,7 @@
                        <rect>
                         <x>0</x>
                         <y>0</y>
-                        <width>1016</width>
+                        <width>660</width>
                         <height>232</height>
                        </rect>
                       </property>
@@ -2672,7 +2672,7 @@
                         <x>0</x>
                         <y>0</y>
                         <width>1016</width>
-                        <height>162</height>
+                        <height>158</height>
                        </rect>
                       </property>
                       <layout class="QGridLayout" name="gridLayout_50">
@@ -2808,7 +2808,7 @@
                              </item>
                              <item>
                               <property name="text">
-                               <string>column</string>
+                               <string>col</string>
                               </property>
                              </item>
                              <item>
@@ -3674,7 +3674,7 @@
                       <rect>
                        <x>0</x>
                        <y>0</y>
-                       <width>1003</width>
+                       <width>1020</width>
                        <height>126</height>
                       </rect>
                      </property>
@@ -5543,7 +5543,7 @@
                            <x>0</x>
                            <y>0</y>
                            <width>996</width>
-                           <height>146</height>
+                           <height>140</height>
                           </rect>
                          </property>
                          <layout class="QGridLayout" name="gridLayout_68">
@@ -6093,8 +6093,8 @@
                        <rect>
                         <x>0</x>
                         <y>0</y>
-                        <width>1016</width>
-                        <height>181</height>
+                        <width>493</width>
+                        <height>128</height>
                        </rect>
                       </property>
                       <layout class="QGridLayout" name="gridLayout_49">
@@ -7323,8 +7323,8 @@
                        <rect>
                         <x>0</x>
                         <y>0</y>
-                        <width>1016</width>
-                        <height>144</height>
+                        <width>538</width>
+                        <height>63</height>
                        </rect>
                       </property>
                       <layout class="QGridLayout" name="gridLayout_57">
@@ -8171,8 +8171,8 @@
                        <rect>
                         <x>0</x>
                         <y>0</y>
-                        <width>1016</width>
-                        <height>185</height>
+                        <width>775</width>
+                        <height>102</height>
                        </rect>
                       </property>
                       <layout class="QGridLayout" name="gridLayout_69">
@@ -9091,8 +9091,8 @@
                        <rect>
                         <x>0</x>
                         <y>0</y>
-                        <width>1016</width>
-                        <height>168</height>
+                        <width>383</width>
+                        <height>68</height>
                        </rect>
                       </property>
                       <layout class="QGridLayout" name="gridLayout_66">
@@ -10086,7 +10086,7 @@
      <x>0</x>
      <y>0</y>
      <width>1122</width>
-     <height>21</height>
+     <height>23</height>
     </rect>
    </property>
    <widget class="QMenu" name="menuTools">
diff --git a/metax/gui/metax_gui/ui_main_window.py b/metax/gui/metax_gui/ui_main_window.py
index bf9e99b..e4c1333 100644
--- a/metax/gui/metax_gui/ui_main_window.py
+++ b/metax/gui/metax_gui/ui_main_window.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-# Form implementation generated from reading ui file 'c:\Users\Qing\OneDrive - University of Ottawa\code\TaxaFunc\MetaX\metax\gui\metax_gui\main_window.ui'
+# Form implementation generated from reading ui file 'c:\Users\max\OneDrive - University of Ottawa\code\TaxaFunc\MetaX\metax\gui\metax_gui\main_window.ui'
 #
 # Created by: PyQt5 UI code generator 5.15.9
 #
@@ -147,7 +147,7 @@ def setupUi(self, metaX_main):
         self.toolBox_2.setMaximumSize(QtCore.QSize(1677, 16777215))
         self.toolBox_2.setObjectName("toolBox_2")
         self.page_2 = QtWidgets.QWidget()
-        self.page_2.setGeometry(QtCore.QRect(0, 0, 528, 573))
+        self.page_2.setGeometry(QtCore.QRect(0, 0, 391, 80))
         self.page_2.setObjectName("page_2")
         self.gridLayout_27 = QtWidgets.QGridLayout(self.page_2)
         self.gridLayout_27.setObjectName("gridLayout_27")
@@ -706,7 +706,7 @@ def setupUi(self, metaX_main):
         self.scrollArea.setWidgetResizable(True)
         self.scrollArea.setObjectName("scrollArea")
         self.scrollAreaWidgetContents = QtWidgets.QWidget()
-        self.scrollAreaWidgetContents.setGeometry(QtCore.QRect(0, 0, 1016, 232))
+        self.scrollAreaWidgetContents.setGeometry(QtCore.QRect(0, 0, 660, 232))
         self.scrollAreaWidgetContents.setObjectName("scrollAreaWidgetContents")
         self.gridLayout_34 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents)
         self.gridLayout_34.setObjectName("gridLayout_34")
@@ -1370,7 +1370,7 @@ def setupUi(self, metaX_main):
         self.scrollArea_2.setWidgetResizable(True)
         self.scrollArea_2.setObjectName("scrollArea_2")
         self.scrollAreaWidgetContents_2 = QtWidgets.QWidget()
-        self.scrollAreaWidgetContents_2.setGeometry(QtCore.QRect(0, 0, 1016, 162))
+        self.scrollAreaWidgetContents_2.setGeometry(QtCore.QRect(0, 0, 1016, 158))
         self.scrollAreaWidgetContents_2.setObjectName("scrollAreaWidgetContents_2")
         self.gridLayout_50 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_2)
         self.gridLayout_50.setObjectName("gridLayout_50")
@@ -1919,7 +1919,7 @@ def setupUi(self, metaX_main):
         self.scrollArea_cross_heatmap_settings.setWidgetResizable(True)
         self.scrollArea_cross_heatmap_settings.setObjectName("scrollArea_cross_heatmap_settings")
         self.scrollAreaWidgetContents_3 = QtWidgets.QWidget()
-        self.scrollAreaWidgetContents_3.setGeometry(QtCore.QRect(0, 0, 1003, 126))
+        self.scrollAreaWidgetContents_3.setGeometry(QtCore.QRect(0, 0, 1020, 126))
         self.scrollAreaWidgetContents_3.setObjectName("scrollAreaWidgetContents_3")
         self.gridLayout_38 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_3)
         self.gridLayout_38.setObjectName("gridLayout_38")
@@ -2844,7 +2844,7 @@ def setupUi(self, metaX_main):
         self.scrollArea_3.setWidgetResizable(True)
         self.scrollArea_3.setObjectName("scrollArea_3")
         self.scrollAreaWidgetContents_4 = QtWidgets.QWidget()
-        self.scrollAreaWidgetContents_4.setGeometry(QtCore.QRect(0, 0, 996, 146))
+        self.scrollAreaWidgetContents_4.setGeometry(QtCore.QRect(0, 0, 996, 140))
         self.scrollAreaWidgetContents_4.setObjectName("scrollAreaWidgetContents_4")
         self.gridLayout_68 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_4)
         self.gridLayout_68.setObjectName("gridLayout_68")
@@ -3145,7 +3145,7 @@ def setupUi(self, metaX_main):
         self.scrollArea_4.setWidgetResizable(True)
         self.scrollArea_4.setObjectName("scrollArea_4")
         self.scrollAreaWidgetContents_5 = QtWidgets.QWidget()
-        self.scrollAreaWidgetContents_5.setGeometry(QtCore.QRect(0, 0, 1016, 181))
+        self.scrollAreaWidgetContents_5.setGeometry(QtCore.QRect(0, 0, 493, 128))
         self.scrollAreaWidgetContents_5.setObjectName("scrollAreaWidgetContents_5")
         self.gridLayout_49 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_5)
         self.gridLayout_49.setObjectName("gridLayout_49")
@@ -3790,7 +3790,7 @@ def setupUi(self, metaX_main):
         self.scrollArea_5.setWidgetResizable(True)
         self.scrollArea_5.setObjectName("scrollArea_5")
         self.scrollAreaWidgetContents_6 = QtWidgets.QWidget()
-        self.scrollAreaWidgetContents_6.setGeometry(QtCore.QRect(0, 0, 1016, 144))
+        self.scrollAreaWidgetContents_6.setGeometry(QtCore.QRect(0, 0, 538, 63))
         self.scrollAreaWidgetContents_6.setObjectName("scrollAreaWidgetContents_6")
         self.gridLayout_57 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_6)
         self.gridLayout_57.setObjectName("gridLayout_57")
@@ -4261,7 +4261,7 @@ def setupUi(self, metaX_main):
         self.scrollArea_6.setWidgetResizable(True)
         self.scrollArea_6.setObjectName("scrollArea_6")
         self.scrollAreaWidgetContents_7 = QtWidgets.QWidget()
-        self.scrollAreaWidgetContents_7.setGeometry(QtCore.QRect(0, 0, 1016, 185))
+        self.scrollAreaWidgetContents_7.setGeometry(QtCore.QRect(0, 0, 775, 102))
         self.scrollAreaWidgetContents_7.setObjectName("scrollAreaWidgetContents_7")
         self.gridLayout_69 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_7)
         self.gridLayout_69.setObjectName("gridLayout_69")
@@ -4765,7 +4765,7 @@ def setupUi(self, metaX_main):
         self.scrollArea_7.setWidgetResizable(True)
         self.scrollArea_7.setObjectName("scrollArea_7")
         self.scrollAreaWidgetContents_8 = QtWidgets.QWidget()
-        self.scrollAreaWidgetContents_8.setGeometry(QtCore.QRect(0, 0, 1016, 168))
+        self.scrollAreaWidgetContents_8.setGeometry(QtCore.QRect(0, 0, 383, 68))
         self.scrollAreaWidgetContents_8.setObjectName("scrollAreaWidgetContents_8")
         self.gridLayout_66 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_8)
         self.gridLayout_66.setObjectName("gridLayout_66")
@@ -5276,7 +5276,7 @@ def setupUi(self, metaX_main):
         self.statusbar.setObjectName("statusbar")
         metaX_main.setStatusBar(self.statusbar)
         self.menuBar = QtWidgets.QMenuBar(metaX_main)
-        self.menuBar.setGeometry(QtCore.QRect(0, 0, 1122, 21))
+        self.menuBar.setGeometry(QtCore.QRect(0, 0, 1122, 23))
         self.menuBar.setObjectName("menuBar")
         self.menuTools = QtWidgets.QMenu(self.menuBar)
         self.menuTools.setObjectName("menuTools")
@@ -5336,7 +5336,7 @@ def setupUi(self, metaX_main):
 
         self.retranslateUi(metaX_main)
         self.stackedWidget.setCurrentIndex(0)
-        self.tabWidget_TaxaFuncAnalyzer.setCurrentIndex(4)
+        self.tabWidget_TaxaFuncAnalyzer.setCurrentIndex(3)
         self.toolBox_2.setCurrentIndex(0)
         self.tabWidget_4.setCurrentIndex(1)
         self.tabWidget_3.setCurrentIndex(3)
@@ -5616,7 +5616,7 @@ def retranslateUi(self, metaX_main):
         self.label_186.setText(_translate("metaX_main", "Sankey"))
         self.label_31.setText(_translate("metaX_main", "Scale"))
         self.comboBox_basic_hetatmap_scale.setItemText(0, _translate("metaX_main", "row"))
-        self.comboBox_basic_hetatmap_scale.setItemText(1, _translate("metaX_main", "column"))
+        self.comboBox_basic_hetatmap_scale.setItemText(1, _translate("metaX_main", "col"))
         self.comboBox_basic_hetatmap_scale.setItemText(2, _translate("metaX_main", "all"))
         self.comboBox_basic_hetatmap_scale.setItemText(3, _translate("metaX_main", "None"))
         self.label_13.setText(_translate("metaX_main", "Theme"))
diff --git a/metax/peptide_annotator/convert_id_to_name.py b/metax/peptide_annotator/convert_id_to_name.py
index 783d720..45a5a28 100644
--- a/metax/peptide_annotator/convert_id_to_name.py
+++ b/metax/peptide_annotator/convert_id_to_name.py
@@ -177,15 +177,18 @@ def lookup_and_join(ec_nums, column_name):
     print("Add EC columns to df successfully!")
     return df
 
-def add_pathway_name_to_df(df: pd.DataFrame) -> pd.DataFrame:
-    def query_kegg(id_str, pathway_dict):
+def add_pathway_name_to_df(df: pd.DataFrame, kppe_id:bool = False) -> pd.DataFrame:
+    def query_kegg(id_str, pathway_dict, kppe_id=False):
         id_list = id_str.split(',')
         if id_list[0] == 'not_found':
             return 'not_found'
         pathway_list = []
         for id in id_list:
             if id in pathway_dict:
-                pathway_list.append(pathway_dict[id])
+                if kppe_id:
+                    pathway_list.append(f'{id}:{pathway_dict[id]}')
+                else:
+                    pathway_list.append(pathway_dict[id])
         # remove duplicates
         pathway_list = list(dict.fromkeys(pathway_list))
         if len(pathway_list) == 0:
@@ -199,9 +202,12 @@ def query_kegg(id_str, pathway_dict):
     if 'KEGG_Pathway' not in df.columns:
         print('KEGG_Pathway column does not exist!, return the original dataframe')
         return df
+    
+    #! fill the missing pathway names if necessary
+    # df['KEGG_Pathway'] = df['KEGG_Pathway'].fillna('not_found')
 
     pathway_dict = get_pathway_dict()
-    df.loc[:, 'KEGG_Pathway_name'] = df['KEGG_Pathway'].apply(lambda x: query_kegg(x, pathway_dict))
+    df.loc[:, 'KEGG_Pathway_name'] = df['KEGG_Pathway'].apply(lambda x: query_kegg(x, pathway_dict, kppe_id))
     df.loc[:, 'KEGG_Pathway_name_prop'] = df['KEGG_Pathway_prop']    
     print("Add KEGG_Pathway_name to df successfully!")
     return df
@@ -250,7 +256,7 @@ def query_ko(id_str, ko_dict):
 # if __name__ == '__main__':
 #     df_path = "MetaX/data/example_data/Example_OTF.tsv"
 #     df = pd.read_csv(df_path, sep='\t')
-#     df = add_pathway_name_to_df(df)
+#     df = add_pathway_name_to_df(df, kppe_id=True)
 #     df = add_ec_name_to_df(df)
 #     df = add_ko_name_to_df(df)
 #     df.to_csv("11.tsv", sep='\t', index=False)
\ No newline at end of file
diff --git a/metax/taxafunc_analyzer/analyzer.py b/metax/taxafunc_analyzer/analyzer.py
index 2c79fb4..4d4f616 100644
--- a/metax/taxafunc_analyzer/analyzer.py
+++ b/metax/taxafunc_analyzer/analyzer.py
@@ -791,6 +791,7 @@ def get_df(self, table_name:str = 'taxa'):
             "proteins": "protein_df",
             
         }
+        table_name = table_name.lower()
         dft = getattr(self, name_dict[table_name])
         # remove peptide_num column if exists
         if "peptide_num" in dft.columns:
diff --git a/metax/taxafunc_analyzer/analyzer_utils/sum_protein_intensity.py b/metax/taxafunc_analyzer/analyzer_utils/sum_protein_intensity.py
index 5a02486..8995e84 100644
--- a/metax/taxafunc_analyzer/analyzer_utils/sum_protein_intensity.py
+++ b/metax/taxafunc_analyzer/analyzer_utils/sum_protein_intensity.py
@@ -28,7 +28,10 @@ def __init__(self, taxa_func_analyzer):
         self.df = self.tfa.original_df.loc[:, self.extract_col_name]
         self._init_dicts()
         self.greedy_method = None  # only used for razor method
-
+        self.share_intensity = False
+        self.__multi_target_count = 0
+        
+        
     def check_protein_col(self):
         # if any NA, '', or empty in the protein column, raise error
         if self.df[self.tfa.protein_col_name].isnull().values.any():
@@ -71,6 +74,8 @@ def sum_protein_intensity(self, method='razor', by_sample=False, rank_method='un
             # use Set Cover Problem to get the protein list, then sum the intensity
             pep_to_protein = self._create_pep_to_protein_razor()
             self._sum_protein_razor(pep_to_protein)
+            self.__multi_target_count = self.__multi_target_count/len(self.tfa.sample_list)
+            print(f'Peptides with multiple targets: {self.__multi_target_count} ({self.__multi_target_count/len(pep_to_protein)*100:.2f}%)')
         
         elif method == 'anti-razor':
             print(f"\n-------------Start to sum protein intensity using method: [{method}]  by_sample: [True] rank_method: [Shared]-------------")    
@@ -269,13 +274,19 @@ def _update_output_dict(self, protein_list: list, sample_name:str, intensity:flo
             else:
                 self.res_intensity_dict[sample_name][protein] = intensity
         else:
-            intensity = intensity/len(protein_list)
-            for protein in protein_list:
-                if protein in self.res_intensity_dict[sample_name].keys():
+            if self.share_intensity:
+                intensity = intensity/len(protein_list)
+                for protein in protein_list:
+                    self.res_intensity_dict.setdefault(sample_name, {}).setdefault(protein, 0)
                     self.res_intensity_dict[sample_name][protein] += intensity
-                else:
-                    self.res_intensity_dict[sample_name][protein] = intensity
-                    
+            else:
+                self.__multi_target_count += 1
+                protein = protein_list[0]
+                self.res_intensity_dict.setdefault(sample_name, {}).setdefault(protein, 0)
+                self.res_intensity_dict[sample_name][protein] += intensity
+                
+                
+                
                     
     def _sum_protein_rank(self, sample_name:str, by_sample=False):
         # print in one line
diff --git a/metax/taxafunc_ploter/diversity_plot.py b/metax/taxafunc_ploter/diversity_plot.py
index fb89d86..966a6cc 100644
--- a/metax/taxafunc_ploter/diversity_plot.py
+++ b/metax/taxafunc_ploter/diversity_plot.py
@@ -27,10 +27,12 @@ def ace_with_threshold(self, row):
     def plot_alpha_diversity(self, metric:str='shannon', sample_list:list=None, 
                              width:int = 10, height:int = 8,  font_size:int = 10,
                              plot_all_samples:bool = False, theme:str = None, sub_meta:str = 'None',
-                             show_fliers = True, legend_col_num: int | None = None, rename_sample:bool = False
+                             show_fliers = True, legend_col_num: int | None = None, rename_sample:bool = False,
+                             df_type:str = 'taxa', title_name:str = "Table"
                              ):
         '''
         Calculate alpha diversity and plot boxplot\n
+        df_type: ['taxa', 'functions', 'taxa_functions', ...] table name passed to self.tfa.get_df()
         return: (fig, aplha_diversity_df)
         '''
         if sample_list is None:
@@ -63,7 +65,8 @@ def plot_alpha_diversity(self, metric:str='shannon', sample_list:list=None,
             raise ValueError(f'Invalid metric: {metric}. Please choose from: {list(metric_dict.keys())}')
         
         try:
-            df = self.tfa.taxa_df.copy()
+            # df = self.tfa.taxa_df.copy()
+            df = self.tfa.get_df(df_type)
             df = df[sample_list]
             
             if metric == 'ace':
@@ -135,7 +138,8 @@ def plot_alpha_diversity(self, metric:str='shannon', sample_list:list=None,
             fig.set_yticklabels(fig.get_yticks(), fontsize=font_size)
             fig.set_xlabel('Group', fontsize=font_size)
             fig.set_ylabel(f'{metric} Index', fontsize=font_size)
-            fig.set_title(f'Alpha Diversity ({metric})', fontsize=font_size+2, fontweight='bold')
+            fig.set_title(f'Alpha Diversity of {title_name} ({metric})', 
+                          fontsize=font_size+2, fontweight='bold')
             if sub_meta:
                 if legend_col_num != 0:
                     plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0., 
@@ -171,7 +175,8 @@ def plot_beta_diversity(self, metric:str='braycurtis', sample_list:list|None=Non
                              width:int = 10, height:int = 8,  font_size:int = 10, 
                              font_transparency:float = 0.8, show_label:bool = False,rename_sample:bool = False,
                               adjust_label:bool = False , theme:str|None = None, sub_meta:str = "None", 
-                              legend_col_num: int | None = None, dot_size: float|None = None):
+                              legend_col_num: int | None = None, dot_size: float|None = None, df_type:str = 'taxa',
+                              title_name:str = "Table"):
         '''
         Calculate beta diversity and plot PCoA plot
         Return:(fig, distance_matrix)
@@ -200,7 +205,7 @@ def plot_beta_diversity(self, metric:str='braycurtis', sample_list:list|None=Non
             color_palette = None  # Let seaborn handle the color mapping
 
         try:
-            df = self.tfa.taxa_df.copy()
+            df = self.tfa.get_df(df_type)
             df = df[sample_list]
             df = df.T
             
@@ -225,8 +230,10 @@ def plot_beta_diversity(self, metric:str='braycurtis', sample_list:list|None=Non
             fig.set_ylabel("PC2 (%.2f%%)" % (pcoa_res.proportion_explained[1] * 100), fontsize=font_size)
             # set title
             num_legend = len(unique_groups) if sub_meta == 'None' else len(set(style_list)) + len(unique_groups)
-
-            plt.title(f'PCoA plot of {metric} distance (Total explained variation: {pcoa_res.proportion_explained[0] * 100 + pcoa_res.proportion_explained[1] * 100:.2f}%)', fontsize=font_size+2, fontweight='bold')
+            
+            title = f'PCoA plot of {metric} distance {title_name} (Total explained variation: {pcoa_res.proportion_explained[0] * 100 + pcoa_res.proportion_explained[1] * 100:.2f}%)'
+            plt.title(title, fontsize=font_size+2, fontweight='bold')
+            
             if legend_col_num != 0:
                 plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0.,
                         fontsize=font_size +2 , ncol= (num_legend//30 + 1) if legend_col_num is None else legend_col_num)
diff --git a/metax/taxafunc_ploter/heatmap_plot.py b/metax/taxafunc_ploter/heatmap_plot.py
index c961ff1..4c422d4 100644
--- a/metax/taxafunc_ploter/heatmap_plot.py
+++ b/metax/taxafunc_ploter/heatmap_plot.py
@@ -378,7 +378,7 @@ def plot_basic_heatmap(self,  df, title = 'Heatmap',fig_size:tuple|None = None,
 
         fig.ax_heatmap.set_xticklabels(fig.ax_heatmap.get_xmajorticklabels(), fontsize=font_size, rotation=90)
         fig.ax_heatmap.set_yticklabels(fig.ax_heatmap.get_ymajorticklabels(), fontsize=font_size, rotation=0)
-        title = f"{title} (scaled by {scale})"
+        title = f"{title} (scaled by {scale})" if scale not in [None, 'None'] else title
         plt.suptitle(title, weight='bold')
         
         cbar = fig.ax_heatmap.collections[0].colorbar
@@ -754,6 +754,11 @@ def scale_data(self, df: pd.DataFrame, scale_by: str|None = None, method: str|No
             return df
         
         df = df.copy()
+        
+        # convert 'column' to 'col' for consistency
+        if scale_by == 'column':
+            scale_by = 'col'
+             
         if scale_by not in ['row', 'col', 'all', 'none']:
             raise ValueError("scale_by must be 'row', 'col', 'all' or 'none'")
 
diff --git a/metax/taxafunc_ploter/volcano_plot.py b/metax/taxafunc_ploter/volcano_plot.py
index 8b10985..1571b96 100644
--- a/metax/taxafunc_ploter/volcano_plot.py
+++ b/metax/taxafunc_ploter/volcano_plot.py
@@ -10,7 +10,7 @@ def __init__(self):
 
         
     def plot_volcano(self, df_fc, pvalue: float = 0.05, p_type='padj', log2fc_min: float = 1, log2fc_max: float = 10,
-                     title_name='2 groups',font_size:int=12, width=8, height=6, dot_size=15, theme:str|None = None):
+                     title_name='2 groups',font_size:int=12, width=8, height=6, dot_size=15, theme:str|None = None, alpha=0.8):
         
         def color_mapping(type_value):
             if type_value == 'up':
@@ -42,14 +42,22 @@ def color_mapping(type_value):
             
             # create the volcano plot
             plt.figure(figsize=(width, height))
-            fig = sns.scatterplot(x=df['log2FoldChange'], y=-np.log10(df[p_type]), s=dot_size*10, hue=df['type'], alpha=0.8,
-                                palette={'up': '#d23918', 'down': '#68945c', 'ultra-up': '#663d74', 'ultra-down': '#206864', 'normal': '#6b798e'}, linewidth=0.5, edgecolor='black')
+            fig = sns.scatterplot(x=df['log2FoldChange'], y=-np.log10(df[p_type]), s=dot_size*10, hue=df['type'], alpha=alpha,
+                                # palette={'up': '#d23918', 'down': '#68945c', 'ultra-up': '#663d74', 'ultra-down': '#206864', 'normal': '#6b798e'}, 
+                                palette={'up': color_mapping('up'), 'down': color_mapping('down'), 'ultra-up': color_mapping('ultra-up'), 'ultra-down': color_mapping('ultra-down'), 'normal': color_mapping('normal')},
+                                linewidth=0.5, edgecolor='black')
             plt.axhline(y=-np.log10(pvalue), linestyle='--', color='grey', linewidth=1)  # padj line
             plt.axvline(x=-log2fc_min, linestyle='--', color='grey', linewidth=1)  # log2FoldChange line
             plt.axvline(x=log2fc_min, linestyle='--', color='grey', linewidth=1)   # log2FoldChange line
 
             # set the title and labels
-            fig.set_title(f'Volcano plot of {title_name} ({"padj" if p_type == "padj" else "pvalue"} < {pvalue}, |log2FoldChange| > {log2fc_min})', fontsize=font_size)
+            # if ultra-up or ultra-down is not in the data, then don't show it in the title
+            if len(df[df['type'].isin(['ultra-up', 'ultra-down'])]) == 0:
+                log2fc_title = f'|log2FoldChange| >= {log2fc_min}'
+            else:
+                log2fc_title = f'{log2fc_min} <= |log2FoldChange| < {log2fc_max}'
+                
+            fig.set_title(f'Volcano plot of {title_name} ({"padj" if p_type == "padj" else "pvalue"} <= {pvalue}, {log2fc_title})', fontsize=font_size)
             fig.set_xlabel('log2FoldChange', fontsize=font_size)
             fig.set_ylabel('-log10(padj)', fontsize=font_size)
             sns.despine(trim=True)
@@ -63,7 +71,7 @@ def color_mapping(type_value):
                 if count_dict[t] == 0:
                     continue
                 # set the size of dot as font size*10, because when the font size is small, the dot will be overlapped
-                h = plt.scatter([], [], s=font_size*10, color=color_mapping(t), alpha=0.8, linewidth=0.5, edgecolor='black')
+                h = plt.scatter([], [], s=font_size*10, color=color_mapping(t), alpha=alpha, linewidth=0.5, edgecolor='black')
                 handles.append(h)
                 labels.append(f'{t} ({count_dict[t]})')
             fig.legend(handles=handles, labels=labels,
diff --git a/metax/taxafunc_ploter/volcano_plot_js.py b/metax/taxafunc_ploter/volcano_plot_js.py
index 413e43b..eb00289 100644
--- a/metax/taxafunc_ploter/volcano_plot_js.py
+++ b/metax/taxafunc_ploter/volcano_plot_js.py
@@ -63,7 +63,13 @@ def color_mapping(type_value):
         scatter_ultra_down = df[df['type'] == 'ultra-down'].apply(lambda p: {'name': p['label'], 'value': [p['log2FoldChange'], p[p_type]]}, axis=1)
         Scatter_normal = df[df['type'] == 'normal'].apply(lambda p: {'name': p['label'], 'value': [p['log2FoldChange'], p[p_type]]}, axis=1)
 
-        title = f'Volcano plot of {title_name} ({p_type} <= {pvalue},  {log2fc_min} <= log2FoldChange < {log2fc_max})'
+        # if ultra-up or ultra-down is not in the data, then don't show it in the title
+        if len(df[df['type'].isin(['ultra-up', 'ultra-down'])]) == 0:
+            log2fc_title = f'|log2FoldChange| >= {log2fc_min}'
+        else:
+            log2fc_title = f'{log2fc_min} <= |log2FoldChange| < {log2fc_max}'
+        
+        title = f'Volcano plot of {title_name} ({p_type} <= {pvalue}, {log2fc_title})'
         
         scatter = (
             Scatter(init_opts=opts.InitOpts(width=f"{width*100}px", height=f"{height*100}px", theme=self.theme))
diff --git a/metax/utils/scripts/razor_sum.py b/metax/utils/scripts/razor_sum.py
new file mode 100644
index 0000000..fef9b3a
--- /dev/null
+++ b/metax/utils/scripts/razor_sum.py
@@ -0,0 +1,309 @@
+from collections import defaultdict
+import pandas as pd
+from tqdm import tqdm
+
+
+class RazorSum:
+    """Sum peptide intensities to targets (e.g. proteins) with the razor method.
+
+    A minimum set of targets that explains every peptide is selected with a
+    greedy set-cover approximation. Each peptide is then assigned to the
+    selected target(s) that own the most peptides, and the peptide
+    intensities are summed per target for every sample.
+
+    Args:
+        df: Peptide table; the target column holds ';'-separated targets.
+        column_map: Column names, e.g. {'peptide': 'Sequence',
+            'target': 'Proteins', 'sample_list': ['Sample1', 'Sample2']}.
+            'sample_list' is only required for sum_protein_intensity().
+    """
+
+    def __init__(self, df, column_map):
+        self.df = df
+        self.column_map = column_map
+        self.res_intensity_dict = {}  # {sample: {target: summed intensity}}
+        self.greedy_method = None  # only used for razor method
+        self.mini_target_set = None
+        self.filtered_target_to_peptides = None
+        # False: a peptide tied between several targets goes to the first one;
+        # True: its intensity is split evenly between the tied targets.
+        self.share_intensity = False
+        self.__multi_target_count = 0
+
+    def sum_protein_intensity(self, greedy_method='heap'):
+        """Sum the peptide intensities of every sample to the razor targets.
+
+        Args:
+            greedy_method: 'heap' or 'greedy', see find_minimum_target_set().
+
+        Returns:
+            pd.DataFrame: Index 'Target', one column per sample; missing
+            target/sample combinations are filled with 0.
+
+        Raises:
+            ValueError: If column_map has no samples, the target column holds
+                NA or empty values, or greedy_method is invalid.
+        """
+        self.greedy_method = greedy_method
+        print('Start to sum protein intensity using method: [razor]')
+        # read the samples from the instance, not from a module-level global
+        sample_list = self.column_map.get('sample_list')
+        if sample_list is None or len(sample_list) == 0:
+            raise ValueError('Please provide [sample_list] in column_map for sum, e.g. ["Sample1", "Sample2", "Sample3"]')
+        # start from a clean state so that repeated calls do not accumulate
+        self.res_intensity_dict = {}
+        self.__multi_target_count = 0
+        # only extract the peptide, target and sample columns
+        extract_cols = [self.column_map['peptide'], self.column_map['target']] + list(sample_list)
+        self.df = self.df.loc[:, extract_cols]
+
+        pep_to_target = self._create_pep_to_target_razor()
+        self._sum_target_intensity(pep_to_target)
+
+        # show summary
+        total_peptides = len(pep_to_target)
+        print(f"Total peptides count: {total_peptides}")
+        # the counter grew once per sample for every multi-target peptide
+        self.__multi_target_count //= len(sample_list)
+        multi_ratio = self.__multi_target_count / total_peptides * 100 if total_peptides else 0.0
+        print(f"Multi-target peptides count: {self.__multi_target_count} ({multi_ratio:.2f}%)")
+
+        res_df = pd.DataFrame.from_dict(self.res_intensity_dict)
+        res_df.fillna(0, inplace=True)
+        res_df.index.name = 'Target'
+
+        print('Finish summing protein intensity')
+
+        return res_df
+
+    def get_mini_target_set(self, greedy_method='heap'):
+        """Select a minimum set of targets that covers all peptides.
+
+        The result is also stored in self.mini_target_set, and the peptides of
+        the selected targets in self.filtered_target_to_peptides.
+
+        Args:
+            greedy_method: 'heap' or 'greedy', see find_minimum_target_set().
+
+        Returns:
+            set: The selected targets.
+
+        Raises:
+            ValueError: If the target column holds NA or empty values, or
+                greedy_method is invalid.
+        """
+        self.greedy_method = greedy_method
+        print('Start to get minimum target set using method: [razor]')
+        peptide_col = self.column_map['peptide']
+        target_col = self.column_map['target']
+        # if NA in target column, or '', raise error
+        if self.df[target_col].isna().any() or '' in self.df[target_col].values:
+            raise ValueError(f'NA or empty value in target column: {target_col}')
+
+        # always keep the peptide and target columns; the sample columns are
+        # optional here ("A + B if B else []" used to drop all of them)
+        sample_list = self.column_map.get('sample_list') or []
+        self.df = self.df.loc[:, [peptide_col, target_col] + list(sample_list)]
+
+        peptides = set(self.df[peptide_col])
+        target_to_peptides = self._create_target_to_peptides()
+        mini_target_set = self.find_minimum_target_set(peptides, target_to_peptides)
+        self.mini_target_set = mini_target_set
+        self.filtered_target_to_peptides = {target: target_to_peptides[target] for target in mini_target_set}
+        return self.mini_target_set
+
+    def _create_pep_to_target_razor(self):
+        """
+        Create a dictionary mapping peptides to targets based on a minimum target set.
+
+        A peptide goes to the selected target(s) that own the most peptides;
+        tied targets are all kept, in sorted order.
+
+        Returns:
+            dict: A dictionary mapping peptides to targets.
+            key: peptide
+            value: a list of targets
+        """
+        self.get_mini_target_set(self.greedy_method)
+
+        peptides = set(self.df[self.column_map['peptide']])
+        filtered_target_to_peptides = self.filtered_target_to_peptides
+
+        # invert the mapping once instead of scanning every target per peptide
+        candidate_targets = defaultdict(list)
+        for target, peps in filtered_target_to_peptides.items():
+            for peptide in peps:
+                candidate_targets[peptide].append(target)
+
+        peptide_to_target = defaultdict(list)
+        for peptide in tqdm(peptides, desc="Assigning peptides to targets"):
+            possible_targets = candidate_targets.get(peptide)
+            if possible_targets:
+                max_target_count = max(len(filtered_target_to_peptides[target]) for target in possible_targets)
+                # sorted: the iteration order of the target set differs between runs
+                best_targets = sorted(target for target in possible_targets if len(filtered_target_to_peptides[target]) == max_target_count)
+                peptide_to_target[peptide].extend(best_targets)
+
+        return peptide_to_target
+
+    def _create_target_to_peptides(self):
+        """
+        Create a dictionary mapping targets to peptides.
+        e.g. {'target1': {'peptide1', 'peptide2'}, 'target2': {'peptide1', 'peptide3'}}
+
+        Returns:
+            defaultdict: key: target, value: set of peptides that list the target.
+        """
+        peptide_col = self.column_map['peptide']
+        target_col = self.column_map['target']
+        target_to_peptides = defaultdict(set)
+
+        # zip over the two columns; iterrows() would build a Series per row
+        rows = zip(self.df[peptide_col], self.df[target_col])
+        for sequence, targets in tqdm(rows, total=self.df.shape[0], desc="Creating target to peptides mapping"):
+            for target in targets.split(';'):
+                target_to_peptides[target].add(sequence)
+
+        return target_to_peptides
+
+    def _sum_target_intensity(self, peptide_to_target):
+        """Add the intensity of every peptide to its target(s) for each sample.
+
+        Args:
+            peptide_to_target: dict, key: peptide, value: list of targets.
+        """
+        df_by_peptide = self.df.set_index(self.column_map['peptide'])
+        for sample in tqdm(self.column_map['sample_list'], desc="Summing intensity"):
+            # missing intensities count as 0; a single NaN would turn the whole
+            # target sum into NaN, which fillna(0) later replaces by 0
+            peptide_intensity_dict = df_by_peptide[sample].fillna(0).to_dict()
+            for peptide, targets in peptide_to_target.items():
+                intensity = peptide_intensity_dict.get(peptide, 0)
+                self._update_output_dict(targets, sample, intensity)
+
+    def find_minimum_target_set(self, peptides, target_to_peptides):
+        """Approximate the smallest set of targets that covers all peptides.
+
+        Greedy set cover: repeatedly select the target that covers the most
+        peptides that are still uncovered. self.greedy_method selects the
+        implementation:
+            'greedy': rescan all remaining targets in every round.
+            'heap': keep the targets in a max-heap and re-evaluate an entry
+                only when it reaches the top.
+
+        Args:
+            peptides: iterable of peptides to cover.
+            target_to_peptides: dict, key: target, value: set of peptides.
+
+        Returns:
+            set: The selected targets. Peptides that no target contains stay
+            uncovered.
+
+        Raises:
+            ValueError: If self.greedy_method is not 'greedy' or 'heap'.
+        """
+        target_to_peptides_copy = target_to_peptides.copy()
+        # print current target number
+        print(f'Current target number: {len(target_to_peptides_copy)}')
+        peptides_to_cover = set(peptides)
+        selected_targets = set()
+        method = self.greedy_method
+
+        if method == 'greedy':
+            print('Start creating protein dict for "Set Cover Problem" with Greedy Approximation Algorithm')
+            with tqdm(total=len(peptides_to_cover), desc="Covering peptides") as pbar:
+                while peptides_to_cover:
+                    best_protein = None
+                    peptides_covered_by_best = set()
+                    for protein, covered_peptides in target_to_peptides_copy.items():
+                        covered = peptides_to_cover & covered_peptides
+                        if len(covered) > len(peptides_covered_by_best):
+                            best_protein = protein
+                            peptides_covered_by_best = covered
+
+                    # no remaining target covers a peptide that is still open
+                    if best_protein is None:
+                        break
+
+                    selected_targets.add(best_protein)
+                    peptides_to_cover -= peptides_covered_by_best
+                    target_to_peptides_copy.pop(best_protein)  # remove the protein from the dict to speed up the process
+                    pbar.update(len(peptides_covered_by_best))
+        elif method == 'heap':
+            import heapq
+            # max-heap through negated counts; ties are broken by target name
+            target_heap = [(-len(covered_peptides & peptides_to_cover), target)
+                           for target, covered_peptides in target_to_peptides_copy.items()]
+            heapq.heapify(target_heap)
+
+            with tqdm(total=len(peptides_to_cover), desc="Covering peptides") as pbar:
+                while peptides_to_cover and target_heap:
+                    neg_count, best_target = heapq.heappop(target_heap)
+                    peptides_covered_by_best = target_to_peptides_copy[best_target] & peptides_to_cover
+                    if len(peptides_covered_by_best) != -neg_count:
+                        # stale priority: coverage only shrinks, so re-queue the
+                        # target with its current count instead of selecting it
+                        if peptides_covered_by_best:
+                            heapq.heappush(target_heap, (-len(peptides_covered_by_best), best_target))
+                        continue
+                    if not peptides_covered_by_best:
+                        # even the best remaining target covers nothing new
+                        break
+
+                    selected_targets.add(best_target)
+                    peptides_to_cover -= peptides_covered_by_best
+                    pbar.update(len(peptides_covered_by_best))
+        else:
+            raise ValueError(f"Invalid greedy method: {method}. Must be ['greedy' or 'heap']")
+
+        print(f'Minium target number: {len(selected_targets)}')
+        return selected_targets
+
+    def _update_output_dict(self, target_list, sample_name, intensity):
+        """Add the intensity of one peptide to its target(s) for one sample.
+
+        With several targets the intensity is split evenly when
+        self.share_intensity is True; otherwise all of it goes to the first
+        target and the peptide is counted as a multi-target peptide.
+
+        Args:
+            target_list: list of targets the peptide was assigned to.
+            sample_name: sample (column) the intensity belongs to.
+            intensity: intensity of the peptide in that sample.
+        """
+        if not target_list:
+            return  # no target to credit
+
+        if len(target_list) > 1:
+            if self.share_intensity:
+                intensity /= len(target_list)
+            else:  # assign the intensity to the 1st target
+                self.__multi_target_count += 1
+                target_list = target_list[:1]
+
+        sample_dict = self.res_intensity_dict.setdefault(sample_name, {})
+        for target in target_list:
+            sample_dict[target] = sample_dict.get(target, 0) + intensity
+
+
+# Example usage:
+# Assuming df is your pandas dataframe and column_map is your dictionary
+if __name__ == '__main__':
+    df = pd.read_csv('OTF.tsv', sep='\t')
+    df_meta = pd.read_csv('meta.txt', sep='\t')
+    sample_list = df_meta['Samples'].unique().tolist()
+    sample_list = ["Intensity_" + sample for sample in sample_list]
+
+    column_map = {
+        'peptide': 'Sequence',
+        'target': 'Proteins',
+        'sample_list': sample_list  # ['Sample1', 'Sample2', 'Sample3']
+    }
+    sia = RazorSum(df, column_map)
+
+    res_df = sia.sum_protein_intensity(greedy_method='heap')
+    res_df.to_csv('razor_protein_intensity.tsv', sep='\t')
+
+    # or get minimum target set only
+    # mini_target_set = sia.get_mini_target_set(greedy_method='heap')
+
diff --git a/metax/utils/version.py b/metax/utils/version.py
index 3896684..f7a9d51 100644
--- a/metax/utils/version.py
+++ b/metax/utils/version.py
@@ -1,2 +1,2 @@
-__version__ = '1.109.12'
+__version__ = '1.110.0'
 API_version = '2'
\ No newline at end of file

From 82772560e41f38869a0c1cf558db4b5e8fc2f4dc Mon Sep 17 00:00:00 2001
From: Qing <44231502+byemaxx@users.noreply.github.com>
Date: Mon, 12 Aug 2024 14:23:12 -0400
Subject: [PATCH 2/2] 	modified:   pyproject.toml

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 77fd009..774eb4f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "MetaXTools"
-version = "1.109.8"
+version = "1.110.0"
 description = "MetaXTools is a novel tool for linking peptide sequences with taxonomic and functional information in Metaproteomics."
 readme = "README_PyPi.md"
 license = { text = "NorthOmics" }