From e7aad1f175853584caaffa355efcc42f3a2712a0 Mon Sep 17 00:00:00 2001
From: Qing <44231502+byemaxx@users.noreply.github.com>
Date: Fri, 4 Oct 2024 12:43:13 -0400
Subject: [PATCH 1/4] - New: added a new Normalization method: "Trace Shifting"
 for the data preprocessing.

---
 Docs/ChangeLog.md                             |  6 +-
 Docs/MetaX_Cookbook.md                        |  4 ++
 metax/gui/main_gui.py                         |  1 +
 metax/gui/metax_gui/main_window.ui            | 25 +++++----
 metax/gui/metax_gui/ui_main_window.py         | 24 ++++----
 metax/taxafunc_analyzer/analyzer.py           | 31 +++++++++--
 .../analyzer_utils/data_preprocessing.py      |  8 ++-
 metax/taxafunc_analyzer/analyzer_utils/lfq.py | 55 +++++++++++++++++--
 metax/utils/version.py                        |  2 +-
 pyproject.toml                                |  2 +-
 10 files changed, 122 insertions(+), 36 deletions(-)
diff --git a/Docs/ChangeLog.md b/Docs/ChangeLog.md
index fd2c231..d24f626 100644
--- a/Docs/ChangeLog.md
+++ b/Docs/ChangeLog.md
@@ -1,9 +1,13 @@
+# Version: 1.115.2
+## Date: 2024-10-03
+### Changes:
+- New: added a new Normalization method: "Trace Shifting" for the data preprossing.
+
 # Version: 1.115.1
 ## Date: 2024-10-03
 ### Changes:
 - New: added multiprocessing option for LFQ.
 
-
 # Version: 1.115.0
 ## Date: 2024-10-02
 ### Changes:
diff --git a/Docs/MetaX_Cookbook.md b/Docs/MetaX_Cookbook.md
index d496bc0..1808fe0 100644
--- a/Docs/MetaX_Cookbook.md
+++ b/Docs/MetaX_Cookbook.md
@@ -339,7 +339,11 @@ There are several methods for detecting and handling outliers.
 
 - **Data Normalization:**
 
+  - **Trace Shifting:** Reframing the Normalization Problem with Intensity traces (inspired by DirectLFQ).
+    - Note: If <u>both</u> trace shifting and transformation are applied, *<u>normalization will be done before transformation.</u>*
+  
   - Standard Scaling (Z-Score), Min-Max Scaling, Pareto Scaling, Mean centring and Normalization by sum.
+  
 
 <u>If you use [Z-Score, Mean centring and Pareto Scaling] data normalization, the data will be given a minimum offset again to avoid negative values.</u>
 
diff --git a/metax/gui/main_gui.py b/metax/gui/main_gui.py
index 3e133cc..a7332de 100644
--- a/metax/gui/main_gui.py
+++ b/metax/gui/main_gui.py
@@ -2619,6 +2619,7 @@ def set_multi_table(self, restore_taxafunc=False,  saved_obj=None):
                 }
                 normalize_dict = {
                     "None": None,
+                    "Trace Shifting": "trace_shift",
                     "Mean centering": "mean",
                     "Standard Scaling (Z-Score)": "zscore",
                     "Min-Max Scaling": "minmax",
diff --git a/metax/gui/metax_gui/main_window.ui b/metax/gui/metax_gui/main_window.ui
index 22e9bc0..530ed76 100644
--- a/metax/gui/metax_gui/main_window.ui
+++ b/metax/gui/metax_gui/main_window.ui
@@ -245,8 +245,8 @@
                 <rect>
                  <x>0</x>
                  <y>0</y>
-                 <width>528</width>
-                 <height>534</height>
+                 <width>391</width>
+                 <height>80</height>
                 </rect>
                </property>
                <attribute name="label">
@@ -939,6 +939,11 @@
                   <string>None</string>
                  </property>
                 </item>
+                <item>
+                 <property name="text">
+                  <string>Trace Shifting</string>
+                 </property>
+                </item>
                 <item>
                  <property name="text">
                   <string>Standard Scaling (Z-Score)</string>
@@ -2771,7 +2776,7 @@
                        <rect>
                         <x>0</x>
                         <y>0</y>
-                        <width>999</width>
+                        <width>621</width>
                         <height>150</height>
                        </rect>
                       </property>
@@ -3774,7 +3779,7 @@
                       <rect>
                        <x>0</x>
                        <y>0</y>
-                       <width>1020</width>
+                       <width>878</width>
                        <height>128</height>
                       </rect>
                      </property>
@@ -5668,8 +5673,8 @@
                           <rect>
                            <x>0</x>
                            <y>0</y>
-                           <width>996</width>
-                           <height>103</height>
+                           <width>535</width>
+                           <height>94</height>
                           </rect>
                          </property>
                          <layout class="QGridLayout" name="gridLayout_68">
@@ -7449,8 +7454,8 @@
                        <rect>
                         <x>0</x>
                         <y>0</y>
-                        <width>1016</width>
-                        <height>105</height>
+                        <width>620</width>
+                        <height>65</height>
                        </rect>
                       </property>
                       <layout class="QGridLayout" name="gridLayout_57">
@@ -8158,8 +8163,8 @@
                        <rect>
                         <x>0</x>
                         <y>0</y>
-                        <width>1016</width>
-                        <height>185</height>
+                        <width>775</width>
+                        <height>102</height>
                        </rect>
                       </property>
                       <layout class="QGridLayout" name="gridLayout_69">
diff --git a/metax/gui/metax_gui/ui_main_window.py b/metax/gui/metax_gui/ui_main_window.py
index f077358..efb045f 100644
--- a/metax/gui/metax_gui/ui_main_window.py
+++ b/metax/gui/metax_gui/ui_main_window.py
@@ -147,7 +147,7 @@ def setupUi(self, metaX_main):
         self.toolBox_2.setMaximumSize(QtCore.QSize(1677, 16777215))
         self.toolBox_2.setObjectName("toolBox_2")
         self.page_2 = QtWidgets.QWidget()
-        self.page_2.setGeometry(QtCore.QRect(0, 0, 528, 534))
+        self.page_2.setGeometry(QtCore.QRect(0, 0, 391, 80))
         self.page_2.setObjectName("page_2")
         self.gridLayout_27 = QtWidgets.QGridLayout(self.page_2)
         self.gridLayout_27.setObjectName("gridLayout_27")
@@ -484,6 +484,7 @@ def setupUi(self, metaX_main):
         self.comboBox_set_data_normalization.addItem("")
         self.comboBox_set_data_normalization.addItem("")
         self.comboBox_set_data_normalization.addItem("")
+        self.comboBox_set_data_normalization.addItem("")
         self.gridLayout_15.addWidget(self.comboBox_set_data_normalization, 7, 1, 1, 3)
         self.comboBox_remove_batch_effect = QtWidgets.QComboBox(self.tab_set_taxa_func)
         sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Fixed)
@@ -1407,7 +1408,7 @@ def setupUi(self, metaX_main):
         self.scrollArea_2.setWidgetResizable(True)
         self.scrollArea_2.setObjectName("scrollArea_2")
         self.scrollAreaWidgetContents_2 = QtWidgets.QWidget()
-        self.scrollAreaWidgetContents_2.setGeometry(QtCore.QRect(0, 0, 999, 150))
+        self.scrollAreaWidgetContents_2.setGeometry(QtCore.QRect(0, 0, 621, 150))
         self.scrollAreaWidgetContents_2.setObjectName("scrollAreaWidgetContents_2")
         self.gridLayout_50 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_2)
         self.gridLayout_50.setObjectName("gridLayout_50")
@@ -1956,7 +1957,7 @@ def setupUi(self, metaX_main):
         self.scrollArea_cross_heatmap_settings.setWidgetResizable(True)
         self.scrollArea_cross_heatmap_settings.setObjectName("scrollArea_cross_heatmap_settings")
         self.scrollAreaWidgetContents_3 = QtWidgets.QWidget()
-        self.scrollAreaWidgetContents_3.setGeometry(QtCore.QRect(0, 0, 1020, 128))
+        self.scrollAreaWidgetContents_3.setGeometry(QtCore.QRect(0, 0, 878, 128))
         self.scrollAreaWidgetContents_3.setObjectName("scrollAreaWidgetContents_3")
         self.gridLayout_38 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_3)
         self.gridLayout_38.setObjectName("gridLayout_38")
@@ -2890,7 +2891,7 @@ def setupUi(self, metaX_main):
         self.scrollArea_3.setWidgetResizable(True)
         self.scrollArea_3.setObjectName("scrollArea_3")
         self.scrollAreaWidgetContents_4 = QtWidgets.QWidget()
-        self.scrollAreaWidgetContents_4.setGeometry(QtCore.QRect(0, 0, 996, 103))
+        self.scrollAreaWidgetContents_4.setGeometry(QtCore.QRect(0, 0, 535, 94))
         self.scrollAreaWidgetContents_4.setObjectName("scrollAreaWidgetContents_4")
         self.gridLayout_68 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_4)
         self.gridLayout_68.setObjectName("gridLayout_68")
@@ -3836,7 +3837,7 @@ def setupUi(self, metaX_main):
         self.scrollArea_5.setWidgetResizable(True)
         self.scrollArea_5.setObjectName("scrollArea_5")
         self.scrollAreaWidgetContents_6 = QtWidgets.QWidget()
-        self.scrollAreaWidgetContents_6.setGeometry(QtCore.QRect(0, 0, 1016, 105))
+        self.scrollAreaWidgetContents_6.setGeometry(QtCore.QRect(0, 0, 620, 65))
         self.scrollAreaWidgetContents_6.setObjectName("scrollAreaWidgetContents_6")
         self.gridLayout_57 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_6)
         self.gridLayout_57.setObjectName("gridLayout_57")
@@ -4217,7 +4218,7 @@ def setupUi(self, metaX_main):
         self.scrollArea_6.setWidgetResizable(True)
         self.scrollArea_6.setObjectName("scrollArea_6")
         self.scrollAreaWidgetContents_7 = QtWidgets.QWidget()
-        self.scrollAreaWidgetContents_7.setGeometry(QtCore.QRect(0, 0, 1016, 185))
+        self.scrollAreaWidgetContents_7.setGeometry(QtCore.QRect(0, 0, 775, 102))
         self.scrollAreaWidgetContents_7.setObjectName("scrollAreaWidgetContents_7")
         self.gridLayout_69 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_7)
         self.gridLayout_69.setObjectName("gridLayout_69")
@@ -5555,11 +5556,12 @@ def retranslateUi(self, metaX_main):
         self.comboBox_outlier_handling_method2.setItemText(4, _translate("metaX_main", "regression"))
         self.label_102.setText(_translate("metaX_main", "Outliers Handling by"))
         self.comboBox_set_data_normalization.setItemText(0, _translate("metaX_main", "None"))
-        self.comboBox_set_data_normalization.setItemText(1, _translate("metaX_main", "Standard Scaling (Z-Score)"))
-        self.comboBox_set_data_normalization.setItemText(2, _translate("metaX_main", "Min-Max Scaling"))
-        self.comboBox_set_data_normalization.setItemText(3, _translate("metaX_main", "Pareto Scaling"))
-        self.comboBox_set_data_normalization.setItemText(4, _translate("metaX_main", "Mean centering"))
-        self.comboBox_set_data_normalization.setItemText(5, _translate("metaX_main", "Normalization by sum"))
+        self.comboBox_set_data_normalization.setItemText(1, _translate("metaX_main", "Trace Shifting"))
+        self.comboBox_set_data_normalization.setItemText(2, _translate("metaX_main", "Standard Scaling (Z-Score)"))
+        self.comboBox_set_data_normalization.setItemText(3, _translate("metaX_main", "Min-Max Scaling"))
+        self.comboBox_set_data_normalization.setItemText(4, _translate("metaX_main", "Pareto Scaling"))
+        self.comboBox_set_data_normalization.setItemText(5, _translate("metaX_main", "Mean centering"))
+        self.comboBox_set_data_normalization.setItemText(6, _translate("metaX_main", "Normalization by sum"))
         self.comboBox_remove_batch_effect.setItemText(0, _translate("metaX_main", "None"))
         self.label_41.setText(_translate("metaX_main", "Data Normalization"))
         self.label_43.setText(_translate("metaX_main", "Batch Effect Correction"))
diff --git a/metax/taxafunc_analyzer/analyzer.py b/metax/taxafunc_analyzer/analyzer.py
index 8dbb7ac..24e37ff 100644
--- a/metax/taxafunc_analyzer/analyzer.py
+++ b/metax/taxafunc_analyzer/analyzer.py
@@ -688,12 +688,31 @@ def run_lfq_for_taxa_func(self, df_taxa_func):
         df_taxa_func = df_taxa_func.set_index(['Taxon', self.func_name], drop=True)
         
         return df_taxa_func
+    
+    def update_data_preprocess_parameters(self, data_preprocess_params, peptide_num_threshold):
+        data_preprocess_params['peptide_num_threshold'] = peptide_num_threshold
+        
+        normalize_method = data_preprocess_params['normalize_method']
+        transform_method = data_preprocess_params['transform_method']
+        processing_order = data_preprocess_params['processing_order']
+        
+        if 'trace_shift' == normalize_method and transform_method not in ['None', None]:
+            print(f'Warning: [Trace Shifting] and {transform_method} are both set, Normalize will be prior to Transform.')
+            # move 'normalize' to the first
+            processing_order = ['normalize'] + [i for i in processing_order if i != 'normalize']
+            print(f'Data Preprocessing order: {processing_order}')
+        
+        data_preprocess_params['processing_order'] = processing_order
+                
+        
+        return data_preprocess_params
+        
             
     def set_multi_tables(self, level: str = 's', func_threshold:float = 1.00,
                          outlier_params: dict = {'detect_method': None, 'handle_method': None,
                                                  "detection_by_group" : None, "handle_by_group": None},
                          data_preprocess_params: dict = {'normalize_method': None, 'transform_method': None,
-                                                            'batch_meta': None, 'processing_order': None},
+                                                            'batch_meta': None, 'processing_order': ['transform', 'normalize', 'batch']},
                           peptide_num_threshold: dict = {'taxa': 1, 'func': 1, 'taxa_func': 1},
                           sum_protein:bool = False, sum_protein_params: dict = {'method': 'razor',
                                                                                 'by_sample': False,
@@ -731,8 +750,8 @@ def set_multi_tables(self, level: str = 's', func_threshold:float = 1.00,
             return
 
         #! fllowing code is for the normal mode
-        # add 'peptide_num_threshold' to 'data_preprocess_params
-        data_preprocess_params['peptide_num_threshold'] = peptide_num_threshold
+        # Update 'data_preprocess_params'
+        data_preprocess_params = self.update_data_preprocess_parameters(data_preprocess_params, peptide_num_threshold)
         
         #2. sum the protein intensity
         if sum_protein:
@@ -1009,15 +1028,15 @@ def get_df(self, table_name:str = 'taxa'):
                         outlier_params = {'detect_method': 'zero-dominant', 'handle_method': 'original',
                             "detection_by_group" : 'Individual', "handle_by_group": None},
                         data_preprocess_params = {
-                                                'normalize_method': None, 
+                                                'normalize_method': 'trace_shift', 
                                                 'transform_method': "log2",
                                                 'batch_meta': 'None', 
-                                                'processing_order': None},
+                                                'processing_order': ['transform', 'normalize', 'batch']},
                     peptide_num_threshold = {'taxa': 2, 'func': 2, 'taxa_func': 2},
                     keep_unknow_func=False, sum_protein=False, 
                     sum_protein_params = {'method': 'razor', 'by_sample': False, 'rank_method': 'unique_counts', 'greedy_method': 'heap', 'peptide_num_threshold': 3},
                     split_func=True, split_func_params = {'split_by': '|', 'share_intensity': False},
-                    taxa_and_func_only_from_otf=False, quant_method='lfq'
+                    taxa_and_func_only_from_otf=False, quant_method='sum'
                     )
 
     sw.check_attributes()
\ No newline at end of file
diff --git a/metax/taxafunc_analyzer/analyzer_utils/data_preprocessing.py b/metax/taxafunc_analyzer/analyzer_utils/data_preprocessing.py
index 9df4db8..fef5f33 100644
--- a/metax/taxafunc_analyzer/analyzer_utils/data_preprocessing.py
+++ b/metax/taxafunc_analyzer/analyzer_utils/data_preprocessing.py
@@ -101,6 +101,10 @@ def _data_transform(self, df: pd.DataFrame, transform_method: str|None = None) -
 
     
     def _data_normalization(self, df: pd.DataFrame, normalize_method: str|None = None) -> pd.DataFrame:
+        def trace_shift(x):
+            from .lfq import run_normalization
+            return run_normalization(x)
+
         if normalize_method is None:
             print('normalize_method is not set, data normalization did not perform.')
         else:
@@ -117,7 +121,8 @@ def _data_normalization(self, df: pd.DataFrame, normalize_method: str|None = Non
                 'sum': lambda x: x / (x.sum() + epsilon),
                 'minmax': lambda x: (x - x.min()) / (x.max() - x.min()),
                 'zscore': lambda x: (x - x.mean()) / (x.std() + epsilon),
-                'pareto': lambda x: (x - x.mean()) / (np.sqrt(x.std() + epsilon))
+                'pareto': lambda x: (x - x.mean()) / (np.sqrt(x.std() + epsilon)),
+                'trace_shift': lambda x: trace_shift(x)
             }
 
             if normalize_method in normalize_operations:
@@ -624,6 +629,7 @@ def data_preprocess(self, df: pd.DataFrame, normalize_method: str|None = None,
         - `normalize_method` (`str`, optional):  
         Method used for data normalization. Options include:
             - `None`: No normalization.
+            - `trace_shift`: Trace shift normalization inspired by DirectLFQ.
             - `mean`: Mean normalization.
             - `sum`: Sum normalization.
             - `minmax`: Min-max normalization.
diff --git a/metax/taxafunc_analyzer/analyzer_utils/lfq.py b/metax/taxafunc_analyzer/analyzer_utils/lfq.py
index 05fa0a8..abe5dcc 100644
--- a/metax/taxafunc_analyzer/analyzer_utils/lfq.py
+++ b/metax/taxafunc_analyzer/analyzer_utils/lfq.py
@@ -2,7 +2,6 @@
 import pandas as pd
 import numpy as np
 from numba import njit
-import multiprocessing
 import os
 
 # Setup logging
@@ -11,7 +10,7 @@
 def setup_logging():
     logging.basicConfig(
         level=logging.INFO,
-        format="LFQ: %(message)s",
+        format="%(asctime)s - %(levelname)s - %(message)s",
     )
 
 setup_logging()
@@ -451,7 +450,7 @@ def get_list_of_tuple_w_protein_profiles_and_shifted_peptides(
     )
 
     if num_cores is not None and num_cores > 1:
-        # Use multiprocessing
+        import multiprocessing
         pool = multiprocessing.Pool(num_cores)
         args = [
             (
@@ -644,12 +643,53 @@ def get_ion_intensity_dataframe_from_list_of_shifted_peptides(
     return ion_df
 
 
+def is_numeric_matrix(df):
+    # mark non-numeric values as NaN
+    numeric_df = df.apply(pd.to_numeric, errors='coerce')
+    # check if nan values are present
+    return numeric_df.notna().all().all()
+
+
+def run_normalization(
+            input_df: pd.DataFrame,
+            number_of_quadratic_samples: int = 100
+       ):
+    '''
+    Normalize the input DataFrame.
+    Args:
+        input_df (pd.DataFrame): A matrix of intensities.Columns are samples, index is items to be normalized.
+        number_of_quadratic_samples (int, optional): How many samples are used to create the anchor intensity trace. Increasing might marginally increase performance at the cost of runtime
+    Returns:
+        pd.DataFrame: The normalized DataFrame.
+    '''
+    # chcek if only numbers are in the dataframe
+    if not is_numeric_matrix(input_df):
+        raise ValueError("Input DataFrame contains non-numeric values. Make sure to the items column is set as index.")
+    
+    copy_numpy_arrays = check_whether_to_copy_numpy_arrays_derived_from_pandas()
+    input_df = np.log2(input_df.replace(0, np.nan)) # type: ignore
+    input_df = input_df.dropna(axis=0, how="all")
+    
+    LOGGER.info("Performing sample normalization.")
+    input_df = NormalizationManagerSamplesOnSelectedProteins(
+        input_df,
+        num_samples_quadratic=number_of_quadratic_samples,
+        selected_proteins_file=None,
+        copy_numpy_arrays=copy_numpy_arrays,
+    ).complete_dataframe
+    # restore log2 values
+    input_df = 2 ** input_df
+    # fill NaNs with 0
+    input_df = input_df.fillna(0)
+    
+    return input_df
+
 def run_lfq(
     input_df,
     protein_id: str = "protein",
     quant_id: str = "ion",
     min_nonan: int = 1,
-    number_of_quadratic_samples: int = 50,
+    number_of_quadratic_samples: int = 100,
     maximum_number_of_quadratic_ions_to_use_per_protein: int = 10,
     log_processed_proteins: bool = True,
     compile_normalized_ion_table: bool = True,
@@ -699,17 +739,22 @@ def run_lfq(
     df_path = os.path.join(current_dir, "../../../local_tests/peptide_for_protein.tsv")
     df = pd.read_csv(df_path, sep="\t")
 
+    # protein_df = df.drop(columns=["Proteins"])
+    # protein_df.set_index("Sequence", inplace=True)
+    # print(protein_df.head())
+    # df1 = run_normalization(protein_df)
     
     protein_df, ion_df = run_lfq(
         df,
         protein_id="Proteins",
         quant_id="Sequence",
         min_nonan=1,
-        number_of_quadratic_samples=50,
+        number_of_quadratic_samples=500,
         maximum_number_of_quadratic_ions_to_use_per_protein=10,
         num_cores=None,
         use_multiprocessing=True
     )
+    
     print(protein_df.shape)
     print(protein_df.head())
     t2 = time.time()
diff --git a/metax/utils/version.py b/metax/utils/version.py
index cfa61a3..15f221c 100644
--- a/metax/utils/version.py
+++ b/metax/utils/version.py
@@ -1,2 +1,2 @@
-__version__ = '1.115.1'
+__version__ = '1.115.2'
 API_version = '3'
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 8d4e3df..a7b6371 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "MetaXTools"
-version = "1.115.1"
+version = "1.115.2"
 description = "MetaXTools is a novel tool for linking peptide sequences with taxonomic and functional information in Metaproteomics."
 readme = "README_PyPi.md"
 license = { text = "NorthOmics" }

From 349e3a6da09b28ff0c6f1e58151b0f0880ddf93e Mon Sep 17 00:00:00 2001
From: Qing <44231502+byemaxx@users.noreply.github.com>
Date: Fri, 4 Oct 2024 14:15:31 -0400
Subject: [PATCH 2/4] - Fix: the pvalue/padj filter option did not work for
 functional redundancy analysis in the T-Test part.

---
 Docs/ChangeLog.md                                    | 7 ++++++-
 metax/gui/main_gui.py                                | 5 +++--
 metax/taxafunc_analyzer/analyzer_utils/cross_test.py | 2 +-
 metax/utils/version.py                               | 2 +-
 pyproject.toml                                       | 2 +-
 5 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/Docs/ChangeLog.md b/Docs/ChangeLog.md
index d24f626..05e1ae3 100644
--- a/Docs/ChangeLog.md
+++ b/Docs/ChangeLog.md
@@ -1,5 +1,10 @@
+# Version: 1.115.3
+## Date: 2024-10-04
+### Changes:
+- Fix: Fixed the filter of pvalue or padj option not work for functional redundancy analysis in T-Test part.
+
 # Version: 1.115.2
-## Date: 2024-10-03
+## Date: 2024-10-04
 ### Changes:
 - New: added a new Normalization method: "Trace Shifting" for the data preprossing.
 
diff --git a/metax/gui/main_gui.py b/metax/gui/main_gui.py
index a7332de..10cbe18 100644
--- a/metax/gui/main_gui.py
+++ b/metax/gui/main_gui.py
@@ -4881,12 +4881,13 @@ def t_test(self):
             try:
                 self.pushButton_ttest.setEnabled(False)
                 group_list = [group1, group2]
-                table_names = []
+                table_names = [] # reset table_names as empty list
                 if df_type == 'Significant Taxa-Func'.lower():
                     p_value = self.doubleSpinBox_top_heatmap_pvalue.value()
                     p_value = round(p_value, 4)
+                    p_type = self.comboBox_top_heatmap_p_type.currentText()
                     
-                    ttest_sig_tf_params = {'group_list': group_list, 'p_value': p_value, 'condition': condition}
+                    ttest_sig_tf_params = {'group_list': group_list, 'p_value': p_value, 'condition': condition, "p_type": p_type}
                     self.run_in_new_window(self.tfa.CrossTest.get_stats_diff_taxa_but_func, callback= self.callback_after_ttest, **ttest_sig_tf_params)
                     
                 
diff --git a/metax/taxafunc_analyzer/analyzer_utils/cross_test.py b/metax/taxafunc_analyzer/analyzer_utils/cross_test.py
index 1139341..6ae5e51 100644
--- a/metax/taxafunc_analyzer/analyzer_utils/cross_test.py
+++ b/metax/taxafunc_analyzer/analyzer_utils/cross_test.py
@@ -598,7 +598,7 @@ def get_stats_diff_taxa_but_func(self, group_list: list|None = None, p_value: fl
                                      condition:list|None =None, p_type: str = 'padj'
                                      ) -> tuple:
         p_col_name = 'pvalue' if p_type == 'pvalue' else 'padj'
-        
+        print(f"Using [{p_col_name}] for filtering")
         # calculate the test result if not given
         if taxa_res_df is None or func_res_df is None or taxa_func_res_df is None:
             print("No test result given, calculating the test result first")
diff --git a/metax/utils/version.py b/metax/utils/version.py
index 15f221c..06dc858 100644
--- a/metax/utils/version.py
+++ b/metax/utils/version.py
@@ -1,2 +1,2 @@
-__version__ = '1.115.2'
+__version__ = '1.115.3'
 API_version = '3'
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index a7b6371..b99ae8e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "MetaXTools"
-version = "1.115.2"
+version = "1.115.3"
 description = "MetaXTools is a novel tool for linking peptide sequences with taxonomic and functional information in Metaproteomics."
 readme = "README_PyPi.md"
 license = { text = "NorthOmics" }

From 9060e6d8204c3c712bd46a57bc5ff9d259548012 Mon Sep 17 00:00:00 2001
From: Qing <44231502+byemaxx@users.noreply.github.com>
Date: Thu, 10 Oct 2024 15:18:46 -0400
Subject: [PATCH 3/4] move peptide number threshold out of the
 data_preprocessing to get the accurate number of peptides used in the
 analysis

---
 Docs/ChangeLog.md                             |  5 +
 metax/gui/main_gui.py                         |  3 +-
 metax/taxafunc_analyzer/analyzer.py           | 92 +++++++++++++++++--
 .../analyzer_utils/data_preprocessing.py      | 15 +--
 metax/utils/version.py                        |  2 +-
 5 files changed, 92 insertions(+), 25 deletions(-)

diff --git a/Docs/ChangeLog.md b/Docs/ChangeLog.md
index 05e1ae3..3c07c5b 100644
--- a/Docs/ChangeLog.md
+++ b/Docs/ChangeLog.md
@@ -1,3 +1,8 @@
+# Version: 1.115.4
+## Date: 2024-10-07
+### Changes:
+- TODO: use the peptide number for 'self.peptide_num_used' after filtering the minimum peptide number
+
 # Version: 1.115.3
 ## Date: 2024-10-04
 ### Changes:
diff --git a/metax/gui/main_gui.py b/metax/gui/main_gui.py
index 10cbe18..73d4479 100644
--- a/metax/gui/main_gui.py
+++ b/metax/gui/main_gui.py
@@ -1857,7 +1857,6 @@ def run_after_set_multi_tables(self):
             msg = f"""<html>
             <body>
             <p>Custom data is ready!</p>
-            <p>{nan_stats_str}</p>
             <p>Number of items: [{num_item}]</p>
             </body>
             </html>
@@ -1890,7 +1889,7 @@ def run_after_set_multi_tables(self):
                 <table>
                     <tr>
                         <th>Category</th>
-                        <th>Number</th>
+                        <th>Number (After Filtering)</th>
                         <th>Used Peptides</th>
                         <th>% of All Peptides</th>
                     </tr>
diff --git a/metax/taxafunc_analyzer/analyzer.py b/metax/taxafunc_analyzer/analyzer.py
index 24e37ff..b21e774 100644
--- a/metax/taxafunc_analyzer/analyzer.py
+++ b/metax/taxafunc_analyzer/analyzer.py
@@ -689,8 +689,7 @@ def run_lfq_for_taxa_func(self, df_taxa_func):
         
         return df_taxa_func
     
-    def update_data_preprocess_parameters(self, data_preprocess_params, peptide_num_threshold):
-        data_preprocess_params['peptide_num_threshold'] = peptide_num_threshold
+    def update_data_preprocess_parameters(self, data_preprocess_params):
         
         normalize_method = data_preprocess_params['normalize_method']
         transform_method = data_preprocess_params['transform_method']
@@ -706,7 +705,67 @@ def update_data_preprocess_parameters(self, data_preprocess_params, peptide_num_
                 
         
         return data_preprocess_params
+
+    def filter_peptides_num_for_splited_func(self, df, peptide_num_threshold, df_type, distinct_threshold_mode=False):
+        '''
+        Only for the splited func table or taxa_func table
+        - df: the splited func table or taxa_func table which has been grouped, index is the func or taxa_func
+        - peptide_num_threshold: the threshold of peptide number for each func or taxa_func
+        - df_type: 'func' or 'taxa_func'
+        - distinct_threshold_mode: TODO
+        '''
+        
+        valid_df_types = ['func', 'taxa_func']
+        if df_type not in valid_df_types:
+            raise ValueError(f"df_type must be one of {valid_df_types}, your input is [{df_type}]")
+        
+        peptide_num= peptide_num_threshold[df_type]
+        df_original_len = len(df)
+        
+        df = df[df['peptide_num'] >= peptide_num]
+        print(f"Removed [{df_original_len - len(df)} {df_type}] with less than [{peptide_num}] peptides.")
+        return df    
+    
+
+        
         
+    def filter_peptides_num(self, df, peptide_num_threshold, df_type, distinct_threshold_mode=False):
+        '''
+        Filter the peptides based on the peptide number threshold
+        - df: the original df including peptides, taxa, and functions, etc.
+        - peptide_num_threshold: the threshold of peptide number for each taxa or func
+        - df_type: 'taxa', 'func', or 'taxa_func'
+        - distinct_threshold_mode: TODO
+        '''
+        valid_df_types = ['taxa', 'func', 'taxa_func']
+        if df_type not in valid_df_types:
+            raise ValueError(f"df_type must be one of {valid_df_types}, your input is [{df_type}]")
+        
+        peptide_num= peptide_num_threshold[df_type]
+        df_original_len = len(df)
+
+        if df_type == 'taxa_func':
+            item_col = 'taxa_func'
+            df['taxa_func'] = df['Taxon'] + '&&&&' + df[self.func_name]
+        else:
+            item_col = 'Taxon' if df_type == 'taxa' else self.func_name
+
+        # Group by item_col and filter based on peptide number
+        dict_item_pep_num = df.groupby(item_col).size().to_dict()
+        remove_list = [k for k, v in dict_item_pep_num.items() if v < peptide_num]
+
+        # Remove rows based on peptide number threshold
+        df = df[~df[item_col].isin(remove_list)]
+
+        if df_type == 'taxa_func':
+            df = df.drop('taxa_func', axis=1)
+
+        self.peptide_num_used[df_type] = len(df)
+        print(f"Removed [{len(remove_list)} {df_type}] from [{df_original_len - len(df)} Peptides] with less than [{peptide_num}] peptides.")
+
+        return df
+
+
             
     def set_multi_tables(self, level: str = 's', func_threshold:float = 1.00,
                          outlier_params: dict = {'detect_method': None, 'handle_method': None,
@@ -751,7 +810,7 @@ def set_multi_tables(self, level: str = 's', func_threshold:float = 1.00,
 
         #! fllowing code is for the normal mode
         # Update 'data_preprocess_params'
-        data_preprocess_params = self.update_data_preprocess_parameters(data_preprocess_params, peptide_num_threshold)
+        data_preprocess_params = self.update_data_preprocess_parameters(data_preprocess_params)
         
         #2. sum the protein intensity
         if sum_protein:
@@ -821,10 +880,15 @@ def strip_taxa(x, level):
         
         if not taxa_and_func_only_from_otf:
             # extract 'taxa', sample intensity #! and 'peptide_col' to avoid the duplicated items when handling outlier
-            df_taxa_pep = df_filtered_peptides[[self.peptide_col_name,'Taxon'] + self.sample_list]
+            df_taxa_pep = df_filtered_peptides[[self.peptide_col_name,'Taxon'] + self.sample_list] # type: ignore
             print("\n-----Starting to perform outlier detection and handling for [Peptide-Taxon] table...-----")
             df_taxa_pep = self.detect_and_handle_outliers(df=df_taxa_pep, **outlier_params)
-            self.peptide_num_used['taxa'] = len(df_taxa_pep)
+            #TODO: use the peptide number after filtering the minimum peptide number 
+            # count the peptides of each taxon and drop the taxa below the minimum peptide number
+            df_taxa_pep = self.filter_peptides_num(df=df_taxa_pep, peptide_num_threshold=peptide_num_threshold, df_type='taxa')
+            
+            
+            # self.peptide_num_used['taxa'] = len(df_taxa_pep)
             # add column 'peptide_num' to df_taxa as 1
             df_taxa_pep['peptide_num'] = 1
             
@@ -844,7 +908,8 @@ def strip_taxa(x, level):
             df_func_pep = df_func_pep[[self.peptide_col_name, self.func_name] + self.sample_list]
             print("\n-----Starting to perform outlier detection and handling for [Peptide-Function] table...-----")
             df_func_pep = self.detect_and_handle_outliers(df=df_func_pep, **outlier_params)
-            self.peptide_num_used['func'] = len(df_func_pep)
+            if not split_func:
+                df_func_pep = self.filter_peptides_num(df=df_func_pep, peptide_num_threshold=peptide_num_threshold, df_type='func')
             df_func_pep['peptide_num'] = 1
             
             if quant_method == 'lfq':
@@ -853,8 +918,10 @@ def strip_taxa(x, level):
                 df_func = df_func_pep.groupby(self.func_name).sum(numeric_only=True)
             
             if split_func:
+                self.peptide_num_used['func'] = len(df_func_pep)
                 df_func = self.split_func(df=df_func, split_func_params=split_func_params, df_type='func')
-                
+                df_func = self.filter_peptides_num_for_splited_func(df=df_func, peptide_num_threshold=peptide_num_threshold, df_type='func')
+
             df_func = self.data_preprocess(df=df_func,df_name = 'func', **data_preprocess_params)
             self.func_df = df_func
             #-----Func Table End-----
@@ -887,6 +954,8 @@ def strip_taxa(x, level):
         # ----- create taxa_func table -----
         df_taxa_func = df_half_processed_peptides[[self.peptide_col_name, 'Taxon', self.func_name] + self.sample_list]
         df_taxa_func['peptide_num'] = 1
+        if not split_func:
+            df_taxa_func = self.filter_peptides_num(df=df_taxa_func, peptide_num_threshold=peptide_num_threshold, df_type='taxa_func')
         
         for key in ['taxa_func', 'taxa', 'func']:
             self.peptide_num_used[key] = len(df_taxa_func) if self.peptide_num_used[key] == 0 else self.peptide_num_used[key]
@@ -899,6 +968,9 @@ def strip_taxa(x, level):
         # split the function before data preprocess
         if split_func:
             df_taxa_func = self.split_func( df=df_taxa_func, split_func_params=split_func_params, df_type='taxa_func')
+            df_taxa_func = self.filter_peptides_num_for_splited_func(df=df_taxa_func, peptide_num_threshold=peptide_num_threshold, 
+                                                                     df_type='taxa_func')
+            
             
         print("\n-----Starting to perform data pre-processing for [Taxa-Function] table...-----")
         df_taxa_func_all_processed = self.data_preprocess(df=df_taxa_func
@@ -1025,14 +1097,14 @@ def get_df(self, table_name:str = 'taxa'):
     sw.set_func('KEGG_Pathway_name')
     sw.set_group('Individual')
     sw.set_multi_tables(level='s', 
-                        outlier_params = {'detect_method': 'zero-dominant', 'handle_method': 'original',
+                        outlier_params = {'detect_method': 'None', 'handle_method': 'original',
                             "detection_by_group" : 'Individual', "handle_by_group": None},
                         data_preprocess_params = {
-                                                'normalize_method': 'trace_shift', 
+                                                'normalize_method': 'None', 
                                                 'transform_method': "log2",
                                                 'batch_meta': 'None', 
                                                 'processing_order': ['transform', 'normalize', 'batch']},
-                    peptide_num_threshold = {'taxa': 2, 'func': 2, 'taxa_func': 2},
+                    peptide_num_threshold = {'taxa': 3, 'func': 3, 'taxa_func': 3},
                     keep_unknow_func=False, sum_protein=False, 
                     sum_protein_params = {'method': 'razor', 'by_sample': False, 'rank_method': 'unique_counts', 'greedy_method': 'heap', 'peptide_num_threshold': 3},
                     split_func=True, split_func_params = {'split_by': '|', 'share_intensity': False},
diff --git a/metax/taxafunc_analyzer/analyzer_utils/data_preprocessing.py b/metax/taxafunc_analyzer/analyzer_utils/data_preprocessing.py
index fef5f33..a7ab723 100644
--- a/metax/taxafunc_analyzer/analyzer_utils/data_preprocessing.py
+++ b/metax/taxafunc_analyzer/analyzer_utils/data_preprocessing.py
@@ -614,7 +614,7 @@ def detect_and_handle_outliers(self, df: pd.DataFrame,
     def data_preprocess(self, df: pd.DataFrame, normalize_method: str|None = None, 
                          transform_method: str|None = None, batch_meta: str|None =None,
                          processing_order:list|None =None,
-                         df_name:str = "None", peptide_num_threshold:dict[str, int] ={'taxa': 1, 'func': 1, 'taxa_func': 1}
+                         df_name:str = "None"
                          ) -> pd.DataFrame:
         """
         ## `data_preprocess` Method
@@ -662,11 +662,7 @@ def data_preprocess(self, df: pd.DataFrame, normalize_method: str|None = None,
             - `taxa_func`
             - `protein`
             - `custom`
-        - `peptide_num_threshold` (`dict`, optional):
-        The threshold for the number of peptides in each DataFrame. Default values are:
-        - `taxa`: 3
-        - `func`: 3
-        - `taxa_func`: 3
+
         
         ### Returns:
 
@@ -677,12 +673,7 @@ def data_preprocess(self, df: pd.DataFrame, normalize_method: str|None = None,
         
         df = df.copy()
         
-        # remove items with peptide number less than threshold
-        if df_name in ['taxa', 'func', 'taxa_func']:
-            print(f'{df_name.upper()} number before removing: {df.shape[0]}')
-            df = df[df['peptide_num'] >= peptide_num_threshold[df_name]]
-            print(f'{df_name.upper()} number with peptide_num >= [{peptide_num_threshold[df_name]}]: {df.shape[0]}')
-           
+
         if processing_order is None:
             processing_order = ['transform', 'normalize', 'batch']
         else:
diff --git a/metax/utils/version.py b/metax/utils/version.py
index 06dc858..b0d8e7d 100644
--- a/metax/utils/version.py
+++ b/metax/utils/version.py
@@ -1,2 +1,2 @@
-__version__ = '1.115.3'
+__version__ = '1.115.4'
 API_version = '3'
\ No newline at end of file

From 7343ea13b2fd8deea78f6c56e882a070934993e8 Mon Sep 17 00:00:00 2001
From: Qing <byeomax@outlook.com>
Date: Wed, 16 Oct 2024 22:49:07 -0400
Subject: [PATCH 4/4] - Fix: Fixed the bug that the report raised an error when
 using Anydata mode. - Change: changed the approach of filtering by the minimum
 peptide number threshold for the protein. (Available for Razor and Anti-Razor
 method)

---
 Docs/ChangeLog.md                             |   3 +-
 metax/gui/main_gui.py                         |   8 +-
 metax/gui/metax_gui/main_window.ui            | 146 +++++++++---------
 metax/gui/metax_gui/ui_main_window.py         | 102 ++++++------
 metax/taxafunc_analyzer/analyzer.py           |  74 ++++++++-
 .../analyzer_utils/razor_sum.py               |   3 +-
 .../analyzer_utils/sum_protein_intensity.py   |  57 +++++--
 pyproject.toml                                |   2 +-
 8 files changed, 243 insertions(+), 152 deletions(-)

diff --git a/Docs/ChangeLog.md b/Docs/ChangeLog.md
index 3c07c5b..9e78d66 100644
--- a/Docs/ChangeLog.md
+++ b/Docs/ChangeLog.md
@@ -1,7 +1,8 @@
 # Version: 1.115.4
 ## Date: 2024-10-07
 ### Changes:
-- TODO: use the peptide number for 'self.peptide_num_used' after filtering the minimum peptide number
+- Fix: Fixed the bug that the report raised an error when using Anydata mode.
+- Change: changed the approach of filtering by the minimum peptide number threshold for the protein. (Available for Razor and Anti-Razor method)
 
 # Version: 1.115.3
 ## Date: 2024-10-04
diff --git a/metax/gui/main_gui.py b/metax/gui/main_gui.py
index 73d4479..7230595 100644
--- a/metax/gui/main_gui.py
+++ b/metax/gui/main_gui.py
@@ -993,10 +993,14 @@ def update_method_of_protein_inference(self):
             self.checkBox_infrence_protein_by_sample.setChecked(True)
             self.checkBox_infrence_protein_by_sample.setEnabled(False)
             self.comboBox_protein_ranking_method.setEnabled(False)
+            # enable the peptide_num_threshold
+            self.spinBox_peptide_num_threshold_protein.setEnabled(True)
         else: # method is ["rank"]
             self.checkBox_infrence_protein_by_sample.setEnabled(True)
             self.comboBox_protein_ranking_method.setEnabled(True)
             self.checkBox_infrence_protein_by_sample.setChecked(False)
+            # disable the peptide_num_threshold
+            self.spinBox_peptide_num_threshold_protein.setEnabled(False)
     
     
 
@@ -1853,11 +1857,11 @@ def run_after_set_multi_tables(self):
         
         # Final message
         if self.tfa.any_df_mode:
-            num_item = self.tfa.custom_df.shape[0]
+            original_num_peptide = self.tfa.custom_df.shape[0]
             msg = f"""<html>
             <body>
             <p>Custom data is ready!</p>
-            <p>Number of items: [{num_item}]</p>
+            <p>Number of items: [{original_num_peptide}]</p>
             </body>
             </html>
             """
diff --git a/metax/gui/metax_gui/main_window.ui b/metax/gui/metax_gui/main_window.ui
index 530ed76..818a6dc 100644
--- a/metax/gui/metax_gui/main_window.ui
+++ b/metax/gui/metax_gui/main_window.ui
@@ -46,7 +46,7 @@
            <enum>Qt::LeftToRight</enum>
           </property>
           <property name="currentIndex">
-           <number>2</number>
+           <number>4</number>
           </property>
           <property name="documentMode">
            <bool>false</bool>
@@ -245,8 +245,8 @@
                 <rect>
                  <x>0</x>
                  <y>0</y>
-                 <width>391</width>
-                 <height>80</height>
+                 <width>528</width>
+                 <height>534</height>
                 </rect>
                </property>
                <attribute name="label">
@@ -1476,7 +1476,7 @@
                   <property name="maximumSize">
                    <size>
                     <width>16777215</width>
-                    <height>280</height>
+                    <height>300</height>
                    </size>
                   </property>
                   <property name="title">
@@ -1505,7 +1505,7 @@
                        <rect>
                         <x>0</x>
                         <y>0</y>
-                        <width>660</width>
+                        <width>1016</width>
                         <height>232</height>
                        </rect>
                       </property>
@@ -2759,7 +2759,7 @@
                   <property name="maximumSize">
                    <size>
                     <width>16777215</width>
-                    <height>280</height>
+                    <height>300</height>
                    </size>
                   </property>
                   <property name="title">
@@ -2776,7 +2776,7 @@
                        <rect>
                         <x>0</x>
                         <y>0</y>
-                        <width>621</width>
+                        <width>999</width>
                         <height>150</height>
                        </rect>
                       </property>
@@ -3750,7 +3750,7 @@
                  <property name="maximumSize">
                   <size>
                    <width>16777215</width>
-                   <height>240</height>
+                   <height>280</height>
                   </size>
                  </property>
                  <property name="title">
@@ -3779,7 +3779,7 @@
                       <rect>
                        <x>0</x>
                        <y>0</y>
-                       <width>878</width>
+                       <width>1020</width>
                        <height>128</height>
                       </rect>
                      </property>
@@ -4801,7 +4801,7 @@
                <enum>QTabWidget::Triangular</enum>
               </property>
               <property name="currentIndex">
-               <number>3</number>
+               <number>2</number>
               </property>
               <widget class="QWidget" name="tab_3">
                <attribute name="title">
@@ -5144,7 +5144,7 @@
                   </property>
                  </widget>
                 </item>
-                <item row="10" column="1" colspan="2">
+                <item row="10" column="1">
                  <widget class="QPushButton" name="pushButton_dunnett_test">
                   <property name="enabled">
                    <bool>false</bool>
@@ -5239,64 +5239,11 @@
                   </item>
                  </layout>
                 </item>
-                <item row="9" column="1" colspan="2">
-                 <widget class="Line" name="line_26">
-                  <property name="orientation">
-                   <enum>Qt::Horizontal</enum>
-                  </property>
-                 </widget>
-                </item>
-                <item row="11" column="1" colspan="2">
-                 <widget class="QPushButton" name="pushButton_multi_deseq2">
-                  <property name="enabled">
-                   <bool>false</bool>
-                  </property>
-                  <property name="text">
-                   <string>Run Deseq2</string>
-                  </property>
-                 </widget>
-                </item>
                 <item row="4" column="1" colspan="2">
                  <layout class="QGridLayout" name="gridLayout_72">
-                  <item row="1" column="1">
-                   <layout class="QHBoxLayout" name="horizontalLayout_dunnett_group"/>
-                  </item>
-                  <item row="0" column="1">
-                   <widget class="QLabel" name="label_114">
-                    <property name="sizePolicy">
-                     <sizepolicy hsizetype="Preferred" vsizetype="Fixed">
-                      <horstretch>0</horstretch>
-                      <verstretch>0</verstretch>
-                     </sizepolicy>
-                    </property>
-                    <property name="text">
-                     <string>Groups (Default all)</string>
-                    </property>
-                   </widget>
-                  </item>
                   <item row="1" column="0">
                    <widget class="QComboBox" name="comboBox_dunnett_control_group"/>
                   </item>
-                  <item row="0" column="0">
-                   <widget class="QLabel" name="label_115">
-                    <property name="text">
-                     <string>Control Group</string>
-                    </property>
-                   </widget>
-                  </item>
-                  <item row="2" column="0">
-                   <widget class="QCheckBox" name="checkBox_comparing_group_control_in_condition">
-                    <property name="sizePolicy">
-                     <sizepolicy hsizetype="Preferred" vsizetype="Fixed">
-                      <horstretch>0</horstretch>
-                      <verstretch>0</verstretch>
-                     </sizepolicy>
-                    </property>
-                    <property name="text">
-                     <string>Comparing in Each Condition</string>
-                    </property>
-                   </widget>
-                  </item>
                   <item row="2" column="1">
                    <layout class="QHBoxLayout" name="horizontalLayout_24">
                     <item>
@@ -5321,8 +5268,61 @@
                     </item>
                    </layout>
                   </item>
+                  <item row="2" column="0">
+                   <widget class="QCheckBox" name="checkBox_comparing_group_control_in_condition">
+                    <property name="sizePolicy">
+                     <sizepolicy hsizetype="Preferred" vsizetype="Fixed">
+                      <horstretch>0</horstretch>
+                      <verstretch>0</verstretch>
+                     </sizepolicy>
+                    </property>
+                    <property name="text">
+                     <string>Comparing in Each Condition</string>
+                    </property>
+                   </widget>
+                  </item>
+                  <item row="1" column="1">
+                   <layout class="QHBoxLayout" name="horizontalLayout_dunnett_group"/>
+                  </item>
+                  <item row="0" column="1">
+                   <widget class="QLabel" name="label_114">
+                    <property name="sizePolicy">
+                     <sizepolicy hsizetype="Preferred" vsizetype="Fixed">
+                      <horstretch>0</horstretch>
+                      <verstretch>0</verstretch>
+                     </sizepolicy>
+                    </property>
+                    <property name="text">
+                     <string>Groups (Default all)</string>
+                    </property>
+                   </widget>
+                  </item>
+                  <item row="0" column="0">
+                   <widget class="QLabel" name="label_115">
+                    <property name="text">
+                     <string>Control Group</string>
+                    </property>
+                   </widget>
+                  </item>
                  </layout>
                 </item>
+                <item row="9" column="1" colspan="2">
+                 <widget class="Line" name="line_26">
+                  <property name="orientation">
+                   <enum>Qt::Horizontal</enum>
+                  </property>
+                 </widget>
+                </item>
+                <item row="10" column="2">
+                 <widget class="QPushButton" name="pushButton_multi_deseq2">
+                  <property name="enabled">
+                   <bool>false</bool>
+                  </property>
+                  <property name="text">
+                   <string>Run Deseq2</string>
+                  </property>
+                 </widget>
+                </item>
                </layout>
               </widget>
               <widget class="QWidget" name="tab_19">
@@ -6207,7 +6207,7 @@
                   <property name="maximumSize">
                    <size>
                     <width>16777215</width>
-                    <height>220</height>
+                    <height>240</height>
                    </size>
                   </property>
                   <property name="title">
@@ -7437,7 +7437,7 @@
                   <property name="maximumSize">
                    <size>
                     <width>16777215</width>
-                    <height>220</height>
+                    <height>240</height>
                    </size>
                   </property>
                   <property name="title">
@@ -7454,8 +7454,8 @@
                        <rect>
                         <x>0</x>
                         <y>0</y>
-                        <width>620</width>
-                        <height>65</height>
+                        <width>1016</width>
+                        <height>105</height>
                        </rect>
                       </property>
                       <layout class="QGridLayout" name="gridLayout_57">
@@ -7819,7 +7819,7 @@
                <enum>QTabWidget::Triangular</enum>
               </property>
               <property name="currentIndex">
-               <number>0</number>
+               <number>1</number>
               </property>
               <widget class="QWidget" name="tab_8">
                <attribute name="title">
@@ -8146,7 +8146,7 @@
                   <property name="maximumSize">
                    <size>
                     <width>16777215</width>
-                    <height>220</height>
+                    <height>240</height>
                    </size>
                   </property>
                   <property name="title">
@@ -9258,7 +9258,7 @@
                   <property name="maximumSize">
                    <size>
                     <width>16777215</width>
-                    <height>220</height>
+                    <height>240</height>
                    </size>
                   </property>
                   <property name="title">
@@ -9275,8 +9275,8 @@
                        <rect>
                         <x>0</x>
                         <y>0</y>
-                        <width>383</width>
-                        <height>68</height>
+                        <width>1016</width>
+                        <height>141</height>
                        </rect>
                       </property>
                       <layout class="QGridLayout" name="gridLayout_66">
diff --git a/metax/gui/metax_gui/ui_main_window.py b/metax/gui/metax_gui/ui_main_window.py
index efb045f..f1c6c9b 100644
--- a/metax/gui/metax_gui/ui_main_window.py
+++ b/metax/gui/metax_gui/ui_main_window.py
@@ -147,7 +147,7 @@ def setupUi(self, metaX_main):
         self.toolBox_2.setMaximumSize(QtCore.QSize(1677, 16777215))
         self.toolBox_2.setObjectName("toolBox_2")
         self.page_2 = QtWidgets.QWidget()
-        self.page_2.setGeometry(QtCore.QRect(0, 0, 391, 80))
+        self.page_2.setGeometry(QtCore.QRect(0, 0, 528, 534))
         self.page_2.setObjectName("page_2")
         self.gridLayout_27 = QtWidgets.QGridLayout(self.page_2)
         self.gridLayout_27.setObjectName("gridLayout_27")
@@ -730,7 +730,7 @@ def setupUi(self, metaX_main):
         self.line_7.setObjectName("line_7")
         self.gridLayout_26.addWidget(self.line_7, 1, 0, 1, 3)
         self.groupBox_basic_plot = QtWidgets.QGroupBox(self.tab_12)
-        self.groupBox_basic_plot.setMaximumSize(QtCore.QSize(16777215, 280))
+        self.groupBox_basic_plot.setMaximumSize(QtCore.QSize(16777215, 300))
         self.groupBox_basic_plot.setObjectName("groupBox_basic_plot")
         self.gridLayout_40 = QtWidgets.QGridLayout(self.groupBox_basic_plot)
         self.gridLayout_40.setObjectName("gridLayout_40")
@@ -744,7 +744,7 @@ def setupUi(self, metaX_main):
         self.scrollArea.setWidgetResizable(True)
         self.scrollArea.setObjectName("scrollArea")
         self.scrollAreaWidgetContents = QtWidgets.QWidget()
-        self.scrollAreaWidgetContents.setGeometry(QtCore.QRect(0, 0, 660, 232))
+        self.scrollAreaWidgetContents.setGeometry(QtCore.QRect(0, 0, 1016, 232))
         self.scrollAreaWidgetContents.setObjectName("scrollAreaWidgetContents")
         self.gridLayout_34 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents)
         self.gridLayout_34.setObjectName("gridLayout_34")
@@ -1400,7 +1400,7 @@ def setupUi(self, metaX_main):
         self.pushButton_basic_heatmap_add.setObjectName("pushButton_basic_heatmap_add")
         self.gridLayout_23.addWidget(self.pushButton_basic_heatmap_add, 5, 3, 1, 1)
         self.groupBox_basic_heatmap_plot_settings = QtWidgets.QGroupBox(self.tab_13)
-        self.groupBox_basic_heatmap_plot_settings.setMaximumSize(QtCore.QSize(16777215, 280))
+        self.groupBox_basic_heatmap_plot_settings.setMaximumSize(QtCore.QSize(16777215, 300))
         self.groupBox_basic_heatmap_plot_settings.setObjectName("groupBox_basic_heatmap_plot_settings")
         self.gridLayout_41 = QtWidgets.QGridLayout(self.groupBox_basic_heatmap_plot_settings)
         self.gridLayout_41.setObjectName("gridLayout_41")
@@ -1408,7 +1408,7 @@ def setupUi(self, metaX_main):
         self.scrollArea_2.setWidgetResizable(True)
         self.scrollArea_2.setObjectName("scrollArea_2")
         self.scrollAreaWidgetContents_2 = QtWidgets.QWidget()
-        self.scrollAreaWidgetContents_2.setGeometry(QtCore.QRect(0, 0, 621, 150))
+        self.scrollAreaWidgetContents_2.setGeometry(QtCore.QRect(0, 0, 999, 150))
         self.scrollAreaWidgetContents_2.setObjectName("scrollAreaWidgetContents_2")
         self.gridLayout_50 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_2)
         self.gridLayout_50.setObjectName("gridLayout_50")
@@ -1943,7 +1943,7 @@ def setupUi(self, metaX_main):
         self.gridLayout_46.addWidget(self.checkBox_2, 1, 0, 1, 1)
         self.gridLayout_75.addLayout(self.gridLayout_46, 0, 0, 1, 1)
         self.groupBox_cross_heatmap_settings = QtWidgets.QGroupBox(self.groupBox_cross_heatmap_plot)
-        self.groupBox_cross_heatmap_settings.setMaximumSize(QtCore.QSize(16777215, 240))
+        self.groupBox_cross_heatmap_settings.setMaximumSize(QtCore.QSize(16777215, 280))
         self.groupBox_cross_heatmap_settings.setObjectName("groupBox_cross_heatmap_settings")
         self.gridLayout_52 = QtWidgets.QGridLayout(self.groupBox_cross_heatmap_settings)
         self.gridLayout_52.setObjectName("gridLayout_52")
@@ -1957,7 +1957,7 @@ def setupUi(self, metaX_main):
         self.scrollArea_cross_heatmap_settings.setWidgetResizable(True)
         self.scrollArea_cross_heatmap_settings.setObjectName("scrollArea_cross_heatmap_settings")
         self.scrollAreaWidgetContents_3 = QtWidgets.QWidget()
-        self.scrollAreaWidgetContents_3.setGeometry(QtCore.QRect(0, 0, 878, 128))
+        self.scrollAreaWidgetContents_3.setGeometry(QtCore.QRect(0, 0, 1020, 128))
         self.scrollAreaWidgetContents_3.setObjectName("scrollAreaWidgetContents_3")
         self.gridLayout_38 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_3)
         self.gridLayout_38.setObjectName("gridLayout_38")
@@ -2591,7 +2591,7 @@ def setupUi(self, metaX_main):
         self.pushButton_dunnett_test = QtWidgets.QPushButton(self.tab_16)
         self.pushButton_dunnett_test.setEnabled(False)
         self.pushButton_dunnett_test.setObjectName("pushButton_dunnett_test")
-        self.gridLayout_33.addWidget(self.pushButton_dunnett_test, 10, 1, 1, 2)
+        self.gridLayout_33.addWidget(self.pushButton_dunnett_test, 10, 1, 1, 1)
         self.horizontalLayout_39 = QtWidgets.QHBoxLayout()
         self.horizontalLayout_39.setObjectName("horizontalLayout_39")
         self.label_112 = QtWidgets.QLabel(self.tab_16)
@@ -2636,42 +2636,11 @@ def setupUi(self, metaX_main):
         self.horizontalLayout_73.addWidget(self.comboBox_group_control_condition_group)
         self.horizontalLayout_39.addLayout(self.horizontalLayout_73)
         self.gridLayout_33.addLayout(self.horizontalLayout_39, 1, 1, 1, 2)
-        self.line_26 = QtWidgets.QFrame(self.tab_16)
-        self.line_26.setFrameShape(QtWidgets.QFrame.HLine)
-        self.line_26.setFrameShadow(QtWidgets.QFrame.Sunken)
-        self.line_26.setObjectName("line_26")
-        self.gridLayout_33.addWidget(self.line_26, 9, 1, 1, 2)
-        self.pushButton_multi_deseq2 = QtWidgets.QPushButton(self.tab_16)
-        self.pushButton_multi_deseq2.setEnabled(False)
-        self.pushButton_multi_deseq2.setObjectName("pushButton_multi_deseq2")
-        self.gridLayout_33.addWidget(self.pushButton_multi_deseq2, 11, 1, 1, 2)
         self.gridLayout_72 = QtWidgets.QGridLayout()
         self.gridLayout_72.setObjectName("gridLayout_72")
-        self.horizontalLayout_dunnett_group = QtWidgets.QHBoxLayout()
-        self.horizontalLayout_dunnett_group.setObjectName("horizontalLayout_dunnett_group")
-        self.gridLayout_72.addLayout(self.horizontalLayout_dunnett_group, 1, 1, 1, 1)
-        self.label_114 = QtWidgets.QLabel(self.tab_16)
-        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Fixed)
-        sizePolicy.setHorizontalStretch(0)
-        sizePolicy.setVerticalStretch(0)
-        sizePolicy.setHeightForWidth(self.label_114.sizePolicy().hasHeightForWidth())
-        self.label_114.setSizePolicy(sizePolicy)
-        self.label_114.setObjectName("label_114")
-        self.gridLayout_72.addWidget(self.label_114, 0, 1, 1, 1)
         self.comboBox_dunnett_control_group = QtWidgets.QComboBox(self.tab_16)
         self.comboBox_dunnett_control_group.setObjectName("comboBox_dunnett_control_group")
         self.gridLayout_72.addWidget(self.comboBox_dunnett_control_group, 1, 0, 1, 1)
-        self.label_115 = QtWidgets.QLabel(self.tab_16)
-        self.label_115.setObjectName("label_115")
-        self.gridLayout_72.addWidget(self.label_115, 0, 0, 1, 1)
-        self.checkBox_comparing_group_control_in_condition = QtWidgets.QCheckBox(self.tab_16)
-        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Fixed)
-        sizePolicy.setHorizontalStretch(0)
-        sizePolicy.setVerticalStretch(0)
-        sizePolicy.setHeightForWidth(self.checkBox_comparing_group_control_in_condition.sizePolicy().hasHeightForWidth())
-        self.checkBox_comparing_group_control_in_condition.setSizePolicy(sizePolicy)
-        self.checkBox_comparing_group_control_in_condition.setObjectName("checkBox_comparing_group_control_in_condition")
-        self.gridLayout_72.addWidget(self.checkBox_comparing_group_control_in_condition, 2, 0, 1, 1)
         self.horizontalLayout_24 = QtWidgets.QHBoxLayout()
         self.horizontalLayout_24.setObjectName("horizontalLayout_24")
         self.label_140 = QtWidgets.QLabel(self.tab_16)
@@ -2687,7 +2656,38 @@ def setupUi(self, metaX_main):
         self.comboBox_group_control_comparing_each_condition_meta.setObjectName("comboBox_group_control_comparing_each_condition_meta")
         self.horizontalLayout_24.addWidget(self.comboBox_group_control_comparing_each_condition_meta)
         self.gridLayout_72.addLayout(self.horizontalLayout_24, 2, 1, 1, 1)
+        self.checkBox_comparing_group_control_in_condition = QtWidgets.QCheckBox(self.tab_16)
+        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Fixed)
+        sizePolicy.setHorizontalStretch(0)
+        sizePolicy.setVerticalStretch(0)
+        sizePolicy.setHeightForWidth(self.checkBox_comparing_group_control_in_condition.sizePolicy().hasHeightForWidth())
+        self.checkBox_comparing_group_control_in_condition.setSizePolicy(sizePolicy)
+        self.checkBox_comparing_group_control_in_condition.setObjectName("checkBox_comparing_group_control_in_condition")
+        self.gridLayout_72.addWidget(self.checkBox_comparing_group_control_in_condition, 2, 0, 1, 1)
+        self.horizontalLayout_dunnett_group = QtWidgets.QHBoxLayout()
+        self.horizontalLayout_dunnett_group.setObjectName("horizontalLayout_dunnett_group")
+        self.gridLayout_72.addLayout(self.horizontalLayout_dunnett_group, 1, 1, 1, 1)
+        self.label_114 = QtWidgets.QLabel(self.tab_16)
+        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Fixed)
+        sizePolicy.setHorizontalStretch(0)
+        sizePolicy.setVerticalStretch(0)
+        sizePolicy.setHeightForWidth(self.label_114.sizePolicy().hasHeightForWidth())
+        self.label_114.setSizePolicy(sizePolicy)
+        self.label_114.setObjectName("label_114")
+        self.gridLayout_72.addWidget(self.label_114, 0, 1, 1, 1)
+        self.label_115 = QtWidgets.QLabel(self.tab_16)
+        self.label_115.setObjectName("label_115")
+        self.gridLayout_72.addWidget(self.label_115, 0, 0, 1, 1)
         self.gridLayout_33.addLayout(self.gridLayout_72, 4, 1, 1, 2)
+        self.line_26 = QtWidgets.QFrame(self.tab_16)
+        self.line_26.setFrameShape(QtWidgets.QFrame.HLine)
+        self.line_26.setFrameShadow(QtWidgets.QFrame.Sunken)
+        self.line_26.setObjectName("line_26")
+        self.gridLayout_33.addWidget(self.line_26, 9, 1, 1, 2)
+        self.pushButton_multi_deseq2 = QtWidgets.QPushButton(self.tab_16)
+        self.pushButton_multi_deseq2.setEnabled(False)
+        self.pushButton_multi_deseq2.setObjectName("pushButton_multi_deseq2")
+        self.gridLayout_33.addWidget(self.pushButton_multi_deseq2, 10, 2, 1, 1)
         self.tabWidget_3.addTab(self.tab_16, "")
         self.tab_19 = QtWidgets.QWidget()
         self.tab_19.setObjectName("tab_19")
@@ -3184,7 +3184,7 @@ def setupUi(self, metaX_main):
         self.gridLayout_co_expr_sample.setObjectName("gridLayout_co_expr_sample")
         self.gridLayout_47.addLayout(self.gridLayout_co_expr_sample, 3, 1, 1, 3)
         self.groupBox_co_expression_plot_settings = QtWidgets.QGroupBox(self.tab_5)
-        self.groupBox_co_expression_plot_settings.setMaximumSize(QtCore.QSize(16777215, 220))
+        self.groupBox_co_expression_plot_settings.setMaximumSize(QtCore.QSize(16777215, 240))
         self.groupBox_co_expression_plot_settings.setObjectName("groupBox_co_expression_plot_settings")
         self.gridLayout_56 = QtWidgets.QGridLayout(self.groupBox_co_expression_plot_settings)
         self.gridLayout_56.setObjectName("gridLayout_56")
@@ -3829,7 +3829,7 @@ def setupUi(self, metaX_main):
         self.label_100.setObjectName("label_100")
         self.gridLayout_24.addWidget(self.label_100, 5, 0, 1, 1)
         self.groupBox_expression_trends_plot_settings = QtWidgets.QGroupBox(self.tab_15)
-        self.groupBox_expression_trends_plot_settings.setMaximumSize(QtCore.QSize(16777215, 220))
+        self.groupBox_expression_trends_plot_settings.setMaximumSize(QtCore.QSize(16777215, 240))
         self.groupBox_expression_trends_plot_settings.setObjectName("groupBox_expression_trends_plot_settings")
         self.gridLayout_60 = QtWidgets.QGridLayout(self.groupBox_expression_trends_plot_settings)
         self.gridLayout_60.setObjectName("gridLayout_60")
@@ -3837,7 +3837,7 @@ def setupUi(self, metaX_main):
         self.scrollArea_5.setWidgetResizable(True)
         self.scrollArea_5.setObjectName("scrollArea_5")
         self.scrollAreaWidgetContents_6 = QtWidgets.QWidget()
-        self.scrollAreaWidgetContents_6.setGeometry(QtCore.QRect(0, 0, 620, 65))
+        self.scrollAreaWidgetContents_6.setGeometry(QtCore.QRect(0, 0, 1016, 105))
         self.scrollAreaWidgetContents_6.setObjectName("scrollAreaWidgetContents_6")
         self.gridLayout_57 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_6)
         self.gridLayout_57.setObjectName("gridLayout_57")
@@ -4210,7 +4210,7 @@ def setupUi(self, metaX_main):
         self.label_149.setObjectName("label_149")
         self.gridLayout_4.addWidget(self.label_149, 0, 0, 1, 1)
         self.groupBox_taxa_func_link_plot_settings = QtWidgets.QGroupBox(self.tab_8)
-        self.groupBox_taxa_func_link_plot_settings.setMaximumSize(QtCore.QSize(16777215, 220))
+        self.groupBox_taxa_func_link_plot_settings.setMaximumSize(QtCore.QSize(16777215, 240))
         self.groupBox_taxa_func_link_plot_settings.setObjectName("groupBox_taxa_func_link_plot_settings")
         self.gridLayout_65 = QtWidgets.QGridLayout(self.groupBox_taxa_func_link_plot_settings)
         self.gridLayout_65.setObjectName("gridLayout_65")
@@ -4838,7 +4838,7 @@ def setupUi(self, metaX_main):
         self.pushButton_plot_network.setObjectName("pushButton_plot_network")
         self.gridLayout_6.addWidget(self.pushButton_plot_network, 10, 1, 1, 3)
         self.groupBox_taxa_func_link_net_plot_settings = QtWidgets.QGroupBox(self.tab_9)
-        self.groupBox_taxa_func_link_net_plot_settings.setMaximumSize(QtCore.QSize(16777215, 220))
+        self.groupBox_taxa_func_link_net_plot_settings.setMaximumSize(QtCore.QSize(16777215, 240))
         self.groupBox_taxa_func_link_net_plot_settings.setObjectName("groupBox_taxa_func_link_net_plot_settings")
         self.gridLayout_63 = QtWidgets.QGridLayout(self.groupBox_taxa_func_link_net_plot_settings)
         self.gridLayout_63.setObjectName("gridLayout_63")
@@ -4846,7 +4846,7 @@ def setupUi(self, metaX_main):
         self.scrollArea_7.setWidgetResizable(True)
         self.scrollArea_7.setObjectName("scrollArea_7")
         self.scrollAreaWidgetContents_8 = QtWidgets.QWidget()
-        self.scrollAreaWidgetContents_8.setGeometry(QtCore.QRect(0, 0, 383, 68))
+        self.scrollAreaWidgetContents_8.setGeometry(QtCore.QRect(0, 0, 1016, 141))
         self.scrollAreaWidgetContents_8.setObjectName("scrollAreaWidgetContents_8")
         self.gridLayout_66 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_8)
         self.gridLayout_66.setObjectName("gridLayout_66")
@@ -5417,12 +5417,12 @@ def setupUi(self, metaX_main):
 
         self.retranslateUi(metaX_main)
         self.stackedWidget.setCurrentIndex(0)
-        self.tabWidget_TaxaFuncAnalyzer.setCurrentIndex(2)
+        self.tabWidget_TaxaFuncAnalyzer.setCurrentIndex(4)
         self.toolBox_2.setCurrentIndex(0)
         self.tabWidget_4.setCurrentIndex(1)
-        self.tabWidget_3.setCurrentIndex(3)
+        self.tabWidget_3.setCurrentIndex(2)
         self.tabWidget.setCurrentIndex(1)
-        self.tabWidget_2.setCurrentIndex(0)
+        self.tabWidget_2.setCurrentIndex(1)
         self.tabWidget_6.setCurrentIndex(1)
         self.toolBox_metalab_res_anno.setCurrentIndex(0)
         self.tabWidget_5.setCurrentIndex(0)
@@ -5839,11 +5839,11 @@ def retranslateUi(self, metaX_main):
         self.comboBox_table_for_dunnett.setItemText(3, _translate("metaX_main", "peptides"))
         self.label_113.setText(_translate("metaX_main", "Meta"))
         self.checkBox_group_control_in_condition.setText(_translate("metaX_main", "In Condition"))
-        self.pushButton_multi_deseq2.setText(_translate("metaX_main", "Run Deseq2"))
+        self.label_140.setText(_translate("metaX_main", " By:"))
+        self.checkBox_comparing_group_control_in_condition.setText(_translate("metaX_main", "Comparing in Each Condition"))
         self.label_114.setText(_translate("metaX_main", "Groups (Default all)"))
         self.label_115.setText(_translate("metaX_main", "Control Group"))
-        self.checkBox_comparing_group_control_in_condition.setText(_translate("metaX_main", "Comparing in Each Condition"))
-        self.label_140.setText(_translate("metaX_main", " By:"))
+        self.pushButton_multi_deseq2.setText(_translate("metaX_main", "Run Deseq2"))
         self.tabWidget_3.setTabText(self.tabWidget_3.indexOf(self.tab_16), _translate("metaX_main", "Group-Control TEST "))
         self.label_166.setText(_translate("metaX_main", "Groups"))
         self.pushButton_deseq2.setText(_translate("metaX_main", "Run DESeq2"))
diff --git a/metax/taxafunc_analyzer/analyzer.py b/metax/taxafunc_analyzer/analyzer.py
index b21e774..540e421 100644
--- a/metax/taxafunc_analyzer/analyzer.py
+++ b/metax/taxafunc_analyzer/analyzer.py
@@ -54,6 +54,7 @@ def __init__(
 
         self.peptide_col_name = peptide_col_name
         self.protein_col_name = protein_col_name
+        self.protein_separator = ';'
         self.custom_col_name = custom_col_name
         self.sample_list: Optional[List[str]] = None
         self.meta_df: Optional[pd.DataFrame] = None
@@ -78,6 +79,7 @@ def __init__(
         self.any_df_mode = any_df_mode  # if True, the consider the TaxaFunc df as other_df
         self.custom_df: Optional[pd.DataFrame] = None # other df, any df that user want to add
         self.peptide_num_used = {'taxa': 0, 'func': 0, 'taxa_func': 0, 'protein': 0}
+        self.distinct_peptides_list: list|None = None
         
         self.split_func_status:bool = False
         self.split_func_sep:str = ''
@@ -689,6 +691,18 @@ def run_lfq_for_taxa_func(self, df_taxa_func):
         
         return df_taxa_func
     
+    def calculate_distinct_peptides(self): #! NOT USED YET
+        """Collect the peptides that map to exactly one protein into `self.distinct_peptides_list`.
+
+        A peptide is distinct when its protein string, split on `self.protein_separator`,
+        yields a single entry. `self.original_df` is only read, never modified.
+        """
+        print("Calculating distinct peptides list...")
+        # Use a boolean mask rather than a helper column, so nothing is assigned on a slice of original_df.
+        is_distinct = self.original_df[self.protein_col_name].apply(lambda x: len(x.split(self.protein_separator)) == 1).astype(bool)
+        self.distinct_peptides_list = self.original_df.loc[is_distinct, self.peptide_col_name].tolist()
+        
+    
     def update_data_preprocess_parameters(self, data_preprocess_params):
         
         normalize_method = data_preprocess_params['normalize_method']
@@ -706,13 +720,12 @@ def update_data_preprocess_parameters(self, data_preprocess_params):
         
         return data_preprocess_params
 
-    def filter_peptides_num_for_splited_func(self, df, peptide_num_threshold, df_type, distinct_threshold_mode=False):
+    def filter_peptides_num_for_splited_func(self, df, peptide_num_threshold, df_type):
         '''
         Only for the splited func table or taxa_func table
         - df: the splited func table or taxa_func table which has been grouped, index is the func or taxa_func
         - peptide_num_threshold: the threshold of peptide number for each func or taxa_func
         - df_type: 'func' or 'taxa_func'
-        - distinct_threshold_mode: TODO
         '''
         
         valid_df_types = ['func', 'taxa_func']
@@ -750,6 +763,49 @@ def filter_peptides_num(self, df, peptide_num_threshold, df_type, distinct_thres
         else:
             item_col = 'Taxon' if df_type == 'taxa' else self.func_name
 
+        # # if True: #! Need to be implemented
+        # if distinct_threshold_mode:
+        #     if self.distinct_peptides_list is None:
+        #         self.calculate_distinct_peptides()
+            
+        #     peptides_in_taxa_func = defaultdict(list)
+        #     peptides_in_taxa = defaultdict(list)
+        #     peptides_in_func = defaultdict(list)
+        #     skiped_peptides_list = []
+        #     for row in tqdm(df.itertuples(index=False), total=len(df), desc="Creating peptides_dict"):
+        #         peptide = row[0]
+        #         if peptide not in self.distinct_peptides_list:
+        #             skiped_peptides_list.append(peptide)
+        #             continue
+                
+        #         if df_type == 'taxa':
+        #             taxa = row[1]
+        #             # Append peptide to taxa list
+        #             peptides_in_taxa[taxa].append(peptide)
+                    
+        #         if self.split_func_status:
+        #                 func_list = [f.strip() for f in row[2].split(self.split_func_sep)]
+        #                 # Process each function in the func_list
+        #                 for func in func_list:
+        #                     peptides_in_func[func].append(peptide)
+        #                     taxa_func = f'{taxa}&&&&{func}'
+        #                     peptides_in_taxa_func[taxa_func].append(peptide)
+        #         else:
+        #             if df_type in ['func', 'taxa_func']:
+        #                 taxa = row[1]
+        #                 func = row[2]
+        #                 # Append peptide to func list
+        #                 peptides_in_func[func].append(peptide)
+        #                 # Create combined key for taxa_func
+        #                 taxa_func = f'{taxa}&&&&{func}'
+        #                 peptides_in_taxa_func[taxa_func].append(peptide)
+
+        #     peitides_dict = {'taxa': peptides_in_taxa, 'func': peptides_in_func, 'taxa_func': peptides_in_taxa_func}
+        #     remove_list = [k for k, v in peitides_dict[df_type].items() if len(v) < peptide_num]
+        #     skiped_peptides_list = set(skiped_peptides_list)
+
+
+        # else:                
         # Group by item_col and filter based on peptide number
         dict_item_pep_num = df.groupby(item_col).size().to_dict()
         remove_list = [k for k, v in dict_item_pep_num.items() if v < peptide_num]
@@ -761,7 +817,7 @@ def filter_peptides_num(self, df, peptide_num_threshold, df_type, distinct_thres
             df = df.drop('taxa_func', axis=1)
 
         self.peptide_num_used[df_type] = len(df)
-        print(f"Removed [{len(remove_list)} {df_type}] from [{df_original_len - len(df)} Peptides] with less than [{peptide_num}] peptides.")
+        print(f"Removed [{len(set((remove_list)))} {df_type}] from [{df_original_len - len(df)} Peptides] with less than [{peptide_num}] peptides.")
 
         return df
 
@@ -819,7 +875,10 @@ def set_multi_tables(self, level: str = 's', func_threshold:float = 1.00,
             self.peptide_num_used['protein'] = 0
             sum_protein_params['quant_method'] = quant_method
             df_peptide_for_protein = self.detect_and_handle_outliers(df=self.original_df, **outlier_params)
-            self.protein_df = SumProteinIntensity(taxa_func_analyzer=self, df=df_peptide_for_protein).sum_protein_intensity( **sum_protein_params)
+            self.protein_df = SumProteinIntensity(taxa_func_analyzer=self, df=df_peptide_for_protein,
+                                                  peptide_num_threshold=sum_protein_params['peptide_num_threshold'],
+                                                  protein_separator = self.protein_separator
+                                                  ).sum_protein_intensity( **sum_protein_params)
             self.protein_df = self.data_preprocess(df=self.protein_df,df_name = 'protein', 
                                                    **data_preprocess_params)
             
@@ -1105,9 +1164,10 @@ def get_df(self, table_name:str = 'taxa'):
                                                 'batch_meta': 'None', 
                                                 'processing_order': ['transform', 'normalize', 'batch']},
                     peptide_num_threshold = {'taxa': 3, 'func': 3, 'taxa_func': 3},
-                    keep_unknow_func=False, sum_protein=False, 
-                    sum_protein_params = {'method': 'razor', 'by_sample': False, 'rank_method': 'unique_counts', 'greedy_method': 'heap', 'peptide_num_threshold': 3},
-                    split_func=True, split_func_params = {'split_by': '|', 'share_intensity': False},
+                    keep_unknow_func=False,
+                    sum_protein=True, 
+                    sum_protein_params = {'method': 'anti-razor', 'by_sample': False, 'rank_method': 'unique_counts', 'greedy_method': 'heap', 'peptide_num_threshold': 3},
+                    split_func=False, split_func_params = {'split_by': '|', 'share_intensity': False},
                     taxa_and_func_only_from_otf=False, quant_method='sum'
                     )
 
diff --git a/metax/taxafunc_analyzer/analyzer_utils/razor_sum.py b/metax/taxafunc_analyzer/analyzer_utils/razor_sum.py
index 7b20bda..5caf2d1 100644
--- a/metax/taxafunc_analyzer/analyzer_utils/razor_sum.py
+++ b/metax/taxafunc_analyzer/analyzer_utils/razor_sum.py
@@ -130,7 +130,8 @@ def get_mini_target_set(self, greedy_method='heap'):
         self.greedy_method = greedy_method
         print('Start to get minimum target set using method: [razor]')
         # only extract the peptide and target columns
-        extract_cols = [self.column_map['peptide'], self.column_map['target']] + self.column_map['sample_list'] if self.column_map['sample_list'] else []
+        extract_cols = [self.column_map['peptide'], self.column_map['target']]
+        extract_cols = extract_cols + self.column_map['sample_list'] if self.column_map['sample_list'] else extract_cols
         # if NA in target column, or '', raise error
         if self.df[self.column_map['target']].isna().any() or '' in self.df[self.column_map['target']].values:
             raise ValueError(f'NA or empty value in target column: {self.column_map["target"]}')
diff --git a/metax/taxafunc_analyzer/analyzer_utils/sum_protein_intensity.py b/metax/taxafunc_analyzer/analyzer_utils/sum_protein_intensity.py
index 17209aa..d6c0e26 100644
--- a/metax/taxafunc_analyzer/analyzer_utils/sum_protein_intensity.py
+++ b/metax/taxafunc_analyzer/analyzer_utils/sum_protein_intensity.py
@@ -69,7 +69,7 @@ def sum_protein_intensity(self, method='razor', by_sample=False, rank_method='un
         greedy_method: str, default 'heap'. only used for `razor` method
             options: ['greedy', 'heap']
         peptide_num_threshold: int, default None
-            the protein must have at least 3 peptides to be considered as a target
+            the protein must have at least this number of peptides to be considered as a target
         quant_method: str, default 'sum'
             options: ['sum', 'lfq']
         '''
@@ -82,23 +82,10 @@ def sum_protein_intensity(self, method='razor', by_sample=False, rank_method='un
         if peptide_num_threshold is not None:
             self.peptide_num_threshold = peptide_num_threshold
         
-        # remove the protein with less than the threshold of peptides
-        # use teh methood in RazorSum
-        razor_integrator = RazorSum(df=self.df, 
-                                    column_map={
-                                                'peptide': self.tfa.peptide_col_name,
-                                                'target': self.tfa.protein_col_name,
-                                                'sample_list': self.tfa.sample_list,
-                                            }, 
-                                    peptide_num_threshold=self.peptide_num_threshold, 
-                                    share_intensity=self.share_intensity, 
-                                    greedy_method=greedy_method,
-                                    protein_separator= self.protein_separator)
-        
         self.rank_method = rank_method
         self.check_protein_col()
         
-        self.df = razor_integrator.remove_protein_less_than_threshold()
+        # initialize the peptide number used as the total number of peptides
         self.tfa.peptide_num_used['protein'] = len(self.df)
         
         if method == 'rank':
@@ -121,8 +108,19 @@ def sum_protein_intensity(self, method='razor', by_sample=False, rank_method='un
                     self._sum_protein_rank(sample, by_sample)
         elif method == 'razor':
             print('start to sum protein intensity using method: [razor]')
+            # use the method in RazorSum
+            razor_integrator = RazorSum(df=self.df, 
+                                        column_map={
+                                                    'peptide': self.tfa.peptide_col_name,
+                                                    'target': self.tfa.protein_col_name,
+                                                    'sample_list': self.tfa.sample_list,
+                                                }, 
+                                        peptide_num_threshold=self.peptide_num_threshold, 
+                                        share_intensity=self.share_intensity, 
+                                        greedy_method=greedy_method,
+                                        protein_separator= self.protein_separator)
             if quant_method == 'sum':
-                razor_integrator.peptide_num_threshold = 1 # set the threshold to 1, to avoid run filter again
+                # razor_integrator.peptide_num_threshold = 1 # set the threshold to 1, to avoid run filter again
                 res_df = razor_integrator.sum_protein_intensity(greedy_method=greedy_method)
             elif quant_method == 'lfq':
                 from .lfq import run_lfq
@@ -137,10 +135,13 @@ def sum_protein_intensity(self, method='razor', by_sample=False, rank_method='un
                 # move teh 2 columns to the front
                 res_df = res_df[['peptides', 'peptide_num'] + [col for col in res_df.columns if col not in ['peptides', 'peptide_num']]]
                 
+            self.tfa.peptide_num_used['protein'] = len(razor_integrator.df)
             return res_df       
         
         elif method == 'anti-razor':
             print(f"\n-------------Start to sum protein intensity using method: [{method}]  by_sample: [True] rank_method: [Shared]-------------")
+            #calculate the peptide number for each protein
+            self.filter_protein_by_peptide_num()
             for sample in self.tfa.sample_list:
                 self._sum_protein_anti_razor(sample)
         
@@ -161,6 +162,30 @@ def sum_protein_intensity(self, method='razor', by_sample=False, rank_method='un
         
         return res_df
 
+
+    def filter_protein_by_peptide_num(self):
+        """Remove proteins supported by fewer than `self.peptide_num_threshold` distinct peptides.
+
+        Such proteins are cut out of every protein-group string, rows left without any protein are dropped,
+        and `self.tfa.peptide_num_used['protein']` is updated. Returns `self.df` (untouched if threshold < 2).
+        """
+        if self.peptide_num_threshold < 2:
+            return self.df
+        sep = self.protein_separator
+        peptide_col_name = self.tfa.peptide_col_name
+        protein_col_name = self.tfa.protein_col_name
+        target_to_peptides = defaultdict(set)
+        for sequence, targets in tqdm(zip(self.df[peptide_col_name], self.df[protein_col_name]), total=self.df.shape[0], desc="Creating target to peptides mapping"):
+            for target in targets.split(sep):
+                target_to_peptides[target].add(sequence)
+        proteins_less_than_threshold = {target for target, peps in target_to_peptides.items() if len(peps) < self.peptide_num_threshold}  # set: O(1) lookups below
+        print(f'Number of proteins with less than {self.peptide_num_threshold} peptides: {len(proteins_less_than_threshold)}')
+        # re-join with the configured separator (not a literal ';') so later splits on it still see every protein
+        self.df[protein_col_name] = self.df[protein_col_name].apply(lambda x: sep.join([protein for protein in x.split(sep) if protein not in proteins_less_than_threshold]))
+        self.df = self.df[self.df[protein_col_name].str.strip() != '']
+        self.tfa.peptide_num_used['protein'] = len(self.df)
+        return self.df
+    
     # razor method
     def find_minimum_protein_set(self, peptides, protein_to_peptides):
         protein_to_peptides_copy = protein_to_peptides.copy()
diff --git a/pyproject.toml b/pyproject.toml
index b99ae8e..bb5194f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "MetaXTools"
-version = "1.115.3"
+version = "1.115.4"
 description = "MetaXTools is a novel tool for linking peptide sequences with taxonomic and functional information in Metaproteomics."
 readme = "README_PyPi.md"
 license = { text = "NorthOmics" }