diff --git a/Docs/ChangeLog.md b/Docs/ChangeLog.md
index fd2c231..9e78d66 100644
--- a/Docs/ChangeLog.md
+++ b/Docs/ChangeLog.md
@@ -1,9 +1,24 @@
+# Version: 1.115.4
+## Date: 2024-10-07
+### Changes:
+- Fix: Fixed a bug where, when using Any-Data mode, the report would raise an error.
+- Change: changed the approach of filtering by the minimum peptide number threshold for proteins. (Available for the Razor and Anti-Razor methods)
+
+# Version: 1.115.3
+## Date: 2024-10-04
+### Changes:
+- Fix: Fixed the p-value/p-adjust filter option not working for functional redundancy analysis in the T-Test part.
+
+# Version: 1.115.2
+## Date: 2024-10-04
+### Changes:
+- New: added a new normalization method: "Trace Shifting" for data preprocessing.
+
# Version: 1.115.1
## Date: 2024-10-03
### Changes:
- New: added multiprocessing option for LFQ.
-
# Version: 1.115.0
## Date: 2024-10-02
### Changes:
diff --git a/Docs/MetaX_Cookbook.md b/Docs/MetaX_Cookbook.md
index d496bc0..1808fe0 100644
--- a/Docs/MetaX_Cookbook.md
+++ b/Docs/MetaX_Cookbook.md
@@ -339,7 +339,11 @@ There are several methods for detecting and handling outliers.
- **Data Normalization:**
+ - **Trace Shifting:** Reframing the Normalization Problem with Intensity traces (inspired by DirectLFQ).
+ - Note: If both trace shifting and transformation are applied, *normalization will be done before transformation.*
+
- Standard Scaling (Z-Score), Min-Max Scaling, Pareto Scaling, Mean centring and Normalization by sum.
+
If you use [Z-Score, Mean centring and Pareto Scaling] data normalization, the data will be given a minimum offset again to avoid negative values.
diff --git a/metax/gui/main_gui.py b/metax/gui/main_gui.py
index 3e133cc..7230595 100644
--- a/metax/gui/main_gui.py
+++ b/metax/gui/main_gui.py
@@ -993,10 +993,14 @@ def update_method_of_protein_inference(self):
self.checkBox_infrence_protein_by_sample.setChecked(True)
self.checkBox_infrence_protein_by_sample.setEnabled(False)
self.comboBox_protein_ranking_method.setEnabled(False)
+ # enable the peptide_num_threshold
+ self.spinBox_peptide_num_threshold_protein.setEnabled(True)
else: # method is ["rank"]
self.checkBox_infrence_protein_by_sample.setEnabled(True)
self.comboBox_protein_ranking_method.setEnabled(True)
self.checkBox_infrence_protein_by_sample.setChecked(False)
+ # disable the peptide_num_threshold
+ self.spinBox_peptide_num_threshold_protein.setEnabled(False)
@@ -1853,12 +1857,11 @@ def run_after_set_multi_tables(self):
# Final message
if self.tfa.any_df_mode:
- num_item = self.tfa.custom_df.shape[0]
+ original_num_peptide = self.tfa.custom_df.shape[0]
msg = f"""
Custom data is ready!
- {nan_stats_str}
- Number of items: [{num_item}]
+ Number of items: [{original_num_peptide}]
"""
@@ -1890,7 +1893,7 @@ def run_after_set_multi_tables(self):
Category |
- Number |
+ Number (After Filtering) |
Used Peptides |
% of All Peptides |
@@ -2619,6 +2622,7 @@ def set_multi_table(self, restore_taxafunc=False, saved_obj=None):
}
normalize_dict = {
"None": None,
+ "Trace Shifting": "trace_shift",
"Mean centering": "mean",
"Standard Scaling (Z-Score)": "zscore",
"Min-Max Scaling": "minmax",
@@ -4880,12 +4884,13 @@ def t_test(self):
try:
self.pushButton_ttest.setEnabled(False)
group_list = [group1, group2]
- table_names = []
+ table_names = [] # reset table_names as empty list
if df_type == 'Significant Taxa-Func'.lower():
p_value = self.doubleSpinBox_top_heatmap_pvalue.value()
p_value = round(p_value, 4)
+ p_type = self.comboBox_top_heatmap_p_type.currentText()
- ttest_sig_tf_params = {'group_list': group_list, 'p_value': p_value, 'condition': condition}
+ ttest_sig_tf_params = {'group_list': group_list, 'p_value': p_value, 'condition': condition, "p_type": p_type}
self.run_in_new_window(self.tfa.CrossTest.get_stats_diff_taxa_but_func, callback= self.callback_after_ttest, **ttest_sig_tf_params)
diff --git a/metax/gui/metax_gui/main_window.ui b/metax/gui/metax_gui/main_window.ui
index 22e9bc0..818a6dc 100644
--- a/metax/gui/metax_gui/main_window.ui
+++ b/metax/gui/metax_gui/main_window.ui
@@ -46,7 +46,7 @@
Qt::LeftToRight
- 2
+ 4
false
@@ -939,6 +939,11 @@
None
+ -
+
+ Trace Shifting
+
+
-
Standard Scaling (Z-Score)
@@ -1471,7 +1476,7 @@
16777215
- 280
+ 300
@@ -1500,7 +1505,7 @@
0
0
- 660
+ 1016
232
@@ -2754,7 +2759,7 @@
16777215
- 280
+ 300
@@ -3745,7 +3750,7 @@
16777215
- 240
+ 280
@@ -4796,7 +4801,7 @@
QTabWidget::Triangular
- 3
+ 2
@@ -5139,7 +5144,7 @@
- -
+
-
false
@@ -5234,64 +5239,11 @@
- -
-
-
- Qt::Horizontal
-
-
-
- -
-
-
- false
-
-
- Run Deseq2
-
-
-
-
-
-
-
-
- -
-
-
-
- 0
- 0
-
-
-
- Groups (Default all)
-
-
-
-
- -
-
-
- Control Group
-
-
-
- -
-
-
-
- 0
- 0
-
-
-
- Comparing in Each Condition
-
-
-
-
-
@@ -5316,8 +5268,61 @@
+ -
+
+
+
+ 0
+ 0
+
+
+
+ Comparing in Each Condition
+
+
+
+ -
+
+
+ -
+
+
+
+ 0
+ 0
+
+
+
+ Groups (Default all)
+
+
+
+ -
+
+
+ Control Group
+
+
+
+ -
+
+
+ Qt::Horizontal
+
+
+
+ -
+
+
+ false
+
+
+ Run Deseq2
+
+
+
@@ -5668,8 +5673,8 @@
0
0
- 996
- 103
+ 535
+ 94
@@ -6202,7 +6207,7 @@
16777215
- 220
+ 240
@@ -7432,7 +7437,7 @@
16777215
- 220
+ 240
@@ -7814,7 +7819,7 @@
QTabWidget::Triangular
- 0
+ 1
@@ -8141,7 +8146,7 @@
16777215
- 220
+ 240
@@ -8158,8 +8163,8 @@
0
0
- 1016
- 185
+ 775
+ 102
@@ -9253,7 +9258,7 @@
16777215
- 220
+ 240
@@ -9270,8 +9275,8 @@
0
0
- 383
- 68
+ 1016
+ 141
diff --git a/metax/gui/metax_gui/ui_main_window.py b/metax/gui/metax_gui/ui_main_window.py
index f077358..f1c6c9b 100644
--- a/metax/gui/metax_gui/ui_main_window.py
+++ b/metax/gui/metax_gui/ui_main_window.py
@@ -484,6 +484,7 @@ def setupUi(self, metaX_main):
self.comboBox_set_data_normalization.addItem("")
self.comboBox_set_data_normalization.addItem("")
self.comboBox_set_data_normalization.addItem("")
+ self.comboBox_set_data_normalization.addItem("")
self.gridLayout_15.addWidget(self.comboBox_set_data_normalization, 7, 1, 1, 3)
self.comboBox_remove_batch_effect = QtWidgets.QComboBox(self.tab_set_taxa_func)
sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Fixed)
@@ -729,7 +730,7 @@ def setupUi(self, metaX_main):
self.line_7.setObjectName("line_7")
self.gridLayout_26.addWidget(self.line_7, 1, 0, 1, 3)
self.groupBox_basic_plot = QtWidgets.QGroupBox(self.tab_12)
- self.groupBox_basic_plot.setMaximumSize(QtCore.QSize(16777215, 280))
+ self.groupBox_basic_plot.setMaximumSize(QtCore.QSize(16777215, 300))
self.groupBox_basic_plot.setObjectName("groupBox_basic_plot")
self.gridLayout_40 = QtWidgets.QGridLayout(self.groupBox_basic_plot)
self.gridLayout_40.setObjectName("gridLayout_40")
@@ -743,7 +744,7 @@ def setupUi(self, metaX_main):
self.scrollArea.setWidgetResizable(True)
self.scrollArea.setObjectName("scrollArea")
self.scrollAreaWidgetContents = QtWidgets.QWidget()
- self.scrollAreaWidgetContents.setGeometry(QtCore.QRect(0, 0, 660, 232))
+ self.scrollAreaWidgetContents.setGeometry(QtCore.QRect(0, 0, 1016, 232))
self.scrollAreaWidgetContents.setObjectName("scrollAreaWidgetContents")
self.gridLayout_34 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents)
self.gridLayout_34.setObjectName("gridLayout_34")
@@ -1399,7 +1400,7 @@ def setupUi(self, metaX_main):
self.pushButton_basic_heatmap_add.setObjectName("pushButton_basic_heatmap_add")
self.gridLayout_23.addWidget(self.pushButton_basic_heatmap_add, 5, 3, 1, 1)
self.groupBox_basic_heatmap_plot_settings = QtWidgets.QGroupBox(self.tab_13)
- self.groupBox_basic_heatmap_plot_settings.setMaximumSize(QtCore.QSize(16777215, 280))
+ self.groupBox_basic_heatmap_plot_settings.setMaximumSize(QtCore.QSize(16777215, 300))
self.groupBox_basic_heatmap_plot_settings.setObjectName("groupBox_basic_heatmap_plot_settings")
self.gridLayout_41 = QtWidgets.QGridLayout(self.groupBox_basic_heatmap_plot_settings)
self.gridLayout_41.setObjectName("gridLayout_41")
@@ -1942,7 +1943,7 @@ def setupUi(self, metaX_main):
self.gridLayout_46.addWidget(self.checkBox_2, 1, 0, 1, 1)
self.gridLayout_75.addLayout(self.gridLayout_46, 0, 0, 1, 1)
self.groupBox_cross_heatmap_settings = QtWidgets.QGroupBox(self.groupBox_cross_heatmap_plot)
- self.groupBox_cross_heatmap_settings.setMaximumSize(QtCore.QSize(16777215, 240))
+ self.groupBox_cross_heatmap_settings.setMaximumSize(QtCore.QSize(16777215, 280))
self.groupBox_cross_heatmap_settings.setObjectName("groupBox_cross_heatmap_settings")
self.gridLayout_52 = QtWidgets.QGridLayout(self.groupBox_cross_heatmap_settings)
self.gridLayout_52.setObjectName("gridLayout_52")
@@ -2590,7 +2591,7 @@ def setupUi(self, metaX_main):
self.pushButton_dunnett_test = QtWidgets.QPushButton(self.tab_16)
self.pushButton_dunnett_test.setEnabled(False)
self.pushButton_dunnett_test.setObjectName("pushButton_dunnett_test")
- self.gridLayout_33.addWidget(self.pushButton_dunnett_test, 10, 1, 1, 2)
+ self.gridLayout_33.addWidget(self.pushButton_dunnett_test, 10, 1, 1, 1)
self.horizontalLayout_39 = QtWidgets.QHBoxLayout()
self.horizontalLayout_39.setObjectName("horizontalLayout_39")
self.label_112 = QtWidgets.QLabel(self.tab_16)
@@ -2635,42 +2636,11 @@ def setupUi(self, metaX_main):
self.horizontalLayout_73.addWidget(self.comboBox_group_control_condition_group)
self.horizontalLayout_39.addLayout(self.horizontalLayout_73)
self.gridLayout_33.addLayout(self.horizontalLayout_39, 1, 1, 1, 2)
- self.line_26 = QtWidgets.QFrame(self.tab_16)
- self.line_26.setFrameShape(QtWidgets.QFrame.HLine)
- self.line_26.setFrameShadow(QtWidgets.QFrame.Sunken)
- self.line_26.setObjectName("line_26")
- self.gridLayout_33.addWidget(self.line_26, 9, 1, 1, 2)
- self.pushButton_multi_deseq2 = QtWidgets.QPushButton(self.tab_16)
- self.pushButton_multi_deseq2.setEnabled(False)
- self.pushButton_multi_deseq2.setObjectName("pushButton_multi_deseq2")
- self.gridLayout_33.addWidget(self.pushButton_multi_deseq2, 11, 1, 1, 2)
self.gridLayout_72 = QtWidgets.QGridLayout()
self.gridLayout_72.setObjectName("gridLayout_72")
- self.horizontalLayout_dunnett_group = QtWidgets.QHBoxLayout()
- self.horizontalLayout_dunnett_group.setObjectName("horizontalLayout_dunnett_group")
- self.gridLayout_72.addLayout(self.horizontalLayout_dunnett_group, 1, 1, 1, 1)
- self.label_114 = QtWidgets.QLabel(self.tab_16)
- sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Fixed)
- sizePolicy.setHorizontalStretch(0)
- sizePolicy.setVerticalStretch(0)
- sizePolicy.setHeightForWidth(self.label_114.sizePolicy().hasHeightForWidth())
- self.label_114.setSizePolicy(sizePolicy)
- self.label_114.setObjectName("label_114")
- self.gridLayout_72.addWidget(self.label_114, 0, 1, 1, 1)
self.comboBox_dunnett_control_group = QtWidgets.QComboBox(self.tab_16)
self.comboBox_dunnett_control_group.setObjectName("comboBox_dunnett_control_group")
self.gridLayout_72.addWidget(self.comboBox_dunnett_control_group, 1, 0, 1, 1)
- self.label_115 = QtWidgets.QLabel(self.tab_16)
- self.label_115.setObjectName("label_115")
- self.gridLayout_72.addWidget(self.label_115, 0, 0, 1, 1)
- self.checkBox_comparing_group_control_in_condition = QtWidgets.QCheckBox(self.tab_16)
- sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Fixed)
- sizePolicy.setHorizontalStretch(0)
- sizePolicy.setVerticalStretch(0)
- sizePolicy.setHeightForWidth(self.checkBox_comparing_group_control_in_condition.sizePolicy().hasHeightForWidth())
- self.checkBox_comparing_group_control_in_condition.setSizePolicy(sizePolicy)
- self.checkBox_comparing_group_control_in_condition.setObjectName("checkBox_comparing_group_control_in_condition")
- self.gridLayout_72.addWidget(self.checkBox_comparing_group_control_in_condition, 2, 0, 1, 1)
self.horizontalLayout_24 = QtWidgets.QHBoxLayout()
self.horizontalLayout_24.setObjectName("horizontalLayout_24")
self.label_140 = QtWidgets.QLabel(self.tab_16)
@@ -2686,7 +2656,38 @@ def setupUi(self, metaX_main):
self.comboBox_group_control_comparing_each_condition_meta.setObjectName("comboBox_group_control_comparing_each_condition_meta")
self.horizontalLayout_24.addWidget(self.comboBox_group_control_comparing_each_condition_meta)
self.gridLayout_72.addLayout(self.horizontalLayout_24, 2, 1, 1, 1)
+ self.checkBox_comparing_group_control_in_condition = QtWidgets.QCheckBox(self.tab_16)
+ sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Fixed)
+ sizePolicy.setHorizontalStretch(0)
+ sizePolicy.setVerticalStretch(0)
+ sizePolicy.setHeightForWidth(self.checkBox_comparing_group_control_in_condition.sizePolicy().hasHeightForWidth())
+ self.checkBox_comparing_group_control_in_condition.setSizePolicy(sizePolicy)
+ self.checkBox_comparing_group_control_in_condition.setObjectName("checkBox_comparing_group_control_in_condition")
+ self.gridLayout_72.addWidget(self.checkBox_comparing_group_control_in_condition, 2, 0, 1, 1)
+ self.horizontalLayout_dunnett_group = QtWidgets.QHBoxLayout()
+ self.horizontalLayout_dunnett_group.setObjectName("horizontalLayout_dunnett_group")
+ self.gridLayout_72.addLayout(self.horizontalLayout_dunnett_group, 1, 1, 1, 1)
+ self.label_114 = QtWidgets.QLabel(self.tab_16)
+ sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Fixed)
+ sizePolicy.setHorizontalStretch(0)
+ sizePolicy.setVerticalStretch(0)
+ sizePolicy.setHeightForWidth(self.label_114.sizePolicy().hasHeightForWidth())
+ self.label_114.setSizePolicy(sizePolicy)
+ self.label_114.setObjectName("label_114")
+ self.gridLayout_72.addWidget(self.label_114, 0, 1, 1, 1)
+ self.label_115 = QtWidgets.QLabel(self.tab_16)
+ self.label_115.setObjectName("label_115")
+ self.gridLayout_72.addWidget(self.label_115, 0, 0, 1, 1)
self.gridLayout_33.addLayout(self.gridLayout_72, 4, 1, 1, 2)
+ self.line_26 = QtWidgets.QFrame(self.tab_16)
+ self.line_26.setFrameShape(QtWidgets.QFrame.HLine)
+ self.line_26.setFrameShadow(QtWidgets.QFrame.Sunken)
+ self.line_26.setObjectName("line_26")
+ self.gridLayout_33.addWidget(self.line_26, 9, 1, 1, 2)
+ self.pushButton_multi_deseq2 = QtWidgets.QPushButton(self.tab_16)
+ self.pushButton_multi_deseq2.setEnabled(False)
+ self.pushButton_multi_deseq2.setObjectName("pushButton_multi_deseq2")
+ self.gridLayout_33.addWidget(self.pushButton_multi_deseq2, 10, 2, 1, 1)
self.tabWidget_3.addTab(self.tab_16, "")
self.tab_19 = QtWidgets.QWidget()
self.tab_19.setObjectName("tab_19")
@@ -2890,7 +2891,7 @@ def setupUi(self, metaX_main):
self.scrollArea_3.setWidgetResizable(True)
self.scrollArea_3.setObjectName("scrollArea_3")
self.scrollAreaWidgetContents_4 = QtWidgets.QWidget()
- self.scrollAreaWidgetContents_4.setGeometry(QtCore.QRect(0, 0, 996, 103))
+ self.scrollAreaWidgetContents_4.setGeometry(QtCore.QRect(0, 0, 535, 94))
self.scrollAreaWidgetContents_4.setObjectName("scrollAreaWidgetContents_4")
self.gridLayout_68 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_4)
self.gridLayout_68.setObjectName("gridLayout_68")
@@ -3183,7 +3184,7 @@ def setupUi(self, metaX_main):
self.gridLayout_co_expr_sample.setObjectName("gridLayout_co_expr_sample")
self.gridLayout_47.addLayout(self.gridLayout_co_expr_sample, 3, 1, 1, 3)
self.groupBox_co_expression_plot_settings = QtWidgets.QGroupBox(self.tab_5)
- self.groupBox_co_expression_plot_settings.setMaximumSize(QtCore.QSize(16777215, 220))
+ self.groupBox_co_expression_plot_settings.setMaximumSize(QtCore.QSize(16777215, 240))
self.groupBox_co_expression_plot_settings.setObjectName("groupBox_co_expression_plot_settings")
self.gridLayout_56 = QtWidgets.QGridLayout(self.groupBox_co_expression_plot_settings)
self.gridLayout_56.setObjectName("gridLayout_56")
@@ -3828,7 +3829,7 @@ def setupUi(self, metaX_main):
self.label_100.setObjectName("label_100")
self.gridLayout_24.addWidget(self.label_100, 5, 0, 1, 1)
self.groupBox_expression_trends_plot_settings = QtWidgets.QGroupBox(self.tab_15)
- self.groupBox_expression_trends_plot_settings.setMaximumSize(QtCore.QSize(16777215, 220))
+ self.groupBox_expression_trends_plot_settings.setMaximumSize(QtCore.QSize(16777215, 240))
self.groupBox_expression_trends_plot_settings.setObjectName("groupBox_expression_trends_plot_settings")
self.gridLayout_60 = QtWidgets.QGridLayout(self.groupBox_expression_trends_plot_settings)
self.gridLayout_60.setObjectName("gridLayout_60")
@@ -4209,7 +4210,7 @@ def setupUi(self, metaX_main):
self.label_149.setObjectName("label_149")
self.gridLayout_4.addWidget(self.label_149, 0, 0, 1, 1)
self.groupBox_taxa_func_link_plot_settings = QtWidgets.QGroupBox(self.tab_8)
- self.groupBox_taxa_func_link_plot_settings.setMaximumSize(QtCore.QSize(16777215, 220))
+ self.groupBox_taxa_func_link_plot_settings.setMaximumSize(QtCore.QSize(16777215, 240))
self.groupBox_taxa_func_link_plot_settings.setObjectName("groupBox_taxa_func_link_plot_settings")
self.gridLayout_65 = QtWidgets.QGridLayout(self.groupBox_taxa_func_link_plot_settings)
self.gridLayout_65.setObjectName("gridLayout_65")
@@ -4217,7 +4218,7 @@ def setupUi(self, metaX_main):
self.scrollArea_6.setWidgetResizable(True)
self.scrollArea_6.setObjectName("scrollArea_6")
self.scrollAreaWidgetContents_7 = QtWidgets.QWidget()
- self.scrollAreaWidgetContents_7.setGeometry(QtCore.QRect(0, 0, 1016, 185))
+ self.scrollAreaWidgetContents_7.setGeometry(QtCore.QRect(0, 0, 775, 102))
self.scrollAreaWidgetContents_7.setObjectName("scrollAreaWidgetContents_7")
self.gridLayout_69 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_7)
self.gridLayout_69.setObjectName("gridLayout_69")
@@ -4837,7 +4838,7 @@ def setupUi(self, metaX_main):
self.pushButton_plot_network.setObjectName("pushButton_plot_network")
self.gridLayout_6.addWidget(self.pushButton_plot_network, 10, 1, 1, 3)
self.groupBox_taxa_func_link_net_plot_settings = QtWidgets.QGroupBox(self.tab_9)
- self.groupBox_taxa_func_link_net_plot_settings.setMaximumSize(QtCore.QSize(16777215, 220))
+ self.groupBox_taxa_func_link_net_plot_settings.setMaximumSize(QtCore.QSize(16777215, 240))
self.groupBox_taxa_func_link_net_plot_settings.setObjectName("groupBox_taxa_func_link_net_plot_settings")
self.gridLayout_63 = QtWidgets.QGridLayout(self.groupBox_taxa_func_link_net_plot_settings)
self.gridLayout_63.setObjectName("gridLayout_63")
@@ -4845,7 +4846,7 @@ def setupUi(self, metaX_main):
self.scrollArea_7.setWidgetResizable(True)
self.scrollArea_7.setObjectName("scrollArea_7")
self.scrollAreaWidgetContents_8 = QtWidgets.QWidget()
- self.scrollAreaWidgetContents_8.setGeometry(QtCore.QRect(0, 0, 383, 68))
+ self.scrollAreaWidgetContents_8.setGeometry(QtCore.QRect(0, 0, 1016, 141))
self.scrollAreaWidgetContents_8.setObjectName("scrollAreaWidgetContents_8")
self.gridLayout_66 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_8)
self.gridLayout_66.setObjectName("gridLayout_66")
@@ -5416,12 +5417,12 @@ def setupUi(self, metaX_main):
self.retranslateUi(metaX_main)
self.stackedWidget.setCurrentIndex(0)
- self.tabWidget_TaxaFuncAnalyzer.setCurrentIndex(2)
+ self.tabWidget_TaxaFuncAnalyzer.setCurrentIndex(4)
self.toolBox_2.setCurrentIndex(0)
self.tabWidget_4.setCurrentIndex(1)
- self.tabWidget_3.setCurrentIndex(3)
+ self.tabWidget_3.setCurrentIndex(2)
self.tabWidget.setCurrentIndex(1)
- self.tabWidget_2.setCurrentIndex(0)
+ self.tabWidget_2.setCurrentIndex(1)
self.tabWidget_6.setCurrentIndex(1)
self.toolBox_metalab_res_anno.setCurrentIndex(0)
self.tabWidget_5.setCurrentIndex(0)
@@ -5555,11 +5556,12 @@ def retranslateUi(self, metaX_main):
self.comboBox_outlier_handling_method2.setItemText(4, _translate("metaX_main", "regression"))
self.label_102.setText(_translate("metaX_main", "Outliers Handling by"))
self.comboBox_set_data_normalization.setItemText(0, _translate("metaX_main", "None"))
- self.comboBox_set_data_normalization.setItemText(1, _translate("metaX_main", "Standard Scaling (Z-Score)"))
- self.comboBox_set_data_normalization.setItemText(2, _translate("metaX_main", "Min-Max Scaling"))
- self.comboBox_set_data_normalization.setItemText(3, _translate("metaX_main", "Pareto Scaling"))
- self.comboBox_set_data_normalization.setItemText(4, _translate("metaX_main", "Mean centering"))
- self.comboBox_set_data_normalization.setItemText(5, _translate("metaX_main", "Normalization by sum"))
+ self.comboBox_set_data_normalization.setItemText(1, _translate("metaX_main", "Trace Shifting"))
+ self.comboBox_set_data_normalization.setItemText(2, _translate("metaX_main", "Standard Scaling (Z-Score)"))
+ self.comboBox_set_data_normalization.setItemText(3, _translate("metaX_main", "Min-Max Scaling"))
+ self.comboBox_set_data_normalization.setItemText(4, _translate("metaX_main", "Pareto Scaling"))
+ self.comboBox_set_data_normalization.setItemText(5, _translate("metaX_main", "Mean centering"))
+ self.comboBox_set_data_normalization.setItemText(6, _translate("metaX_main", "Normalization by sum"))
self.comboBox_remove_batch_effect.setItemText(0, _translate("metaX_main", "None"))
self.label_41.setText(_translate("metaX_main", "Data Normalization"))
self.label_43.setText(_translate("metaX_main", "Batch Effect Correction"))
@@ -5837,11 +5839,11 @@ def retranslateUi(self, metaX_main):
self.comboBox_table_for_dunnett.setItemText(3, _translate("metaX_main", "peptides"))
self.label_113.setText(_translate("metaX_main", "Meta"))
self.checkBox_group_control_in_condition.setText(_translate("metaX_main", "In Condition"))
- self.pushButton_multi_deseq2.setText(_translate("metaX_main", "Run Deseq2"))
+ self.label_140.setText(_translate("metaX_main", " By:"))
+ self.checkBox_comparing_group_control_in_condition.setText(_translate("metaX_main", "Comparing in Each Condition"))
self.label_114.setText(_translate("metaX_main", "Groups (Default all)"))
self.label_115.setText(_translate("metaX_main", "Control Group"))
- self.checkBox_comparing_group_control_in_condition.setText(_translate("metaX_main", "Comparing in Each Condition"))
- self.label_140.setText(_translate("metaX_main", " By:"))
+ self.pushButton_multi_deseq2.setText(_translate("metaX_main", "Run Deseq2"))
self.tabWidget_3.setTabText(self.tabWidget_3.indexOf(self.tab_16), _translate("metaX_main", "Group-Control TEST "))
self.label_166.setText(_translate("metaX_main", "Groups"))
self.pushButton_deseq2.setText(_translate("metaX_main", "Run DESeq2"))
diff --git a/metax/taxafunc_analyzer/analyzer.py b/metax/taxafunc_analyzer/analyzer.py
index 8dbb7ac..540e421 100644
--- a/metax/taxafunc_analyzer/analyzer.py
+++ b/metax/taxafunc_analyzer/analyzer.py
@@ -54,6 +54,7 @@ def __init__(
self.peptide_col_name = peptide_col_name
self.protein_col_name = protein_col_name
+ self.protein_separator = ';'
self.custom_col_name = custom_col_name
self.sample_list: Optional[List[str]] = None
self.meta_df: Optional[pd.DataFrame] = None
@@ -78,6 +79,7 @@ def __init__(
self.any_df_mode = any_df_mode # if True, the consider the TaxaFunc df as other_df
self.custom_df: Optional[pd.DataFrame] = None # other df, any df that user want to add
self.peptide_num_used = {'taxa': 0, 'func': 0, 'taxa_func': 0, 'protein': 0}
+ self.distinct_peptides_list: list|None = None
self.split_func_status:bool = False
self.split_func_sep:str = ''
@@ -688,12 +690,144 @@ def run_lfq_for_taxa_func(self, df_taxa_func):
df_taxa_func = df_taxa_func.set_index(['Taxon', self.func_name], drop=True)
return df_taxa_func
+
+ def calculate_distinct_peptides(self): #! NOT USED YET
+ # extract the peptide column and protein_col_name
+ print("Calculating distinct peptides list...")
+ extract_cols = [self.peptide_col_name, self.protein_col_name]
+ df = self.original_df[extract_cols]
+ separate_protein = self.protein_separator
+ df['protein_num'] = df[self.protein_col_name].apply(lambda x: len(x.split(separate_protein)))
+ df = df[df['protein_num'] == 1]
+ distinct_peptides = df[self.peptide_col_name].tolist()
+ self.distinct_peptides_list = distinct_peptides
+
+
+ def update_data_preprocess_parameters(self, data_preprocess_params):
+
+ normalize_method = data_preprocess_params['normalize_method']
+ transform_method = data_preprocess_params['transform_method']
+ processing_order = data_preprocess_params['processing_order']
+
+ if 'trace_shift' == normalize_method and transform_method not in ['None', None]:
+ print(f'Warning: [Trace Shifting] and {transform_method} are both set, Normalize will be prior to Transform.')
+ # move 'normalize' to the first
+ processing_order = ['normalize'] + [i for i in processing_order if i != 'normalize']
+ print(f'Data Preprocessing order: {processing_order}')
+
+ data_preprocess_params['processing_order'] = processing_order
+
+
+ return data_preprocess_params
+
+ def filter_peptides_num_for_splited_func(self, df, peptide_num_threshold, df_type):
+ '''
+ Only for the splited func table or taxa_func table
+ - df: the splited func table or taxa_func table which has been grouped, index is the func or taxa_func
+ - peptide_num_threshold: the threshold of peptide number for each func or taxa_func
+ - df_type: 'func' or 'taxa_func'
+ '''
+
+ valid_df_types = ['func', 'taxa_func']
+ if df_type not in valid_df_types:
+ raise ValueError(f"df_type must be one of {valid_df_types}, your input is [{df_type}]")
+
+ peptide_num= peptide_num_threshold[df_type]
+ df_original_len = len(df)
+
+ df = df[df['peptide_num'] >= peptide_num]
+ print(f"Removed [{df_original_len - len(df)} {df_type}] with less than [{peptide_num}] peptides.")
+ return df
+
+
+
+
+ def filter_peptides_num(self, df, peptide_num_threshold, df_type, distinct_threshold_mode=False):
+ '''
+ Filter the peptides based on the peptide number threshold
+ - df: the original df including peptides, taxa, and functions, etc.
+ - peptide_num_threshold: the threshold of peptide number for each taxa or func
+ - df_type: 'taxa', 'func', or 'taxa_func'
+ - distinct_threshold_mode: TODO
+ '''
+ valid_df_types = ['taxa', 'func', 'taxa_func']
+ if df_type not in valid_df_types:
+ raise ValueError(f"df_type must be one of {valid_df_types}, your input is [{df_type}]")
+
+ peptide_num= peptide_num_threshold[df_type]
+ df_original_len = len(df)
+
+ if df_type == 'taxa_func':
+ item_col = 'taxa_func'
+ df['taxa_func'] = df['Taxon'] + '&&&&' + df[self.func_name]
+ else:
+ item_col = 'Taxon' if df_type == 'taxa' else self.func_name
+
+ # # if True: #! Need to be implemented
+ # if distinct_threshold_mode:
+ # if self.distinct_peptides_list is None:
+ # self.calculate_distinct_peptides()
+
+ # peptides_in_taxa_func = defaultdict(list)
+ # peptides_in_taxa = defaultdict(list)
+ # peptides_in_func = defaultdict(list)
+ # skiped_peptides_list = []
+ # for row in tqdm(df.itertuples(index=False), total=len(df), desc="Creating peptides_dict"):
+ # peptide = row[0]
+ # if peptide not in self.distinct_peptides_list:
+ # skiped_peptides_list.append(peptide)
+ # continue
+
+ # if df_type == 'taxa':
+ # taxa = row[1]
+ # # Append peptide to taxa list
+ # peptides_in_taxa[taxa].append(peptide)
+
+ # if self.split_func_status:
+ # func_list = [f.strip() for f in row[2].split(self.split_func_sep)]
+ # # Process each function in the func_list
+ # for func in func_list:
+ # peptides_in_func[func].append(peptide)
+ # taxa_func = f'{taxa}&&&&{func}'
+ # peptides_in_taxa_func[taxa_func].append(peptide)
+ # else:
+ # if df_type in ['func', 'taxa_func']:
+ # taxa = row[1]
+ # func = row[2]
+ # # Append peptide to func list
+ # peptides_in_func[func].append(peptide)
+ # # Create combined key for taxa_func
+ # taxa_func = f'{taxa}&&&&{func}'
+ # peptides_in_taxa_func[taxa_func].append(peptide)
+
+ # peitides_dict = {'taxa': peptides_in_taxa, 'func': peptides_in_func, 'taxa_func': peptides_in_taxa_func}
+ # remove_list = [k for k, v in peitides_dict[df_type].items() if len(v) < peptide_num]
+ # skiped_peptides_list = set(skiped_peptides_list)
+
+
+ # else:
+ # Group by item_col and filter based on peptide number
+ dict_item_pep_num = df.groupby(item_col).size().to_dict()
+ remove_list = [k for k, v in dict_item_pep_num.items() if v < peptide_num]
+
+ # Remove rows based on peptide number threshold
+ df = df[~df[item_col].isin(remove_list)]
+
+ if df_type == 'taxa_func':
+ df = df.drop('taxa_func', axis=1)
+
+ self.peptide_num_used[df_type] = len(df)
+ print(f"Removed [{len(set((remove_list)))} {df_type}] from [{df_original_len - len(df)} Peptides] with less than [{peptide_num}] peptides.")
+
+ return df
+
+
def set_multi_tables(self, level: str = 's', func_threshold:float = 1.00,
outlier_params: dict = {'detect_method': None, 'handle_method': None,
"detection_by_group" : None, "handle_by_group": None},
data_preprocess_params: dict = {'normalize_method': None, 'transform_method': None,
- 'batch_meta': None, 'processing_order': None},
+ 'batch_meta': None, 'processing_order': ['transform', 'normalize', 'batch']},
peptide_num_threshold: dict = {'taxa': 1, 'func': 1, 'taxa_func': 1},
sum_protein:bool = False, sum_protein_params: dict = {'method': 'razor',
'by_sample': False,
@@ -731,8 +865,8 @@ def set_multi_tables(self, level: str = 's', func_threshold:float = 1.00,
return
#! fllowing code is for the normal mode
- # add 'peptide_num_threshold' to 'data_preprocess_params
- data_preprocess_params['peptide_num_threshold'] = peptide_num_threshold
+ # Update 'data_preprocess_params'
+ data_preprocess_params = self.update_data_preprocess_parameters(data_preprocess_params)
#2. sum the protein intensity
if sum_protein:
@@ -741,7 +875,10 @@ def set_multi_tables(self, level: str = 's', func_threshold:float = 1.00,
self.peptide_num_used['protein'] = 0
sum_protein_params['quant_method'] = quant_method
df_peptide_for_protein = self.detect_and_handle_outliers(df=self.original_df, **outlier_params)
- self.protein_df = SumProteinIntensity(taxa_func_analyzer=self, df=df_peptide_for_protein).sum_protein_intensity( **sum_protein_params)
+ self.protein_df = SumProteinIntensity(taxa_func_analyzer=self, df=df_peptide_for_protein,
+ peptide_num_threshold=sum_protein_params['peptide_num_threshold'],
+ protein_separator = self.protein_separator
+ ).sum_protein_intensity( **sum_protein_params)
self.protein_df = self.data_preprocess(df=self.protein_df,df_name = 'protein',
**data_preprocess_params)
@@ -802,10 +939,15 @@ def strip_taxa(x, level):
if not taxa_and_func_only_from_otf:
# extract 'taxa', sample intensity #! and 'peptide_col' to avoid the duplicated items when handling outlier
- df_taxa_pep = df_filtered_peptides[[self.peptide_col_name,'Taxon'] + self.sample_list]
+ df_taxa_pep = df_filtered_peptides[[self.peptide_col_name,'Taxon'] + self.sample_list] # type: ignore
print("\n-----Starting to perform outlier detection and handling for [Peptide-Taxon] table...-----")
df_taxa_pep = self.detect_and_handle_outliers(df=df_taxa_pep, **outlier_params)
- self.peptide_num_used['taxa'] = len(df_taxa_pep)
+ #TODO: use the peptide number after filtering the minimum peptide number
+ # statastic the peptide number of each taxa
+ df_taxa_pep = self.filter_peptides_num(df=df_taxa_pep, peptide_num_threshold=peptide_num_threshold, df_type='taxa')
+
+
+ # self.peptide_num_used['taxa'] = len(df_taxa_pep)
# add column 'peptide_num' to df_taxa as 1
df_taxa_pep['peptide_num'] = 1
@@ -825,7 +967,8 @@ def strip_taxa(x, level):
df_func_pep = df_func_pep[[self.peptide_col_name, self.func_name] + self.sample_list]
print("\n-----Starting to perform outlier detection and handling for [Peptide-Function] table...-----")
df_func_pep = self.detect_and_handle_outliers(df=df_func_pep, **outlier_params)
- self.peptide_num_used['func'] = len(df_func_pep)
+ if not split_func:
+ df_func_pep = self.filter_peptides_num(df=df_func_pep, peptide_num_threshold=peptide_num_threshold, df_type='func')
df_func_pep['peptide_num'] = 1
if quant_method == 'lfq':
@@ -834,8 +977,10 @@ def strip_taxa(x, level):
df_func = df_func_pep.groupby(self.func_name).sum(numeric_only=True)
if split_func:
+ self.peptide_num_used['func'] = len(df_func_pep)
df_func = self.split_func(df=df_func, split_func_params=split_func_params, df_type='func')
-
+ df_func = self.filter_peptides_num_for_splited_func(df=df_func, peptide_num_threshold=peptide_num_threshold, df_type='func')
+
df_func = self.data_preprocess(df=df_func,df_name = 'func', **data_preprocess_params)
self.func_df = df_func
#-----Func Table End-----
@@ -868,6 +1013,8 @@ def strip_taxa(x, level):
# ----- create taxa_func table -----
df_taxa_func = df_half_processed_peptides[[self.peptide_col_name, 'Taxon', self.func_name] + self.sample_list]
df_taxa_func['peptide_num'] = 1
+ if not split_func:
+ df_taxa_func = self.filter_peptides_num(df=df_taxa_func, peptide_num_threshold=peptide_num_threshold, df_type='taxa_func')
for key in ['taxa_func', 'taxa', 'func']:
self.peptide_num_used[key] = len(df_taxa_func) if self.peptide_num_used[key] == 0 else self.peptide_num_used[key]
@@ -880,6 +1027,9 @@ def strip_taxa(x, level):
# split the function before data preprocess
if split_func:
df_taxa_func = self.split_func( df=df_taxa_func, split_func_params=split_func_params, df_type='taxa_func')
+ df_taxa_func = self.filter_peptides_num_for_splited_func(df=df_taxa_func, peptide_num_threshold=peptide_num_threshold,
+ df_type='taxa_func')
+
print("\n-----Starting to perform data pre-processing for [Taxa-Function] table...-----")
df_taxa_func_all_processed = self.data_preprocess(df=df_taxa_func
@@ -1006,18 +1156,19 @@ def get_df(self, table_name:str = 'taxa'):
sw.set_func('KEGG_Pathway_name')
sw.set_group('Individual')
sw.set_multi_tables(level='s',
- outlier_params = {'detect_method': 'zero-dominant', 'handle_method': 'original',
+ outlier_params = {'detect_method': 'None', 'handle_method': 'original',
"detection_by_group" : 'Individual', "handle_by_group": None},
data_preprocess_params = {
- 'normalize_method': None,
+ 'normalize_method': 'None',
'transform_method': "log2",
'batch_meta': 'None',
- 'processing_order': None},
- peptide_num_threshold = {'taxa': 2, 'func': 2, 'taxa_func': 2},
- keep_unknow_func=False, sum_protein=False,
- sum_protein_params = {'method': 'razor', 'by_sample': False, 'rank_method': 'unique_counts', 'greedy_method': 'heap', 'peptide_num_threshold': 3},
- split_func=True, split_func_params = {'split_by': '|', 'share_intensity': False},
- taxa_and_func_only_from_otf=False, quant_method='lfq'
+ 'processing_order': ['transform', 'normalize', 'batch']},
+ peptide_num_threshold = {'taxa': 3, 'func': 3, 'taxa_func': 3},
+ keep_unknow_func=False,
+ sum_protein=True,
+ sum_protein_params = {'method': 'anti-razor', 'by_sample': False, 'rank_method': 'unique_counts', 'greedy_method': 'heap', 'peptide_num_threshold': 3},
+ split_func=False, split_func_params = {'split_by': '|', 'share_intensity': False},
+ taxa_and_func_only_from_otf=False, quant_method='sum'
)
sw.check_attributes()
\ No newline at end of file
diff --git a/metax/taxafunc_analyzer/analyzer_utils/cross_test.py b/metax/taxafunc_analyzer/analyzer_utils/cross_test.py
index 1139341..6ae5e51 100644
--- a/metax/taxafunc_analyzer/analyzer_utils/cross_test.py
+++ b/metax/taxafunc_analyzer/analyzer_utils/cross_test.py
@@ -598,7 +598,7 @@ def get_stats_diff_taxa_but_func(self, group_list: list|None = None, p_value: fl
condition:list|None =None, p_type: str = 'padj'
) -> tuple:
p_col_name = 'pvalue' if p_type == 'pvalue' else 'padj'
-
+ print(f"Using [{p_col_name}] for filtering")
# calculate the test result if not given
if taxa_res_df is None or func_res_df is None or taxa_func_res_df is None:
print("No test result given, calculating the test result first")
diff --git a/metax/taxafunc_analyzer/analyzer_utils/data_preprocessing.py b/metax/taxafunc_analyzer/analyzer_utils/data_preprocessing.py
index 9df4db8..a7ab723 100644
--- a/metax/taxafunc_analyzer/analyzer_utils/data_preprocessing.py
+++ b/metax/taxafunc_analyzer/analyzer_utils/data_preprocessing.py
@@ -101,6 +101,10 @@ def _data_transform(self, df: pd.DataFrame, transform_method: str|None = None) -
def _data_normalization(self, df: pd.DataFrame, normalize_method: str|None = None) -> pd.DataFrame:
+ def trace_shift(x):
+ from .lfq import run_normalization
+ return run_normalization(x)
+
if normalize_method is None:
print('normalize_method is not set, data normalization did not perform.')
else:
@@ -117,7 +121,8 @@ def _data_normalization(self, df: pd.DataFrame, normalize_method: str|None = Non
'sum': lambda x: x / (x.sum() + epsilon),
'minmax': lambda x: (x - x.min()) / (x.max() - x.min()),
'zscore': lambda x: (x - x.mean()) / (x.std() + epsilon),
- 'pareto': lambda x: (x - x.mean()) / (np.sqrt(x.std() + epsilon))
+ 'pareto': lambda x: (x - x.mean()) / (np.sqrt(x.std() + epsilon)),
+ 'trace_shift': lambda x: trace_shift(x)
}
if normalize_method in normalize_operations:
@@ -609,7 +614,7 @@ def detect_and_handle_outliers(self, df: pd.DataFrame,
def data_preprocess(self, df: pd.DataFrame, normalize_method: str|None = None,
transform_method: str|None = None, batch_meta: str|None =None,
processing_order:list|None =None,
- df_name:str = "None", peptide_num_threshold:dict[str, int] ={'taxa': 1, 'func': 1, 'taxa_func': 1}
+ df_name:str = "None"
) -> pd.DataFrame:
"""
## `data_preprocess` Method
@@ -624,6 +629,7 @@ def data_preprocess(self, df: pd.DataFrame, normalize_method: str|None = None,
- `normalize_method` (`str`, optional):
Method used for data normalization. Options include:
- `None`: No normalization.
+ - `trace_shift`: Trace shift normalization inspired by DirectLFQ.
- `mean`: Mean normalization.
- `sum`: Sum normalization.
- `minmax`: Min-max normalization.
@@ -656,11 +662,7 @@ def data_preprocess(self, df: pd.DataFrame, normalize_method: str|None = None,
- `taxa_func`
- `protein`
- `custom`
- - `peptide_num_threshold` (`dict`, optional):
- The threshold for the number of peptides in each DataFrame. Default values are:
- - `taxa`: 3
- - `func`: 3
- - `taxa_func`: 3
+
### Returns:
@@ -671,12 +673,7 @@ def data_preprocess(self, df: pd.DataFrame, normalize_method: str|None = None,
df = df.copy()
- # remove items with peptide number less than threshold
- if df_name in ['taxa', 'func', 'taxa_func']:
- print(f'{df_name.upper()} number before removing: {df.shape[0]}')
- df = df[df['peptide_num'] >= peptide_num_threshold[df_name]]
- print(f'{df_name.upper()} number with peptide_num >= [{peptide_num_threshold[df_name]}]: {df.shape[0]}')
-
+
if processing_order is None:
processing_order = ['transform', 'normalize', 'batch']
else:
diff --git a/metax/taxafunc_analyzer/analyzer_utils/lfq.py b/metax/taxafunc_analyzer/analyzer_utils/lfq.py
index 05fa0a8..abe5dcc 100644
--- a/metax/taxafunc_analyzer/analyzer_utils/lfq.py
+++ b/metax/taxafunc_analyzer/analyzer_utils/lfq.py
@@ -2,7 +2,6 @@
import pandas as pd
import numpy as np
from numba import njit
-import multiprocessing
import os
# Setup logging
@@ -11,7 +10,7 @@
def setup_logging():
logging.basicConfig(
level=logging.INFO,
- format="LFQ: %(message)s",
+ format="%(asctime)s - %(levelname)s - %(message)s",
)
setup_logging()
@@ -451,7 +450,7 @@ def get_list_of_tuple_w_protein_profiles_and_shifted_peptides(
)
if num_cores is not None and num_cores > 1:
- # Use multiprocessing
+ import multiprocessing
pool = multiprocessing.Pool(num_cores)
args = [
(
@@ -644,12 +643,53 @@ def get_ion_intensity_dataframe_from_list_of_shifted_peptides(
return ion_df
+def is_numeric_matrix(df):
+ # mark non-numeric values as NaN
+ numeric_df = df.apply(pd.to_numeric, errors='coerce')
+ # check if nan values are present
+ return numeric_df.notna().all().all()
+
+
+def run_normalization(
+ input_df: pd.DataFrame,
+ number_of_quadratic_samples: int = 100
+ ):
+ '''
+ Normalize the input DataFrame.
+ Args:
+        input_df (pd.DataFrame): A matrix of intensities. Columns are samples; the index is the items to be normalized.
+        number_of_quadratic_samples (int, optional): How many samples are used to create the anchor intensity trace. Increasing might marginally improve results at the cost of runtime.
+ Returns:
+ pd.DataFrame: The normalized DataFrame.
+ '''
+    # check if only numeric values are in the dataframe
+ if not is_numeric_matrix(input_df):
+ raise ValueError("Input DataFrame contains non-numeric values. Make sure to the items column is set as index.")
+
+ copy_numpy_arrays = check_whether_to_copy_numpy_arrays_derived_from_pandas()
+ input_df = np.log2(input_df.replace(0, np.nan)) # type: ignore
+ input_df = input_df.dropna(axis=0, how="all")
+
+ LOGGER.info("Performing sample normalization.")
+ input_df = NormalizationManagerSamplesOnSelectedProteins(
+ input_df,
+ num_samples_quadratic=number_of_quadratic_samples,
+ selected_proteins_file=None,
+ copy_numpy_arrays=copy_numpy_arrays,
+ ).complete_dataframe
+    # undo the log2 transform (back to the linear intensity scale)
+ input_df = 2 ** input_df
+ # fill NaNs with 0
+ input_df = input_df.fillna(0)
+
+ return input_df
+
def run_lfq(
input_df,
protein_id: str = "protein",
quant_id: str = "ion",
min_nonan: int = 1,
- number_of_quadratic_samples: int = 50,
+ number_of_quadratic_samples: int = 100,
maximum_number_of_quadratic_ions_to_use_per_protein: int = 10,
log_processed_proteins: bool = True,
compile_normalized_ion_table: bool = True,
@@ -699,17 +739,22 @@ def run_lfq(
df_path = os.path.join(current_dir, "../../../local_tests/peptide_for_protein.tsv")
df = pd.read_csv(df_path, sep="\t")
+ # protein_df = df.drop(columns=["Proteins"])
+ # protein_df.set_index("Sequence", inplace=True)
+ # print(protein_df.head())
+ # df1 = run_normalization(protein_df)
protein_df, ion_df = run_lfq(
df,
protein_id="Proteins",
quant_id="Sequence",
min_nonan=1,
- number_of_quadratic_samples=50,
+ number_of_quadratic_samples=500,
maximum_number_of_quadratic_ions_to_use_per_protein=10,
num_cores=None,
use_multiprocessing=True
)
+
print(protein_df.shape)
print(protein_df.head())
t2 = time.time()
diff --git a/metax/taxafunc_analyzer/analyzer_utils/razor_sum.py b/metax/taxafunc_analyzer/analyzer_utils/razor_sum.py
index 7b20bda..5caf2d1 100644
--- a/metax/taxafunc_analyzer/analyzer_utils/razor_sum.py
+++ b/metax/taxafunc_analyzer/analyzer_utils/razor_sum.py
@@ -130,7 +130,8 @@ def get_mini_target_set(self, greedy_method='heap'):
self.greedy_method = greedy_method
print('Start to get minimum target set using method: [razor]')
# only extract the peptide and target columns
- extract_cols = [self.column_map['peptide'], self.column_map['target']] + self.column_map['sample_list'] if self.column_map['sample_list'] else []
+ extract_cols = [self.column_map['peptide'], self.column_map['target']]
+ extract_cols = extract_cols + self.column_map['sample_list'] if self.column_map['sample_list'] else extract_cols
# if NA in target column, or '', raise error
if self.df[self.column_map['target']].isna().any() or '' in self.df[self.column_map['target']].values:
raise ValueError(f'NA or empty value in target column: {self.column_map["target"]}')
diff --git a/metax/taxafunc_analyzer/analyzer_utils/sum_protein_intensity.py b/metax/taxafunc_analyzer/analyzer_utils/sum_protein_intensity.py
index 17209aa..d6c0e26 100644
--- a/metax/taxafunc_analyzer/analyzer_utils/sum_protein_intensity.py
+++ b/metax/taxafunc_analyzer/analyzer_utils/sum_protein_intensity.py
@@ -69,7 +69,7 @@ def sum_protein_intensity(self, method='razor', by_sample=False, rank_method='un
greedy_method: str, default 'heap'. only used for `razor` method
options: ['greedy', 'heap']
peptide_num_threshold: int, default None
- the protein must have at least 3 peptides to be considered as a target
+            the protein must have at least this number of peptides to be considered as a target
quant_method: str, default 'sum'
options: ['sum', 'lfq']
'''
@@ -82,23 +82,10 @@ def sum_protein_intensity(self, method='razor', by_sample=False, rank_method='un
if peptide_num_threshold is not None:
self.peptide_num_threshold = peptide_num_threshold
- # remove the protein with less than the threshold of peptides
- # use teh methood in RazorSum
- razor_integrator = RazorSum(df=self.df,
- column_map={
- 'peptide': self.tfa.peptide_col_name,
- 'target': self.tfa.protein_col_name,
- 'sample_list': self.tfa.sample_list,
- },
- peptide_num_threshold=self.peptide_num_threshold,
- share_intensity=self.share_intensity,
- greedy_method=greedy_method,
- protein_separator= self.protein_separator)
-
self.rank_method = rank_method
self.check_protein_col()
- self.df = razor_integrator.remove_protein_less_than_threshold()
+        # initialize the peptide number used as the total number of peptides
self.tfa.peptide_num_used['protein'] = len(self.df)
if method == 'rank':
@@ -121,8 +108,19 @@ def sum_protein_intensity(self, method='razor', by_sample=False, rank_method='un
self._sum_protein_rank(sample, by_sample)
elif method == 'razor':
print('start to sum protein intensity using method: [razor]')
+            # use the method in RazorSum
+ razor_integrator = RazorSum(df=self.df,
+ column_map={
+ 'peptide': self.tfa.peptide_col_name,
+ 'target': self.tfa.protein_col_name,
+ 'sample_list': self.tfa.sample_list,
+ },
+ peptide_num_threshold=self.peptide_num_threshold,
+ share_intensity=self.share_intensity,
+ greedy_method=greedy_method,
+ protein_separator= self.protein_separator)
if quant_method == 'sum':
- razor_integrator.peptide_num_threshold = 1 # set the threshold to 1, to avoid run filter again
+ # razor_integrator.peptide_num_threshold = 1 # set the threshold to 1, to avoid run filter again
res_df = razor_integrator.sum_protein_intensity(greedy_method=greedy_method)
elif quant_method == 'lfq':
from .lfq import run_lfq
@@ -137,10 +135,13 @@ def sum_protein_intensity(self, method='razor', by_sample=False, rank_method='un
# move teh 2 columns to the front
res_df = res_df[['peptides', 'peptide_num'] + [col for col in res_df.columns if col not in ['peptides', 'peptide_num']]]
+ self.tfa.peptide_num_used['protein'] = len(razor_integrator.df)
return res_df
elif method == 'anti-razor':
print(f"\n-------------Start to sum protein intensity using method: [{method}] by_sample: [True] rank_method: [Shared]-------------")
+            # count the peptides for each protein and filter proteins below the threshold
+ self.filter_protein_by_peptide_num()
for sample in self.tfa.sample_list:
self._sum_protein_anti_razor(sample)
@@ -161,6 +162,30 @@ def sum_protein_intensity(self, method='razor', by_sample=False, rank_method='un
return res_df
+
+ def filter_protein_by_peptide_num(self):
+ if self.peptide_num_threshold < 2:
+ return self.df
+ else:
+ peptide_col_name = self.tfa.peptide_col_name
+ protein_col_name = self.tfa.protein_col_name
+ df= self.df.copy()
+ target_to_peptides = defaultdict(set)
+ for _, row in tqdm(df.iterrows(), total=df.shape[0], desc="Creating target to peptides mapping"):
+ sequence = row[peptide_col_name]
+ targets = row[protein_col_name].split(self.protein_separator)
+ for target in targets:
+ target_to_peptides[target].add(sequence)
+ proteins_less_than_threshold = [target for target, peps in target_to_peptides.items() if len(peps) < self.peptide_num_threshold]
+ print(f'Number of proteins with less than {self.peptide_num_threshold} peptides: {len(proteins_less_than_threshold)}')
+        # remove proteins with fewer peptides than the threshold from the protein column of the df
+ df[protein_col_name] = df[protein_col_name].apply(lambda x: ';'.join([protein for protein in x.split(self.protein_separator) if protein not in proteins_less_than_threshold]))
+ self.df[protein_col_name] = df[protein_col_name]
+ # remove the row with empty protein
+ self.df = self.df[self.df[protein_col_name].str.strip() != '']
+ self.tfa.peptide_num_used['protein'] = len(self.df)
+ return self.df
+
# razor method
def find_minimum_protein_set(self, peptides, protein_to_peptides):
protein_to_peptides_copy = protein_to_peptides.copy()
diff --git a/metax/utils/version.py b/metax/utils/version.py
index cfa61a3..b0d8e7d 100644
--- a/metax/utils/version.py
+++ b/metax/utils/version.py
@@ -1,2 +1,2 @@
-__version__ = '1.115.1'
+__version__ = '1.115.4'
API_version = '3'
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 8d4e3df..bb5194f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "MetaXTools"
-version = "1.115.1"
+version = "1.115.4"
description = "MetaXTools is a novel tool for linking peptide sequences with taxonomic and functional information in Metaproteomics."
readme = "README_PyPi.md"
license = { text = "NorthOmics" }