From 034b09fb3ed677fa71d58382bc43e1dd3cf9a9b9 Mon Sep 17 00:00:00 2001 From: Qing <44231502+byemaxx@users.noreply.github.com> Date: Fri, 18 Oct 2024 13:34:08 -0400 Subject: [PATCH 1/2] - New: 1. Added Advanced Parameters for Peptide Annotator 2. Add [Fill Zero] method for handling missing values in the data preprossing part. --- Docs/ChangeLog.md | 6 + metax/gui/main_gui.py | 20 +- metax/gui/metax_gui/main_window.ui | 524 ++++++++++++------ metax/gui/metax_gui/ui_main_window.py | 153 +++-- metax/peptide_annotator/peptable_annotator.py | 72 ++- metax/taxafunc_analyzer/analyzer.py | 9 +- .../analyzer_utils/data_preprocessing.py | 6 + metax/utils/version.py | 2 +- pyproject.toml | 2 +- 9 files changed, 566 insertions(+), 228 deletions(-) diff --git a/Docs/ChangeLog.md b/Docs/ChangeLog.md index d33e688..c7897c3 100644 --- a/Docs/ChangeLog.md +++ b/Docs/ChangeLog.md @@ -1,3 +1,9 @@ +# Version: 1.116.0 +## Date: 2024-10-18 +### Changes: +- New: 1. Added Advanced Parameters for Peptide Annotator 2. Add [Fill Zero] method for handling missing values in the data preprossing part. 
+ + # Version: 1.115.5 ## Date: 2024-10-16 ### Changes: diff --git a/metax/gui/main_gui.py b/metax/gui/main_gui.py index 7230595..5feea8c 100644 --- a/metax/gui/main_gui.py +++ b/metax/gui/main_gui.py @@ -796,7 +796,8 @@ def hide_plot_setting_groupbox(self): groupbox_list = ["groupBox_basic_plot", "groupBox_basic_heatmap_plot_settings", "groupBox_cross_heatmap_settings", "groupBox_deseq2_plot_settings", "groupBox_co_expression_plot_settings", "groupBox_expression_trends_plot_settings", - "groupBox_taxa_func_link_plot_settings", "groupBox_taxa_func_link_net_plot_settings" + "groupBox_taxa_func_link_plot_settings", "groupBox_taxa_func_link_net_plot_settings", + "groupBox_peptide_annotator_settings" ] for groupbox_name in groupbox_list: groupbox = getattr(self, groupbox_name) @@ -2049,6 +2050,14 @@ def run_peptide2taxafunc(self): final_peptide_path = f'''{self.lineEdit_final_peptide_path.text()}''' peptide2taxafunc_outpath = f'''{self.lineEdit_peptide2taxafunc_outpath.text()}''' threshold = float(self.doubleSpinBox_LCA_threshold.value()) + genome_mode = self.checkBox_annotator_genome_mode.isChecked() + protein_separator = self.lineEdit_annotator_protein_separator.text() + protein_genome_separator = self.lineEdit_annotator_genome_separator.text() + peptide_col = self.lineEdit_annotator_peptide_col_name.text() + protein_col = self.lineEdit_annotator_protein_col_name.text() + sample_col_prefix = self.lineEdit_annotator_sample_col_prefix.text() + distinct_genome_threshold = self.spinBox_annotator_distinct_num_threshold.value() + exclude_protein_contains = self.lineEdit_annotator_exclude_protein_contains.text() if db_path == '': QMessageBox.warning(self.MainWindow, 'Warning', 'Please select database!') @@ -2065,6 +2074,15 @@ def peptide2taxafunc_main_wrapper(): peptide_path = final_peptide_path, output_path = peptide2taxafunc_outpath, threshold=threshold, + genome_mode=genome_mode, + protein_separator=protein_separator, + protein_genome_separator=protein_genome_separator, 
+ protein_col=protein_col, + peptide_col=peptide_col, + sample_col_prefix=sample_col_prefix, + distinct_genome_threshold=distinct_genome_threshold, + exclude_protein_contains = exclude_protein_contains + ) return instance.run_annotate() self.run_in_new_window(peptide2taxafunc_main_wrapper, show_msg=True) diff --git a/metax/gui/metax_gui/main_window.ui b/metax/gui/metax_gui/main_window.ui index d780368..81c7523 100644 --- a/metax/gui/metax_gui/main_window.ui +++ b/metax/gui/metax_gui/main_window.ui @@ -245,8 +245,8 @@ 0 0 - 391 - 80 + 528 + 534 @@ -726,6 +726,11 @@ multiple + + + FillZero + + @@ -907,6 +912,11 @@ regression + + + FillZero + + @@ -2779,7 +2789,7 @@ 0 0 - 999 + 621 150 @@ -7457,8 +7467,8 @@ 0 0 - 620 - 65 + 1016 + 105 @@ -9278,8 +9288,8 @@ 0 0 - 383 - 68 + 1016 + 141 @@ -9502,16 +9512,13 @@ - 1 + 0 MAG - - - @@ -9519,34 +9526,174 @@ + + + + Open + + + + + + + ? + + + - - + + - Database + ? - - + + - LCA Threshold + GO - - - - Open + + + + Annotating Settings + + + + + + + Proteins + + + + + + + Peptide Column Name + + + + + + + Protein Separator + + + + + + + Genome Separator in Protein ID + + + + + + + Proteins Group Column Name + + + + + + + Sequence + + + + + + + The separator between proteins in protein groups, e.g. ";" in MGYG000003683_00301;MGYG000000756_01431;MGYG000001490_01143 + + + ; + + + + + + + The separator in protein ID to split the genome ID. e.g. "_" in MGYG000003683_00301 + + + _ + + + + + + + Prefix of Intensity Column + + + + + + + e.g. "Intensity" in Intensity_V2_05, Intensity_V2_06 + + + Intensity + + + + + + + Filter Genome with Distinct peptide Number + + + + + + + 9999 + + + + + + + Staring LCA level from Genome + + + true + + + + + + + Exclude Protein with + + + + + + + Remove the peptides which annoate to exclude proteins + + + REV_ + + + + + + - - + + - ? + LCA Threshold @@ -9557,17 +9704,17 @@ - - + + - Open + Database - - + + - ? 
+ Open @@ -9597,6 +9744,9 @@ + + + @@ -9604,10 +9754,10 @@ - - + + - GO + Show Advanced Settings @@ -9635,8 +9785,8 @@ 0 0 - 313 - 41 + 1044 + 493 @@ -10273,7 +10423,7 @@ 0 0 1122 - 23 + 21 @@ -10406,12 +10556,12 @@ setEnabled(bool) - 100 - 81 + 102 + 95 - 109 - 81 + 111 + 95 @@ -10422,12 +10572,12 @@ setEnabled(bool) - 100 - 81 + 102 + 95 - 117 - 82 + 119 + 96 @@ -10438,12 +10588,12 @@ setEnabled(bool) - 61 - 85 + 63 + 99 - 68 - 85 + 70 + 99 @@ -10454,12 +10604,12 @@ setEnabled(bool) - 61 - 85 + 63 + 99 - 76 - 87 + 78 + 101 @@ -10470,12 +10620,12 @@ setEnabled(bool) - 61 - 86 + 161 + 151 - 68 - 86 + 255 + 153 @@ -10486,12 +10636,12 @@ setEnabled(bool) - 61 - 86 + 161 + 151 - 75 - 87 + 421 + 153 @@ -10502,12 +10652,12 @@ setEnabled(bool) - 94 - 80 + 96 + 94 - 106 - 80 + 108 + 94 @@ -10518,12 +10668,12 @@ setEnabled(bool) - 94 - 80 + 96 + 94 - 117 - 81 + 119 + 95 @@ -10534,12 +10684,12 @@ setEnabled(bool) - 94 - 79 + 96 + 93 - 106 - 79 + 108 + 93 @@ -10550,12 +10700,12 @@ setEnabled(bool) - 94 - 79 + 96 + 93 - 117 - 80 + 119 + 94 @@ -10566,12 +10716,12 @@ setEnabled(bool) - 61 - 86 + 63 + 100 - 68 - 86 + 70 + 100 @@ -10582,12 +10732,12 @@ setEnabled(bool) - 61 - 86 + 63 + 100 - 75 - 87 + 77 + 101 @@ -10598,12 +10748,12 @@ setEnabled(bool) - 85 - 91 + 168 + 181 - 97 - 91 + 267 + 182 @@ -10614,12 +10764,12 @@ setEnabled(bool) - 93 - 81 + 95 + 95 - 106 - 81 + 108 + 95 @@ -10630,12 +10780,12 @@ setEnabled(bool) - 93 - 81 + 95 + 95 - 117 - 82 + 119 + 96 @@ -10646,12 +10796,12 @@ setEnabled(bool) - 94 - 80 + 562 + 173 - 106 - 80 + 770 + 174 @@ -10662,12 +10812,12 @@ setEnabled(bool) - 94 - 80 + 562 + 173 - 117 - 81 + 978 + 174 @@ -10678,12 +10828,12 @@ setEnabled(bool) - 698 - 176 + 1056 + 503 - 311 - 213 + 335 + 540 @@ -10694,12 +10844,12 @@ setEnabled(bool) - 698 - 176 + 1056 + 503 - 311 - 342 + 335 + 669 @@ -10710,12 +10860,12 @@ setEnabled(bool) - 698 - 176 + 1056 + 503 - 602 - 215 + 845 + 542 @@ -10726,12 +10876,12 @@ setEnabled(bool) - 85 - 91 + 168 + 181 - 108 - 92 
+ 478 + 182 @@ -10742,12 +10892,12 @@ setEnabled(bool) - 328 - 152 + 669 + 585 - 421 - 152 + 1012 + 585 @@ -10762,8 +10912,8 @@ 180 - 74 - 81 + 76 + 95 @@ -10774,12 +10924,12 @@ setEnabled(bool) - 61 - 85 + 63 + 99 - 68 - 85 + 70 + 99 @@ -10790,12 +10940,12 @@ setEnabled(bool) - 61 - 85 + 63 + 99 - 225 - 86 + 227 + 100 @@ -10806,12 +10956,12 @@ setEnabled(bool) - 61 - 85 + 147 + 151 - 71 - 85 + 244 + 153 @@ -10822,12 +10972,12 @@ setEnabled(bool) - 61 - 85 + 147 + 151 - 80 - 86 + 447 + 153 @@ -10838,12 +10988,12 @@ setVisible(bool) - 61 - 93 + 74 + 417 - 119 - 96 + 132 + 477 @@ -10854,12 +11004,12 @@ setVisible(bool) - 75 - 98 + 77 + 112 - 117 - 100 + 121 + 114 @@ -10870,12 +11020,12 @@ setVisible(bool) - 66 - 108 + 88 + 520 - 100 - 97 + 121 + 601 @@ -10886,12 +11036,12 @@ setVisible(bool) - 75 - 120 + 77 + 134 - 109 - 100 + 111 + 114 @@ -10902,12 +11052,12 @@ setVisible(bool) - 75 - 98 + 77 + 112 - 119 - 100 + 121 + 114 @@ -10918,12 +11068,12 @@ setVisible(bool) - 50 - 106 + 64 + 543 - 119 - 101 + 132 + 604 @@ -10934,12 +11084,12 @@ setVisible(bool) - 75 - 103 + 77 + 117 - 119 - 100 + 121 + 114 @@ -10950,12 +11100,12 @@ setVisible(bool) - 53 - 98 + 66 + 491 - 119 - 100 + 132 + 568 @@ -10967,11 +11117,11 @@ 915 - 142 + 141 971 - 143 + 142 @@ -10983,7 +11133,7 @@ 915 - 142 + 141 1044 @@ -11002,8 +11152,24 @@ 153 - 96 - 81 + 98 + 95 + + + + + checkBox_show_advanced_annotator_settings + toggled(bool) + groupBox_peptide_annotator_settings + setVisible(bool) + + + 79 + 234 + + + 101 + 267 diff --git a/metax/gui/metax_gui/ui_main_window.py b/metax/gui/metax_gui/ui_main_window.py index 1362bb2..64b4e2f 100644 --- a/metax/gui/metax_gui/ui_main_window.py +++ b/metax/gui/metax_gui/ui_main_window.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Form implementation generated from reading ui file 'c:\Users\max\OneDrive - University of Ottawa\code\TaxaFunc\MetaX\metax\gui\metax_gui\main_window.ui' +# Form implementation generated from reading ui file 'c:\Users\Qing\OneDrive - 
University of Ottawa\code\TaxaFunc\MetaX\metax\gui\metax_gui\main_window.ui' # # Created by: PyQt5 UI code generator 5.15.9 # @@ -147,7 +147,7 @@ def setupUi(self, metaX_main): self.toolBox_2.setMaximumSize(QtCore.QSize(1677, 16777215)) self.toolBox_2.setObjectName("toolBox_2") self.page_2 = QtWidgets.QWidget() - self.page_2.setGeometry(QtCore.QRect(0, 0, 391, 80)) + self.page_2.setGeometry(QtCore.QRect(0, 0, 528, 534)) self.page_2.setObjectName("page_2") self.gridLayout_27 = QtWidgets.QGridLayout(self.page_2) self.gridLayout_27.setObjectName("gridLayout_27") @@ -397,6 +397,7 @@ def setupUi(self, metaX_main): self.comboBox_outlier_handling_method1.addItem("") self.comboBox_outlier_handling_method1.addItem("") self.comboBox_outlier_handling_method1.addItem("") + self.comboBox_outlier_handling_method1.addItem("") self.horizontalLayout_11.addWidget(self.comboBox_outlier_handling_method1) self.gridLayout_15.addLayout(self.horizontalLayout_11, 4, 2, 1, 1) self.comboBox_outlier_detection = QtWidgets.QComboBox(self.tab_set_taxa_func) @@ -463,6 +464,7 @@ def setupUi(self, metaX_main): self.comboBox_outlier_handling_method2.addItem("") self.comboBox_outlier_handling_method2.addItem("") self.comboBox_outlier_handling_method2.addItem("") + self.comboBox_outlier_handling_method2.addItem("") self.horizontalLayout_8.addWidget(self.comboBox_outlier_handling_method2) self.gridLayout_15.addLayout(self.horizontalLayout_8, 4, 3, 1, 1) self.horizontalLayout_27 = QtWidgets.QHBoxLayout() @@ -1409,7 +1411,7 @@ def setupUi(self, metaX_main): self.scrollArea_2.setWidgetResizable(True) self.scrollArea_2.setObjectName("scrollArea_2") self.scrollAreaWidgetContents_2 = QtWidgets.QWidget() - self.scrollAreaWidgetContents_2.setGeometry(QtCore.QRect(0, 0, 999, 150)) + self.scrollAreaWidgetContents_2.setGeometry(QtCore.QRect(0, 0, 621, 150)) self.scrollAreaWidgetContents_2.setObjectName("scrollAreaWidgetContents_2") self.gridLayout_50 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_2) 
self.gridLayout_50.setObjectName("gridLayout_50") @@ -3838,7 +3840,7 @@ def setupUi(self, metaX_main): self.scrollArea_5.setWidgetResizable(True) self.scrollArea_5.setObjectName("scrollArea_5") self.scrollAreaWidgetContents_6 = QtWidgets.QWidget() - self.scrollAreaWidgetContents_6.setGeometry(QtCore.QRect(0, 0, 620, 65)) + self.scrollAreaWidgetContents_6.setGeometry(QtCore.QRect(0, 0, 1016, 105)) self.scrollAreaWidgetContents_6.setObjectName("scrollAreaWidgetContents_6") self.gridLayout_57 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_6) self.gridLayout_57.setObjectName("gridLayout_57") @@ -4847,7 +4849,7 @@ def setupUi(self, metaX_main): self.scrollArea_7.setWidgetResizable(True) self.scrollArea_7.setObjectName("scrollArea_7") self.scrollAreaWidgetContents_8 = QtWidgets.QWidget() - self.scrollAreaWidgetContents_8.setGeometry(QtCore.QRect(0, 0, 383, 68)) + self.scrollAreaWidgetContents_8.setGeometry(QtCore.QRect(0, 0, 1016, 141)) self.scrollAreaWidgetContents_8.setObjectName("scrollAreaWidgetContents_8") self.gridLayout_66 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_8) self.gridLayout_66.setObjectName("gridLayout_66") @@ -4976,36 +4978,91 @@ def setupUi(self, metaX_main): self.tab_17.setObjectName("tab_17") self.gridLayout_42 = QtWidgets.QGridLayout(self.tab_17) self.gridLayout_42.setObjectName("gridLayout_42") - self.lineEdit_final_peptide_path = QtWidgets.QLineEdit(self.tab_17) - self.lineEdit_final_peptide_path.setObjectName("lineEdit_final_peptide_path") - self.gridLayout_42.addWidget(self.lineEdit_final_peptide_path, 1, 2, 1, 1) self.toolButton_db_path_help = QtWidgets.QToolButton(self.tab_17) self.toolButton_db_path_help.setObjectName("toolButton_db_path_help") self.gridLayout_42.addWidget(self.toolButton_db_path_help, 0, 1, 1, 1) + self.pushButton_get_final_peptide_path = QtWidgets.QPushButton(self.tab_17) + self.pushButton_get_final_peptide_path.setObjectName("pushButton_get_final_peptide_path") + 
self.gridLayout_42.addWidget(self.pushButton_get_final_peptide_path, 1, 3, 1, 1) + self.toolButton__final_peptide_help = QtWidgets.QToolButton(self.tab_17) + self.toolButton__final_peptide_help.setObjectName("toolButton__final_peptide_help") + self.gridLayout_42.addWidget(self.toolButton__final_peptide_help, 1, 1, 1, 1) self.lineEdit_peptide2taxafunc_outpath = QtWidgets.QLineEdit(self.tab_17) self.lineEdit_peptide2taxafunc_outpath.setObjectName("lineEdit_peptide2taxafunc_outpath") self.gridLayout_42.addWidget(self.lineEdit_peptide2taxafunc_outpath, 2, 2, 1, 1) - self.label_5 = QtWidgets.QLabel(self.tab_17) - self.label_5.setObjectName("label_5") - self.gridLayout_42.addWidget(self.label_5, 0, 0, 1, 1) - self.label_8 = QtWidgets.QLabel(self.tab_17) - self.label_8.setObjectName("label_8") - self.gridLayout_42.addWidget(self.label_8, 3, 0, 1, 1) - self.pushButton_get_db_path = QtWidgets.QPushButton(self.tab_17) - self.pushButton_get_db_path.setObjectName("pushButton_get_db_path") - self.gridLayout_42.addWidget(self.pushButton_get_db_path, 0, 3, 1, 1) self.toolButton_lca_threshould_help = QtWidgets.QToolButton(self.tab_17) self.toolButton_lca_threshould_help.setObjectName("toolButton_lca_threshould_help") self.gridLayout_42.addWidget(self.toolButton_lca_threshould_help, 3, 1, 1, 1) + self.pushButton_run_peptide2taxafunc = QtWidgets.QPushButton(self.tab_17) + self.pushButton_run_peptide2taxafunc.setObjectName("pushButton_run_peptide2taxafunc") + self.gridLayout_42.addWidget(self.pushButton_run_peptide2taxafunc, 6, 0, 1, 4) + self.groupBox_peptide_annotator_settings = QtWidgets.QGroupBox(self.tab_17) + self.groupBox_peptide_annotator_settings.setObjectName("groupBox_peptide_annotator_settings") + self.gridLayout_71 = QtWidgets.QGridLayout(self.groupBox_peptide_annotator_settings) + self.gridLayout_71.setObjectName("gridLayout_71") + self.gridLayout_35 = QtWidgets.QGridLayout() + self.gridLayout_35.setObjectName("gridLayout_35") + self.lineEdit_annotator_protein_col_name 
= QtWidgets.QLineEdit(self.groupBox_peptide_annotator_settings) + self.lineEdit_annotator_protein_col_name.setObjectName("lineEdit_annotator_protein_col_name") + self.gridLayout_35.addWidget(self.lineEdit_annotator_protein_col_name, 2, 3, 1, 1) + self.label_200 = QtWidgets.QLabel(self.groupBox_peptide_annotator_settings) + self.label_200.setObjectName("label_200") + self.gridLayout_35.addWidget(self.label_200, 2, 0, 1, 1) + self.label_199 = QtWidgets.QLabel(self.groupBox_peptide_annotator_settings) + self.label_199.setObjectName("label_199") + self.gridLayout_35.addWidget(self.label_199, 1, 0, 1, 1) + self.label_201 = QtWidgets.QLabel(self.groupBox_peptide_annotator_settings) + self.label_201.setObjectName("label_201") + self.gridLayout_35.addWidget(self.label_201, 1, 2, 1, 1) + self.label_202 = QtWidgets.QLabel(self.groupBox_peptide_annotator_settings) + self.label_202.setObjectName("label_202") + self.gridLayout_35.addWidget(self.label_202, 2, 2, 1, 1) + self.lineEdit_annotator_peptide_col_name = QtWidgets.QLineEdit(self.groupBox_peptide_annotator_settings) + self.lineEdit_annotator_peptide_col_name.setObjectName("lineEdit_annotator_peptide_col_name") + self.gridLayout_35.addWidget(self.lineEdit_annotator_peptide_col_name, 2, 1, 1, 1) + self.lineEdit_annotator_protein_separator = QtWidgets.QLineEdit(self.groupBox_peptide_annotator_settings) + self.lineEdit_annotator_protein_separator.setObjectName("lineEdit_annotator_protein_separator") + self.gridLayout_35.addWidget(self.lineEdit_annotator_protein_separator, 1, 1, 1, 1) + self.lineEdit_annotator_genome_separator = QtWidgets.QLineEdit(self.groupBox_peptide_annotator_settings) + self.lineEdit_annotator_genome_separator.setObjectName("lineEdit_annotator_genome_separator") + self.gridLayout_35.addWidget(self.lineEdit_annotator_genome_separator, 1, 3, 1, 1) + self.label_203 = QtWidgets.QLabel(self.groupBox_peptide_annotator_settings) + self.label_203.setObjectName("label_203") + 
self.gridLayout_35.addWidget(self.label_203, 3, 0, 1, 1) + self.lineEdit_annotator_sample_col_prefix = QtWidgets.QLineEdit(self.groupBox_peptide_annotator_settings) + self.lineEdit_annotator_sample_col_prefix.setObjectName("lineEdit_annotator_sample_col_prefix") + self.gridLayout_35.addWidget(self.lineEdit_annotator_sample_col_prefix, 3, 1, 1, 1) + self.label_204 = QtWidgets.QLabel(self.groupBox_peptide_annotator_settings) + self.label_204.setObjectName("label_204") + self.gridLayout_35.addWidget(self.label_204, 0, 0, 1, 1) + self.spinBox_annotator_distinct_num_threshold = QtWidgets.QSpinBox(self.groupBox_peptide_annotator_settings) + self.spinBox_annotator_distinct_num_threshold.setMaximum(9999) + self.spinBox_annotator_distinct_num_threshold.setObjectName("spinBox_annotator_distinct_num_threshold") + self.gridLayout_35.addWidget(self.spinBox_annotator_distinct_num_threshold, 0, 1, 1, 1) + self.checkBox_annotator_genome_mode = QtWidgets.QCheckBox(self.groupBox_peptide_annotator_settings) + self.checkBox_annotator_genome_mode.setChecked(True) + self.checkBox_annotator_genome_mode.setObjectName("checkBox_annotator_genome_mode") + self.gridLayout_35.addWidget(self.checkBox_annotator_genome_mode, 0, 3, 1, 1) + self.label_205 = QtWidgets.QLabel(self.groupBox_peptide_annotator_settings) + self.label_205.setObjectName("label_205") + self.gridLayout_35.addWidget(self.label_205, 3, 2, 1, 1) + self.lineEdit_annotator_exclude_protein_contains = QtWidgets.QLineEdit(self.groupBox_peptide_annotator_settings) + self.lineEdit_annotator_exclude_protein_contains.setObjectName("lineEdit_annotator_exclude_protein_contains") + self.gridLayout_35.addWidget(self.lineEdit_annotator_exclude_protein_contains, 3, 3, 1, 1) + self.gridLayout_71.addLayout(self.gridLayout_35, 0, 0, 1, 1) + self.gridLayout_42.addWidget(self.groupBox_peptide_annotator_settings, 5, 0, 1, 4) + self.label_8 = QtWidgets.QLabel(self.tab_17) + self.label_8.setObjectName("label_8") + 
self.gridLayout_42.addWidget(self.label_8, 3, 0, 1, 1) self.label_6 = QtWidgets.QLabel(self.tab_17) self.label_6.setObjectName("label_6") self.gridLayout_42.addWidget(self.label_6, 1, 0, 1, 1) - self.pushButton_get_final_peptide_path = QtWidgets.QPushButton(self.tab_17) - self.pushButton_get_final_peptide_path.setObjectName("pushButton_get_final_peptide_path") - self.gridLayout_42.addWidget(self.pushButton_get_final_peptide_path, 1, 3, 1, 1) - self.toolButton__final_peptide_help = QtWidgets.QToolButton(self.tab_17) - self.toolButton__final_peptide_help.setObjectName("toolButton__final_peptide_help") - self.gridLayout_42.addWidget(self.toolButton__final_peptide_help, 1, 1, 1, 1) + self.label_5 = QtWidgets.QLabel(self.tab_17) + self.label_5.setObjectName("label_5") + self.gridLayout_42.addWidget(self.label_5, 0, 0, 1, 1) + self.pushButton_get_db_path = QtWidgets.QPushButton(self.tab_17) + self.pushButton_get_db_path.setObjectName("pushButton_get_db_path") + self.gridLayout_42.addWidget(self.pushButton_get_db_path, 0, 3, 1, 1) self.lineEdit_db_path = QtWidgets.QLineEdit(self.tab_17) self.lineEdit_db_path.setObjectName("lineEdit_db_path") self.gridLayout_42.addWidget(self.lineEdit_db_path, 0, 2, 1, 1) @@ -5019,12 +5076,15 @@ def setupUi(self, metaX_main): self.doubleSpinBox_LCA_threshold.setProperty("value", 1.0) self.doubleSpinBox_LCA_threshold.setObjectName("doubleSpinBox_LCA_threshold") self.gridLayout_42.addWidget(self.doubleSpinBox_LCA_threshold, 3, 2, 1, 2) + self.lineEdit_final_peptide_path = QtWidgets.QLineEdit(self.tab_17) + self.lineEdit_final_peptide_path.setObjectName("lineEdit_final_peptide_path") + self.gridLayout_42.addWidget(self.lineEdit_final_peptide_path, 1, 2, 1, 1) self.label_7 = QtWidgets.QLabel(self.tab_17) self.label_7.setObjectName("label_7") self.gridLayout_42.addWidget(self.label_7, 2, 0, 1, 1) - self.pushButton_run_peptide2taxafunc = QtWidgets.QPushButton(self.tab_17) - 
self.pushButton_run_peptide2taxafunc.setObjectName("pushButton_run_peptide2taxafunc") - self.gridLayout_42.addWidget(self.pushButton_run_peptide2taxafunc, 4, 0, 1, 4) + self.checkBox_show_advanced_annotator_settings = QtWidgets.QCheckBox(self.tab_17) + self.checkBox_show_advanced_annotator_settings.setObjectName("checkBox_show_advanced_annotator_settings") + self.gridLayout_42.addWidget(self.checkBox_show_advanced_annotator_settings, 4, 0, 1, 3) self.tabWidget_6.addTab(self.tab_17, "") self.tab_18 = QtWidgets.QWidget() self.tab_18.setObjectName("tab_18") @@ -5036,7 +5096,7 @@ def setupUi(self, metaX_main): self.toolBox_metalab_res_anno = QtWidgets.QToolBox(self.tab_18) self.toolBox_metalab_res_anno.setObjectName("toolBox_metalab_res_anno") self.page_3 = QtWidgets.QWidget() - self.page_3.setGeometry(QtCore.QRect(0, 0, 313, 41)) + self.page_3.setGeometry(QtCore.QRect(0, 0, 1044, 493)) self.page_3.setObjectName("page_3") self.gridLayout_45 = QtWidgets.QGridLayout(self.page_3) self.gridLayout_45.setObjectName("gridLayout_45") @@ -5358,7 +5418,7 @@ def setupUi(self, metaX_main): self.statusbar.setObjectName("statusbar") metaX_main.setStatusBar(self.statusbar) self.menuBar = QtWidgets.QMenuBar(metaX_main) - self.menuBar.setGeometry(QtCore.QRect(0, 0, 1122, 23)) + self.menuBar.setGeometry(QtCore.QRect(0, 0, 1122, 21)) self.menuBar.setObjectName("menuBar") self.menuTools = QtWidgets.QMenu(self.menuBar) self.menuTools.setObjectName("menuTools") @@ -5424,7 +5484,7 @@ def setupUi(self, metaX_main): self.tabWidget_3.setCurrentIndex(2) self.tabWidget.setCurrentIndex(1) self.tabWidget_2.setCurrentIndex(1) - self.tabWidget_6.setCurrentIndex(1) + self.tabWidget_6.setCurrentIndex(0) self.toolBox_metalab_res_anno.setCurrentIndex(0) self.tabWidget_5.setCurrentIndex(0) self.checkBox_deseq2_comparing_in_condition.clicked['bool'].connect(self.comboBox_deseq2_condition_meta.setEnabled) # type: ignore @@ -5465,6 +5525,7 @@ def setupUi(self, metaX_main): 
self.checkBox_set_taxa_func_split_func.clicked['bool'].connect(self.lineEdit_set_taxa_func_split_func_sep.setEnabled) # type: ignore self.checkBox_set_taxa_func_split_func.clicked['bool'].connect(self.checkBox_set_taxa_func_split_func_share_intensity.setEnabled) # type: ignore self.checkBox_tflink_plot_mean.clicked['bool'].connect(self.comboBox_tflink_sub_meta.setDisabled) # type: ignore + self.checkBox_show_advanced_annotator_settings.toggled['bool'].connect(self.groupBox_peptide_annotator_settings.setVisible) # type: ignore QtCore.QMetaObject.connectSlotsByName(metaX_main) metaX_main.setTabOrder(self.comboBox_taxa_level_to_stast, self.toolButton_meta_table_help) metaX_main.setTabOrder(self.toolButton_meta_table_help, self.comboBox_function_to_stast) @@ -5530,6 +5591,7 @@ def retranslateUi(self, metaX_main): self.comboBox_outlier_handling_method1.setItemText(4, _translate("metaX_main", "KNN")) self.comboBox_outlier_handling_method1.setItemText(5, _translate("metaX_main", "regression")) self.comboBox_outlier_handling_method1.setItemText(6, _translate("metaX_main", "multiple")) + self.comboBox_outlier_handling_method1.setItemText(7, _translate("metaX_main", "FillZero")) self.comboBox_outlier_detection.setItemText(0, _translate("metaX_main", "None")) self.comboBox_outlier_detection.setItemText(1, _translate("metaX_main", "Missing-Value")) self.comboBox_outlier_detection.setItemText(2, _translate("metaX_main", "IQR")) @@ -5555,6 +5617,7 @@ def retranslateUi(self, metaX_main): self.comboBox_outlier_handling_method2.setItemText(2, _translate("metaX_main", "KNN")) self.comboBox_outlier_handling_method2.setItemText(3, _translate("metaX_main", "multiple")) self.comboBox_outlier_handling_method2.setItemText(4, _translate("metaX_main", "regression")) + self.comboBox_outlier_handling_method2.setItemText(5, _translate("metaX_main", "FillZero")) self.label_102.setText(_translate("metaX_main", "Outliers Handling by")) self.comboBox_set_data_normalization.setItemText(0, 
_translate("metaX_main", "None")) self.comboBox_set_data_normalization.setItemText(1, _translate("metaX_main", "Trace Shifting")) @@ -6104,16 +6167,36 @@ def retranslateUi(self, metaX_main): self.pushButton_view_table.setText(_translate("metaX_main", "View Table")) self.tabWidget_TaxaFuncAnalyzer.setTabText(self.tabWidget_TaxaFuncAnalyzer.indexOf(self.tab_table_review), _translate("metaX_main", "Table Review")) self.toolButton_db_path_help.setText(_translate("metaX_main", "?")) - self.label_5.setText(_translate("metaX_main", "Database")) - self.label_8.setText(_translate("metaX_main", "LCA Threshold")) - self.pushButton_get_db_path.setText(_translate("metaX_main", "Open")) - self.toolButton_lca_threshould_help.setText(_translate("metaX_main", "?")) - self.label_6.setText(_translate("metaX_main", "Peptide Table")) self.pushButton_get_final_peptide_path.setText(_translate("metaX_main", "Open")) self.toolButton__final_peptide_help.setText(_translate("metaX_main", "?")) + self.toolButton_lca_threshould_help.setText(_translate("metaX_main", "?")) + self.pushButton_run_peptide2taxafunc.setText(_translate("metaX_main", "GO")) + self.groupBox_peptide_annotator_settings.setTitle(_translate("metaX_main", "Annotating Settings")) + self.lineEdit_annotator_protein_col_name.setText(_translate("metaX_main", "Proteins")) + self.label_200.setText(_translate("metaX_main", "Peptide Column Name")) + self.label_199.setText(_translate("metaX_main", "Protein Separator")) + self.label_201.setText(_translate("metaX_main", "Genome Separator in Protein ID")) + self.label_202.setText(_translate("metaX_main", "Proteins Group Column Name")) + self.lineEdit_annotator_peptide_col_name.setText(_translate("metaX_main", "Sequence")) + self.lineEdit_annotator_protein_separator.setToolTip(_translate("metaX_main", "The separator between proteins in protein groups, e.g. 
\";\" in MGYG000003683_00301;MGYG000000756_01431;MGYG000001490_01143")) + self.lineEdit_annotator_protein_separator.setText(_translate("metaX_main", ";")) + self.lineEdit_annotator_genome_separator.setToolTip(_translate("metaX_main", "The separator in protein ID to split the genome ID. e.g. \"_\" in MGYG000003683_00301")) + self.lineEdit_annotator_genome_separator.setText(_translate("metaX_main", "_")) + self.label_203.setText(_translate("metaX_main", "Prefix of Intensity Column")) + self.lineEdit_annotator_sample_col_prefix.setToolTip(_translate("metaX_main", "e.g. \"Intensity\" in Intensity_V2_05, Intensity_V2_06")) + self.lineEdit_annotator_sample_col_prefix.setText(_translate("metaX_main", "Intensity")) + self.label_204.setText(_translate("metaX_main", "Filter Genome with Distinct peptide Number")) + self.checkBox_annotator_genome_mode.setText(_translate("metaX_main", "Staring LCA level from Genome")) + self.label_205.setText(_translate("metaX_main", "Exclude Protein with")) + self.lineEdit_annotator_exclude_protein_contains.setToolTip(_translate("metaX_main", "Remove the peptides which annoate to exclude proteins")) + self.lineEdit_annotator_exclude_protein_contains.setText(_translate("metaX_main", "REV_")) + self.label_8.setText(_translate("metaX_main", "LCA Threshold")) + self.label_6.setText(_translate("metaX_main", "Peptide Table")) + self.label_5.setText(_translate("metaX_main", "Database")) + self.pushButton_get_db_path.setText(_translate("metaX_main", "Open")) self.pushButton_get_taxafunc_save_path.setText(_translate("metaX_main", "Open")) self.label_7.setText(_translate("metaX_main", "OTFs Save To")) - self.pushButton_run_peptide2taxafunc.setText(_translate("metaX_main", "GO")) + self.checkBox_show_advanced_annotator_settings.setText(_translate("metaX_main", "Show Advanced Settings")) self.tabWidget_6.setTabText(self.tabWidget_6.indexOf(self.tab_17), _translate("metaX_main", "MAG")) 
self.pushButton_run_metalab_maxq_annotate.setText(_translate("metaX_main", "GO")) self.pushButton_open_metalab_res_folder.setText(_translate("metaX_main", "Open")) diff --git a/metax/peptide_annotator/peptable_annotator.py b/metax/peptide_annotator/peptable_annotator.py index ba966b8..74aa93b 100644 --- a/metax/peptide_annotator/peptable_annotator.py +++ b/metax/peptide_annotator/peptable_annotator.py @@ -17,7 +17,8 @@ class PeptideAnnotator: def __init__(self, db_path:str, peptide_path: str, output_path: str, threshold=1.0, genome_mode=True, protein_separator=';', protein_genome_separator = '_', - protein_col='Proteins', peptide_col='Sequence', sample_col_prefix='Intensity_'): + protein_col='Proteins', peptide_col='Sequence', sample_col_prefix='Intensity_', + distinct_genome_threshold:int=0, exclude_protein_contains:str='REV_'): self.db_path = db_path self.peptide_path = peptide_path @@ -30,6 +31,8 @@ def __init__(self, db_path:str, peptide_path: str, output_path: str, self.protein_col = protein_col self.peptide_col = peptide_col self.sample_col_prefix = sample_col_prefix + self.distinct_genome_threshold = distinct_genome_threshold + self.exclude_protein_contains = exclude_protein_contains self.thread_local = threading.local() @@ -93,6 +96,7 @@ def add_additional_columns(self, df): def run_2_result(self, df): tqdm.pandas() df_t = df.copy() + df_t.rename(columns={self.peptide_col: 'Sequence'}, inplace=True) print('Running proteins_to_taxa_func...') with ThreadPoolExecutor() as executor: @@ -122,6 +126,9 @@ def run_2_result(self, df): def save_result(self, df): dir_path = os.path.dirname(self.output_path) + if dir_path == '': + dir_path = '.' 
+ if not os.path.exists(dir_path): os.makedirs(dir_path) print(f'Output directory did not exist, created: {dir_path}') @@ -144,17 +151,56 @@ def save_result(self, df): print(f'Output shape: {df.shape}') - def remove_reversed(self, df): - print('Removing reversed proteins...') - print(f'Original shape: {df.shape}') + def exclude_proteins(self, df): + print(f'Removing reversed proteins containing [{self.exclude_protein_contains}]...') try: - df = df[~df[self.protein_col].str.contains('REV_')] + df = df[~df[self.protein_col].str.contains(self.exclude_protein_contains)] print(f'After removing reversed proteins: {df.shape}') except Exception as e: print('Error: removing reversed proteins failed!') print(e) return df + + def extract_genome_from_protein(self, protein:str): + pro_list = protein.split(self.protein_separator) + genome_list = [pro.split(self.protein_genome_separator)[0] for pro in pro_list] + genome = set(genome_list) + genome = ';'.join(genome) + return genome + + def get_genome_list_by_distinct_pep_num(self, df): + print('Calculating distinct peptides number for each genome...') + df_t = df[[self.peptide_col, self.protein_col]].copy() + df_t['genome'] = df_t[self.protein_col].apply(self.extract_genome_from_protein) + df_t['genome_count'] = df_t['genome'].apply(lambda x: len(x.split(';'))) + df_distinct = df_t.loc[df_t['genome_count'] == 1, ['genome', 'genome_count']] + df_distinct = df_distinct.groupby('genome').count().reset_index() + genome_list = df_distinct.loc[df_distinct['genome_count'] >= self.distinct_genome_threshold, 'genome'].tolist() + print(f'Total genomes: {df_distinct.shape[0]}, genomes with distinct peptides >= {self.distinct_genome_threshold}: [{len(genome_list)}]') + return genome_list + + def remove_proteins_not_in_genome_list(self, protein_str, genome_list): + pro_list = protein_str.split(self.protein_separator) + pro_list = [pro for pro in pro_list if pro.split(self.protein_genome_separator)[0] in genome_list] + return 
';'.join(pro_list) + + def filter_genome_with_distinct_pep_num(self, df): + if self.distinct_genome_threshold < 1: + return df + + print(f'Filtering genomes less than [{self.distinct_genome_threshold}] distinct peptides...') + original_num = df.shape[0] + genome_list = self.get_genome_list_by_distinct_pep_num(df) + df[self.protein_col] = df[self.protein_col].apply(lambda x: self.remove_proteins_not_in_genome_list(x, genome_list)) + # remove rows with empty proteins + df = df[df[self.protein_col].str.len() > 0] + print(f'Peptides number: from [{original_num}] -> [{df.shape[0]}] after filtering genomes with distinct peptides') + return df + + + + def run_annotate(self): print('Start running Peptide Annotator...') @@ -177,7 +223,9 @@ def run_annotate(self): print(f'After filtering Intensity 0 in all samples and removing other columns: {df.shape}') - df = self.remove_reversed(df) + df = self.exclude_proteins(df) + + df = self.filter_genome_with_distinct_pep_num(df) df_res = self.run_2_result(df) @@ -186,8 +234,11 @@ def run_annotate(self): return df_res if __name__ == '__main__': - db_path = 'UHGP.db' - final_peptides_path = 'peptide.tsv' + current_path = os.path.dirname(os.path.abspath(__file__)) + # db_path = 'UHGP.db' + db_path = os.path.join(current_path, '../../local_tests/UHGP.db') + # final_peptides_path = 'peptide.tsv' + final_peptides_path = os.path.join(current_path, '../data/example_data/Example_final_peptide.tsv') output_path = 'OTF.tsv' threshold = 1 t0 = time.time() @@ -200,9 +251,10 @@ def run_annotate(self): genome_mode=True, protein_separator=';', protein_genome_separator = '_', - protein_col='final_proteins', + protein_col='Proteins', peptide_col='Sequence', - sample_col_prefix='CHFL' + sample_col_prefix='Intensity', + distinct_genome_threshold=3, ) annotator.run_annotate() diff --git a/metax/taxafunc_analyzer/analyzer.py b/metax/taxafunc_analyzer/analyzer.py index 540e421..62536a4 100644 --- a/metax/taxafunc_analyzer/analyzer.py +++ 
b/metax/taxafunc_analyzer/analyzer.py @@ -94,6 +94,7 @@ def __init__( self._set_original_df(df_path) self._set_meta(meta_path) + self._check_if_intensity_cols_numberic() self._remove_all_zero_row() self.get_func_list_in_df() # self.set_func('eggNOG_Description') @@ -167,7 +168,7 @@ def _set_meta(self, meta_path=None) -> None: check_result = self.check_meta_match_df() - if check_result[0] == False: + if not check_result[0]: raise ValueError(f"The meta data does not match the TaxaFunc data, Please check! \n\n{check_result[1]}") # check if there is NA in the original_df[self.sample_list] @@ -177,6 +178,12 @@ def _set_meta(self, meta_path=None) -> None: else: self.has_na_in_original_df = False + + def _check_if_intensity_cols_numberic(self): + if not self.original_df[self.sample_list].apply(pd.to_numeric, errors='coerce').notnull().all().all(): + raise ValueError("The sample columns must contain only numeric values!") + + def update_meta(self, meta_df: pd.DataFrame) -> None: self.meta_df = meta_df old_sample_list = self.sample_list diff --git a/metax/taxafunc_analyzer/analyzer_utils/data_preprocessing.py b/metax/taxafunc_analyzer/analyzer_utils/data_preprocessing.py index a7ab723..210e013 100644 --- a/metax/taxafunc_analyzer/analyzer_utils/data_preprocessing.py +++ b/metax/taxafunc_analyzer/analyzer_utils/data_preprocessing.py @@ -395,6 +395,7 @@ def _handle_missing_value(self, df: pd.DataFrame, method: str |None= 'drop+drop' - `regression`: Use regression imputation. - `multiple`: Use multiple imputation. - `original`: Keep the original data unchanged. + - `fillzero`: Fill missing values with 0. - **by_group** (`str`, optional): The column name for grouping samples during missing value handling. If not specified, the default group list from `tfa` is used. 
@@ -529,6 +530,10 @@ def impute_method(df, method, by_group): elif method == 'drop': print('NO HANDLING FOR MISSING VALUE, DROP ROWS WITH MISSING VALUE') df = df.dropna(subset=self.tfa.sample_list) + elif method == 'fillzero': + print('Fill NA with 0...') + df[self.tfa.sample_list] = df[self.tfa.sample_list].fillna(0) + else: raise ValueError(f'Invalid method: {method}') @@ -594,6 +599,7 @@ def detect_and_handle_outliers(self, df: pd.DataFrame, - `regression`: Regression imputation. - `multiple`: Multiple imputation. - `original`: Keep original data unchanged. + - `fillzero`: Fill with 0. - `detect_by_group` (`str`, optional): Column name for grouping samples for outlier detection. diff --git a/metax/utils/version.py b/metax/utils/version.py index db562c9..f4be0ce 100644 --- a/metax/utils/version.py +++ b/metax/utils/version.py @@ -1,2 +1,2 @@ -__version__ = '1.115.5' +__version__ = '1.116.0' API_version = '3' \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 5300e9c..3fa8034 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "MetaXTools" -version = "1.115.5" +version = "1.116.0" description = "MetaXTools is a novel tool for linking peptide sequences with taxonomic and functional information in Metaproteomics." readme = "README_PyPi.md" license = { text = "NorthOmics" } From 0ddde2651753d4e12310a88e1d7806f0f4e68451 Mon Sep 17 00:00:00 2001 From: Qing <44231502+byemaxx@users.noreply.github.com> Date: Mon, 28 Oct 2024 16:06:32 -0400 Subject: [PATCH 2/2] - Fix: Fixed the bug of when plot the heatmap of taxa-funcs with t-ststistic and f-statistic, the value still selected as p-value. - Change: Updated the cookbook. 
--- Docs/ChangeLog.md | 7 +- Docs/MetaX_Cookbook.md | 270 +++++++++++++------------- metax/taxafunc_ploter/heatmap_plot.py | 13 +- metax/utils/version.py | 2 +- pyproject.toml | 2 +- 5 files changed, 152 insertions(+), 142 deletions(-) diff --git a/Docs/ChangeLog.md b/Docs/ChangeLog.md index c7897c3..a565514 100644 --- a/Docs/ChangeLog.md +++ b/Docs/ChangeLog.md @@ -1,9 +1,14 @@ +# Version: 1.116.1 +## Date: 2024-10-28 +### Changes: +- Fix: Fixed a bug where the taxa-function heatmap still plotted the p-value when t-statistic or f-statistic was selected as the value type. +- Change: Updated the cookbook. + # Version: 1.116.0 ## Date: 2024-10-18 ### Changes: - New: 1. Added Advanced Parameters for Peptide Annotator 2. Add [Fill Zero] method for handling missing values in the data preprossing part. - # Version: 1.115.5 ## Date: 2024-10-16 ### Changes: diff --git a/Docs/MetaX_Cookbook.md b/Docs/MetaX_Cookbook.md index 1808fe0..9f7b81d 100644 --- a/Docs/MetaX_Cookbook.md +++ b/Docs/MetaX_Cookbook.md @@ -13,7 +13,7 @@ MetaX also features statistical modules and plotting tools for ana # Project Page -Visit **Github** to get more information: +Visit **GitHub** to get more information: [https://github.com/byemaxx/MetaX](https://github.com/byemaxx/MetaX) @@ -35,144 +35,17 @@ Visit **Github** to get more information:
-# Preparing Your Data - -## Module 1. Database Builder - -**Note:** The results from **MetaLab v2.3** MaxQuant workflow do not require database building. However, we do not recommend using these results as input to MetaX, as many peptides may be discarded. - -- Build the database for the **first time** using the Database Builder. - - **Option 1: Build Database Using MGnify Data** - - Ensure you download the correct database type corresponding to your data. - - ![dbbuilder](./MetaX_Cookbook.assets/dbbuilder.png) - - **Option 2: Build Database Using Own Data** - - 1. **Annotation Table:** A TSV table (tab-separated), with the first column as protein name joined with Genome by "_", e.g., "Genome1_protein1", and other columns containing annotation information. - - ![dbbuilder_own](./MetaX_Cookbook.assets/dbbuilder_own.png) - - 2. **Taxa Table:** A TSV table (tab-separated), with the first column as Genome name, e.g., "Genome1", and the second column as taxa. - - **Example Annotation Table:** - - | Query | Preferred_name | EC | KEGG_ko | - | ------------------- | -------------- | ----------------- | ------------------- | - | MGYG000000001_00696 | mfd | - | ko:K03723 | - | MGYG000000001_02838 | hxlR | - | - | - | MGYG000000001_01674 | ispG | 1.17.7.1,1.17.7.3 | ko:K03526 | - | MGYG000000001_02710 | glsA | 3.5.1.2 | ko:K01425 | - | MGYG000000001_01356 | mutS2 | - | ko:K07456 | - | MGYG000000001_02630 | - | - | - | - | MGYG000000001_02418 | ackA | 2.7.2.1 | ko:K00925 | - | MGYG000000001_00728 | atpA | 3.6.3.14 | ko:K02111 | - | MGYG000000001_00695 | pth | 3.1.1.29 | ko:K01056 | - | MGYG000000001_02907 | - | - | ko:K03086 | - | MGYG000000001_02592 | rplC | - | ko:K02906 | - | MGYG000000001_00137 | - | - | ko:K03480,ko:K03488 | - - **Example Taxa Table:** - - | Genome | Lineage | - | ------------- | ------------------------------------------------------------ | - | MGYG000000001 | 
d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Peptostreptococcales;f_Peptostreptococcaceae;g_GCA-900066495;s_GCA-900066495 sp902362365 | - | MGYG000000002 | d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Lachnospirales;f_Lachnospiraceae;g_Blautia_A;s_Blautia_A faecis | - | MGYG000000003 | d_Bacteria;p_Bacteroidota;c_Bacteroidia;o_Bacteroidales;f_Rikenellaceae;g_Alistipes;s_Alistipes shahii | - | MGYG000000004 | d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Oscillospirales;f_Ruminococcaceae;g_Anaerotruncus;s_Anaerotruncus colihominis | - | MGYG000000005 | d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Peptostreptococcales;f_Peptostreptococcaceae;g_Terrisporobacter;s_Terrisporobacter glycolicus_A | - | MGYG000000006 | d_Bacteria;p_Firmicutes;c_Bacilli;o_Staphylococcales;f_Staphylococcaceae;g_Staphylococcus;s_Staphylococcus xylosus | - | MGYG000000007 | d_Bacteria;p_Firmicutes;c_Bacilli;o_Lactobacillales;f_Lactobacillaceae;g_Lactobacillus;s_Lactobacillus intestinalis | - | MGYG000000008 | d_Bacteria;p_Firmicutes;c_Bacilli;o_Lactobacillales;f_Lactobacillaceae;g_Lactobacillus;s_Lactobacillus johnsonii | - | MGYG000000009 | d_Bacteria;p_Firmicutes;c_Bacilli;o_Lactobacillales;f_Lactobacillaceae;g_Ligilactobacillus;s_Ligilactobacillus murinus | - -## Module 2. Database Updater - -The **Database Updater** allows updating the database built by the **Database Builder** or adding more annotations. This step is **optional**. - -- Update the built database and extend annotations. - - ![db_updater](./MetaX_Cookbook.assets/db_updater.png) - - **Option 1: Built-in Mode** - - We recommend some extended databases, such as [dbCAN_seq](https://bcb.unl.edu/dbCAN_seq). - - **Option 2: TSV Table** - - Extend the database by adding a new database to the database table. Ensure the column separator is a tab and the first column is the Protein name, with other columns containing function annotations. - - **Example:** - - | Protein ID | COG | KEGG | ... 
| - | ------------------- | ---------- | ---------- | ---- | - | MGYG000000001_02630 | Function 1 | Function 1 | ... | - | MGYG000000001_01475 | Function 2 | Function 1 | ... | - | MGYG000000001_01539 | Function 3 | Function 1 | ... | - -## Module 3. Peptide Annotator - -### 1. Results from MAG Workflow - -The peptide results use Metagenome-assembled genomes (MAGs) as the reference database for protein searches, e.g., MetaLab-MAG, MetaLab-DIA and other workflows wich using MAG databases like MGnify or customized MAGs Database. - -- Annotate the peptide to Operational Taxa-Functions (OTF) Table before analysis using the Peptide Annotator. - - ![peptide2taxafunc](./MetaX_Cookbook.assets/peptide2taxafunc.png) - - **Required:** - - - **Database**: The database created by Database Builder - - - **Peptide Table**: - - - *Option 1*: From MetaLab-MAG results (final_peptides.tsv) - - - *Option 2*: Create it manually, with the first column as the ID (e.g., peptide sequence) and the second column as the proteins ID of MGnify (e.g., MGYG000003683_00301; MGYG000001490_01143) or your database, and other columns as the intensity of each sample. - - **Example:** - - | Sequence | Proteins | Intensity_V1_01 | Intensity_V1_02 | Intensity_V1_03 | Intensity_V1_04 | - | ----------------------------------- | ------------------------------------------------------------ | --------------- | --------------- | --------------- | --------------- | - | (Acetyl)KGGVEPQSETVWR | MGYG000002716_01681;MGYG000000195_00452;MGYG000001616_00519;MGYG000002258_01582;MGYG000001300_00281;MGYG000002926_00231;... | 714650 | 0 | 0 | 0 | - | (Acetyl)KVIPELNGK | MGYG000003589_01892;MGYG000001560_01812;MGYG000001789_00244;... 
| 0 | 0 | 0 | 0 | - | (Acetyl)LAELGAKAVTLSGPDGYIYDPDGITTK | MGYG000001199_02893 | 0 | 0 | 0 | 0 | - | (Acetyl)LLTGLPDAYGR | MGYG000001757_01206;MGYG000004547_02135;MGYG000001283_00124;MGYG000004758_00803;MGYG000002486_00845;MGYG000000271_01269 | 0 | 307519 | 0 | 0 | - | (Acetyl)MDFTLDKK | MGYG000000076_01275;MGYG000003694_00879;MGYG000000312_02425;MGYG000000271_02102;MGYG000004271_00233;MGYG000002517_00542;MGYG000000489_01025 | 306231 | 0 | 0 | 1214497 | - - - **Output Save Path**: The location to save the result table. - - - **LCA Threshold**: Find the LCA with the proportion threshold for each peptide. The default is 1.00 (100%). - - ![LCA_prop](./MetaX_Cookbook.assets/LCA_prop.png) - -### 2. Results from MaxQuant Workflow - -The peptide results from **MetaLab 2.3** MaxQuant workflow. - -- Select the **MetaLab** result folder, which contains the **maxquant_search** folder. - - ![peptide2taxafunc_tab2_1](MetaX_Cookbook.assets/peptide2taxafunc_tab2_1.png) - -- The **Peptide Annotator** will automatically find the **peptides_report.txt**, **BuiltIn.pepTaxa.csv**, and **functions.tsv** in the **maxquant_search** folder. Alternatively, you can select the files manually. - - - Select **OTFs Save To** to set the location to save the result table. - - ![peptide2taxafunc_tab2_2](MetaX_Cookbook.assets/peptide2taxafunc_tab2_2.png) - -
- # Exploring Data with MetaX -## Module 4. OTF Analyzer +See the **[Preparing Your Data](#preparing-your-data)** section to build the database and annotate peptides to OTFs before starting. + +## Module 1. OTF Analyzer -After obtaining the **Operational Taxa-Functions (OTF) Table** using the **Peptide Annotator**, you can perform downstream analysis with the **OTF Analyzer**. +After obtaining the **Operational Taxa-Functions (OTF) Table** using the **[Peptide Annotator](#module-4-peptide-annotator)**, you can perform downstream analysis with the **OTF Analyzer**. ## 1. Data Preparation -**OTFs (Operational Taxa-Functions) Table:** Obtained from the Peptide Annotator module. +**OTFs (Operational Taxa-Functions) Table:** Obtained from the [Peptide Annotator](#module-4-peptide-annotator) module. **Meta Table:** The first column is sample names, and the other columns represent different groups. If no meta table is provided, meta info will be generated automatically: (1) all samples are in the same group; (2) each sample is a separate group. @@ -784,6 +657,137 @@ We can select **meta** **groups** or **samples** (default a +# Preparing Your Data + +## Module 2. Database Builder + +**Note:** The results from **MetaLab v2.3** MaxQuant workflow do not require database building. However, we do not recommend using these results as input to MetaX, as many peptides may be discarded. + +- Build the database for the **first time** using the Database Builder. + + **Option 1: Build Database Using MGnify Data** + + Ensure you download the correct database type corresponding to your data. + + ![dbbuilder](./MetaX_Cookbook.assets/dbbuilder.png) + + **Option 2: Build Database Using Own Data** + + 1. **Annotation Table:** A TSV table (tab-separated), with the first column as protein name joined with Genome by "_", e.g., "Genome1_protein1", and other columns containing annotation information. + + ![dbbuilder_own](./MetaX_Cookbook.assets/dbbuilder_own.png) + + 2. 
**Taxa Table:** A TSV table (tab-separated), with the first column as Genome name, e.g., "Genome1", and the second column as taxa. + + **Example Annotation Table:** + + | Query | Preferred_name | EC | KEGG_ko | + | ------------------- | -------------- | ----------------- | ------------------- | + | MGYG000000001_00696 | mfd | - | ko:K03723 | + | MGYG000000001_02838 | hxlR | - | - | + | MGYG000000001_01674 | ispG | 1.17.7.1,1.17.7.3 | ko:K03526 | + | MGYG000000001_02710 | glsA | 3.5.1.2 | ko:K01425 | + | MGYG000000001_01356 | mutS2 | - | ko:K07456 | + | MGYG000000001_02630 | - | - | - | + | MGYG000000001_02418 | ackA | 2.7.2.1 | ko:K00925 | + | MGYG000000001_00728 | atpA | 3.6.3.14 | ko:K02111 | + | MGYG000000001_00695 | pth | 3.1.1.29 | ko:K01056 | + | MGYG000000001_02907 | - | - | ko:K03086 | + | MGYG000000001_02592 | rplC | - | ko:K02906 | + | MGYG000000001_00137 | - | - | ko:K03480,ko:K03488 | + + **Example Taxa Table:** + + | Genome | Lineage | + | ------------- | ------------------------------------------------------------ | + | MGYG000000001 | d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Peptostreptococcales;f_Peptostreptococcaceae;g_GCA-900066495;s_GCA-900066495 sp902362365 | + | MGYG000000002 | d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Lachnospirales;f_Lachnospiraceae;g_Blautia_A;s_Blautia_A faecis | + | MGYG000000003 | d_Bacteria;p_Bacteroidota;c_Bacteroidia;o_Bacteroidales;f_Rikenellaceae;g_Alistipes;s_Alistipes shahii | + | MGYG000000004 | d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Oscillospirales;f_Ruminococcaceae;g_Anaerotruncus;s_Anaerotruncus colihominis | + | MGYG000000005 | d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Peptostreptococcales;f_Peptostreptococcaceae;g_Terrisporobacter;s_Terrisporobacter glycolicus_A | + | MGYG000000006 | d_Bacteria;p_Firmicutes;c_Bacilli;o_Staphylococcales;f_Staphylococcaceae;g_Staphylococcus;s_Staphylococcus xylosus | + | MGYG000000007 | 
d_Bacteria;p_Firmicutes;c_Bacilli;o_Lactobacillales;f_Lactobacillaceae;g_Lactobacillus;s_Lactobacillus intestinalis | + | MGYG000000008 | d_Bacteria;p_Firmicutes;c_Bacilli;o_Lactobacillales;f_Lactobacillaceae;g_Lactobacillus;s_Lactobacillus johnsonii | + | MGYG000000009 | d_Bacteria;p_Firmicutes;c_Bacilli;o_Lactobacillales;f_Lactobacillaceae;g_Ligilactobacillus;s_Ligilactobacillus murinus | + +## Module 3. Database Updater + +The **Database Updater** allows updating the database built by the **Database Builder** or adding more annotations. This step is **optional**. + +- Update the built database and extend annotations. + + ![db_updater](./MetaX_Cookbook.assets/db_updater.png) + + **Option 1: Built-in Mode** + + We recommend some extended databases, such as [dbCAN_seq](https://bcb.unl.edu/dbCAN_seq). + + **Option 2: TSV Table** + + Extend the database by adding a new database to the database table. Ensure the column separator is a tab and the first column is the Protein name, with other columns containing function annotations. + + **Example:** + + | Protein ID | COG | KEGG | ... | + | ------------------- | ---------- | ---------- | ---- | + | MGYG000000001_02630 | Function 1 | Function 1 | ... | + | MGYG000000001_01475 | Function 2 | Function 1 | ... | + | MGYG000000001_01539 | Function 3 | Function 1 | ... | + +## Module 4. Peptide Annotator + +### 1. Results from MAG Workflow + +The peptide results use Metagenome-assembled genomes (MAGs) as the reference database for protein searches, e.g., MetaLab-MAG, MetaLab-DIA, and other workflows which use MAG databases like MGnify or a customized MAG database. + +- Annotate the peptide to the Operational Taxa-Functions (OTF) Table before analysis using the Peptide Annotator. 
+ + ![peptide2taxafunc](./MetaX_Cookbook.assets/peptide2taxafunc.png) + + **Required:** + + - **Database**: The database created by [Database Builder](#module-2-database-builder) + + - **Peptide Table**: + + - *Option 1*: From MetaLab-MAG results (final_peptides.tsv) + + - *Option 2*: Create it manually, with the first column as the ID (e.g., peptide sequence) and the second column as the protein IDs from MGnify (e.g., MGYG000003683_00301; MGYG000001490_01143) or your database, and other columns as the intensity of each sample. + + **Example:** + + | Sequence | Proteins | Intensity_V1_01 | Intensity_V1_02 | Intensity_V1_03 | Intensity_V1_04 | + | ----------------------------------- | ------------------------------------------------------------ | --------------- | --------------- | --------------- | --------------- | + | (Acetyl)KGGVEPQSETVWR | MGYG000002716_01681;MGYG000000195_00452;MGYG000001616_00519;MGYG000002258_01582;MGYG000001300_00281;MGYG000002926_00231;... | 714650 | 0 | 0 | 0 | + | (Acetyl)KVIPELNGK | MGYG000003589_01892;MGYG000001560_01812;MGYG000001789_00244;... | 0 | 0 | 0 | 0 | + | (Acetyl)LAELGAKAVTLSGPDGYIYDPDGITTK | MGYG000001199_02893 | 0 | 0 | 0 | 0 | + | (Acetyl)LLTGLPDAYGR | MGYG000001757_01206;MGYG000004547_02135;MGYG000001283_00124;MGYG000004758_00803;MGYG000002486_00845;MGYG000000271_01269 | 0 | 307519 | 0 | 0 | + | (Acetyl)MDFTLDKK | MGYG000000076_01275;MGYG000003694_00879;MGYG000000312_02425;MGYG000000271_02102;MGYG000004271_00233;MGYG000002517_00542;MGYG000000489_01025 | 306231 | 0 | 0 | 1214497 | + + - **Output Save Path**: The location to save the result table. + + - **LCA Threshold**: Find the LCA with the proportion threshold for each peptide. The default is 1.00 (100%). + + ![LCA_prop](./MetaX_Cookbook.assets/LCA_prop.png) + +### 2. Results from MaxQuant Workflow + +The peptide results from **MetaLab 2.3** MaxQuant workflow. + +- Select the **MetaLab** result folder, which contains the **maxquant_search** folder. 
+ + ![peptide2taxafunc_tab2_1](MetaX_Cookbook.assets/peptide2taxafunc_tab2_1.png) + +- The **Peptide Annotator** will automatically find the **peptides_report.txt**, **BuiltIn.pepTaxa.csv**, and **functions.tsv** in the **maxquant_search** folder. Alternatively, you can select the files manually. + + - Select **OTFs Save To** to set the location to save the result table. + + ![peptide2taxafunc_tab2_2](MetaX_Cookbook.assets/peptide2taxafunc_tab2_2.png) + +
+ + + # Developer Tools diff --git a/metax/taxafunc_ploter/heatmap_plot.py b/metax/taxafunc_ploter/heatmap_plot.py index 79d1d14..5190664 100644 --- a/metax/taxafunc_ploter/heatmap_plot.py +++ b/metax/taxafunc_ploter/heatmap_plot.py @@ -93,6 +93,7 @@ def plot_top_taxa_func_heatmap_of_test_res(self, df, top_number:int|str= 100, cmap = type_map.get(value_type, "None")[1] if cmap is None else cmap + value_col_name = type_map.get(value_type, "None")[0] @@ -110,7 +111,7 @@ def plot_top_taxa_func_heatmap_of_test_res(self, df, top_number:int|str= 100, if rename_taxa: df_top['Taxon'] = df_top['Taxon'].apply(lambda x: x.split('|')[-1]) # df_top = self.rename_taxa(df_top) - df_top = df_top.pivot(index=func_name, columns='Taxon', values=p_type) + df_top = df_top.pivot(index=func_name, columns='Taxon', values=value_col_name) print(f"Top [{top_number}] significant: Taxa ({df_top.shape[1]}), Functions ({df_top.shape[0]})") df_plot = df_top.fillna(1) if plot_type in ['pvalue', 'padj'] else df_top.fillna(0) @@ -154,11 +155,11 @@ def plot_top_taxa_func_heatmap_of_test_res(self, df, top_number:int|str= 100, fig.ax_heatmap.set_xlabel("Taxa") fig.ax_heatmap.set_ylabel("Functions") - scale_title = f"scaled by {scale}" if scale in ['row', 'column', 'all'] else '' + scale_title = f", scaled by {scale}" if scale in ['row', 'column', 'all'] else '' if title == "": - title = f"Significant Differences in Taxa-Function (Top {top_number} sorted by {plot_type}, filtered by {p_type}, {scale_title})" + title = f"Significant Differences in Taxa-Function (Top {top_number} sorted by {plot_type}, filtered by {p_type}{scale_title})" else: - title = f"{title} (Top {top_number} sorted by {plot_type}, filtered by {p_type}, {scale_title})" + title = f"{title} (Top {top_number} sorted by {plot_type}, filtered by {p_type}{scale_title})" plt.suptitle(title) @@ -321,9 +322,9 @@ def plot_basic_heatmap_of_test_res(self, df, top_number:int = 100, value_type:st va = self.get_y_labels_va() ) - scale_title = 
f"scaled by {scale}" if scale in ['row', 'column', 'all'] else '' + scale_title = f", scaled by {scale}" if scale in ['row', 'column', 'all'] else '' plt.suptitle( - f"The intensity of Significant differences (top {len(mat)} sorted by {sort_by.split('(')[0]}, filtered by {p_type}, {scale_title})" + f"The intensity of Significant differences (top {len(mat)} sorted by {sort_by.split('(')[0]}, filtered by {p_type}{scale_title})" ) cbar = fig.ax_heatmap.collections[0].colorbar cbar.set_label("Intensity", rotation=90, labelpad=1) diff --git a/metax/utils/version.py b/metax/utils/version.py index f4be0ce..d936c4f 100644 --- a/metax/utils/version.py +++ b/metax/utils/version.py @@ -1,2 +1,2 @@ -__version__ = '1.116.0' +__version__ = '1.116.1' API_version = '3' \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 3fa8034..209f360 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "MetaXTools" -version = "1.116.0" +version = "1.116.1" description = "MetaXTools is a novel tool for linking peptide sequences with taxonomic and functional information in Metaproteomics." readme = "README_PyPi.md" license = { text = "NorthOmics" }