From f9bc6d0c96fbff9f6effcb334b5af3115d028256 Mon Sep 17 00:00:00 2001 From: Qing Date: Sat, 15 Jun 2024 19:03:22 -0400 Subject: [PATCH] - New: MetaX now supports making the OTFs Table from the MetaLab v2.3 MaxQuant output file. --- Docs/ChangeLog.md | 5 + README.md | 2 + utils/GUI.py | 94 +++++++- utils/MetaX_GUI/MainWindow.ui | 389 ++++++++++++++++++++++--------- utils/MetaX_GUI/Ui_MainWindow.py | 223 ++++++++++++------ utils/metalab2otf.py | 248 ++++++++++++++++++++ utils/version.py | 2 +- 7 files changed, 777 insertions(+), 186 deletions(-) create mode 100644 utils/metalab2otf.py diff --git a/Docs/ChangeLog.md b/Docs/ChangeLog.md index 3492142..aa6db50 100644 --- a/Docs/ChangeLog.md +++ b/Docs/ChangeLog.md @@ -1,3 +1,8 @@ +# Version: 1.107.0 +## Date: 2024-06-15 +### Changes: +- New: MetaX now supports making the OTFs Table from the MetaLab v2.3 MaxQuant output file. + # Version: 1.106.1 ## Date: 2024-06-09 ### Changes: diff --git a/README.md b/README.md index b20aea9..2e96299 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,8 @@ MetaX also features statistical modules and plotting tools for ana ![abstract](./Docs/MetaX_Cookbook.assets/abstract.png) +## Taxa-Functions Linkage + ## Download ### `Desktop Version(Recommended)`: diff --git a/utils/GUI.py b/utils/GUI.py index 98bf43d..23dc0ff 100644 --- a/utils/GUI.py +++ b/utils/GUI.py @@ -219,7 +219,10 @@ def __init__(self, MainWindow): self.lineEdit_db_path = self.make_line_edit_drag_drop(self.lineEdit_db_path, 'file') self.lineEdit_final_peptide_path = self.make_line_edit_drag_drop(self.lineEdit_final_peptide_path, 'file') self.lineEdit_peptide2taxafunc_outpath = self.make_line_edit_drag_drop(self.lineEdit_peptide2taxafunc_outpath, 'folder', 'OTF.tsv') - + self.lineEdit_metalab_anno_peptides_report = self.make_line_edit_drag_drop(self.lineEdit_metalab_anno_peptides_report, 'file') + self.lineEdit_metalab_anno_built_in_taxa = self.make_line_edit_drag_drop(self.lineEdit_metalab_anno_built_in_taxa, 'file') + 
self.lineEdit_metalab_anno_functions = self.make_line_edit_drag_drop(self.lineEdit_metalab_anno_functions, 'file') + self.lineEdit_metalab_anno_otf_save_path = self.make_line_edit_drag_drop(self.lineEdit_metalab_anno_otf_save_path, 'folder', 'OTF.tsv') # set ComboBox eanble searchable self.make_related_comboboxes_searchable() @@ -233,11 +236,18 @@ def __init__(self, MainWindow): # set button click event - # peptide2taxafunc + # peptideAnnotator MAG self.pushButton_get_db_path.clicked.connect(self.set_lineEdit_db_path) self.pushButton_get_final_peptide_path.clicked.connect(self.set_lineEdit_final_peptide_path) self.pushButton_get_taxafunc_save_path.clicked.connect(self.set_lineEdit_peptide2taxafunc_outpath) self.pushButton_run_peptide2taxafunc.clicked.connect(self.run_peptide2taxafunc) + # peptideAnnotator MetaLab2.3 + self.pushButton_open_metalab_res_folder.clicked.connect(self.set_lineEdit_metalab_res_folder) + self.pushButton_open_metalab_anno_peptides_report.clicked.connect(self.set_lineEdit_metalab_anno_peptides_report_path) + self.pushButton_open_metalab_anno_built_in_taxa.clicked.connect(self.set_lineEdit_metalab_anno_built_in_taxa_path) + self.pushButton_open_metalab_anno_functions.clicked.connect(self.set_lineEdit_metalab_anno_functions_path) + self.pushButton_open_metalab_anno_otf_save_path.clicked.connect(self.set_lineEdit_metalab_anno_otf_save_path) + self.pushButton_run_metalab_maxq_annotate.clicked.connect(self.run_metalab_maxq_annotate) ## help button click event self.toolButton_db_path_help.clicked.connect(self.show_toolButton_db_path_help) @@ -247,7 +257,7 @@ def __init__(self, MainWindow): self.pushButton_func_threshold_help.clicked.connect(self.show_func_threshold_help) self.toolButton_db_update_built_in_help.clicked.connect(self.show_toolButton_db_update_built_in_help) self.toolButton_db_update_table_help.clicked.connect(self.show_toolButton_db_update_table_help) - + 
self.toolButton_metalab_res_folder_help.clicked.connect(self.show_toolButton_metalab_res_folder_help) @@ -1431,7 +1441,7 @@ def show_message(self,message,title='Information'): QApplication.processEvents() - + ## peptideAnnotator MAG tab def set_lineEdit_db_path(self): db_path = QFileDialog.getOpenFileName(self.MainWindow, 'Select Database', self.last_path, 'sqlite3 (*.db)')[0] self.last_path = os.path.dirname(db_path) @@ -1448,7 +1458,50 @@ def set_lineEdit_peptide2taxafunc_outpath(self): peptide2taxafunc_outpath = QFileDialog.getSaveFileName(self.MainWindow, 'Save Operational Taxa-Functions (OTF) Table', os.path.join(self.last_path, 'OTF.tsv'), 'tsv (*.tsv)')[0] self.last_path = os.path.dirname(peptide2taxafunc_outpath) self.lineEdit_peptide2taxafunc_outpath.setText(peptide2taxafunc_outpath) + ## peptideAnnotator MAG tab end + + ## peptideAnnotator MetaLab2.3 tab + def set_lineEdit_metalab_res_folder(self): + metalab_res_folder = QFileDialog.getExistingDirectory(self.MainWindow, 'Select MetaLab Result Folder', self.last_path) + self.last_path = metalab_res_folder + # check if the folder contains MetaLab result files + peptide_file = os.path.join(metalab_res_folder, 'maxquant_search/combined/txt/peptides_report.txt') + pepTaxa_file = os.path.join(metalab_res_folder, 'maxquant_search/taxonomy_analysis/BuiltIn.pepTaxa.csv') + functions_file = os.path.join(metalab_res_folder, 'maxquant_search/functional_annotation/functions.tsv') + for file in [peptide_file, pepTaxa_file, functions_file]: + if not os.path.exists(file): + QMessageBox.warning(self.MainWindow, "Warning", f"MetaLab result folder does not contain the required file:\n{file}") + return + + # set the path to lineEdit + self.lineEdit_metalab_res_folder.setText(metalab_res_folder) + self.lineEdit_metalab_anno_peptides_report.setText(peptide_file) + self.lineEdit_metalab_anno_built_in_taxa.setText(pepTaxa_file) + self.lineEdit_metalab_anno_functions.setText(functions_file) + # switch to MetaLab Annotated set 
path tab + self.toolBox_metalab_res_anno.setCurrentIndex(1) + + def set_lineEdit_metalab_anno_peptides_report_path(self): + metalab_anno_peptides_report_path = QFileDialog.getOpenFileName(self.MainWindow, 'Select MetaLab Annotated Peptides Report', self.last_path, 'txt (*.txt);;All Files (*)')[0] + self.last_path = os.path.dirname(metalab_anno_peptides_report_path) + self.lineEdit_metalab_anno_peptides_report.setText(metalab_anno_peptides_report_path) + + def set_lineEdit_metalab_anno_built_in_taxa_path(self): + metalab_anno_built_in_taxa_path = QFileDialog.getOpenFileName(self.MainWindow, 'Select MetaLab Annotated Built-in Taxa', self.last_path, 'CSV Files (*.csv);;All Files (*)')[0] + self.lineEdit_metalab_anno_built_in_taxa.setText(metalab_anno_built_in_taxa_path) + self.last_path = os.path.dirname(metalab_anno_built_in_taxa_path) + def set_lineEdit_metalab_anno_functions_path(self): + metalab_anno_functions_path = QFileDialog.getOpenFileName(self.MainWindow, 'Select MetaLab Annotated Functions', self.last_path, 'TSV Files (*.tsv);;All Files (*)')[0] + self.lineEdit_metalab_anno_functions.setText(metalab_anno_functions_path) + self.last_path = os.path.dirname(metalab_anno_functions_path) + + def set_lineEdit_metalab_anno_otf_save_path(self): + metalab_anno_otf_save_path = QFileDialog.getSaveFileName(self.MainWindow, 'Save MetaLab Annotated OTF Table', os.path.join(self.last_path, 'OTF.tsv'), 'tsv (*.tsv)')[0] + self.last_path = os.path.dirname(metalab_anno_otf_save_path) + self.lineEdit_metalab_anno_otf_save_path.setText(metalab_anno_otf_save_path) + + ## peptideAnnotator MetaLab2.3 tab end def load_example_for_analyzer(self): current_path = os.path.dirname(os.path.abspath(__file__)) @@ -1649,6 +1702,7 @@ def run_after_set_multi_tables(self): print("\n---------------------------------- Set Multi Table End ----------------------------------\n") # go to basic analysis tab and the first tab + self.stackedWidget.setCurrentIndex(0) # go to page_analyzer 
self.tabWidget_TaxaFuncAnalyzer.setCurrentIndex(3) self.tabWidget_4.setCurrentIndex(0) self.pushButton_set_multi_table.setEnabled(True) @@ -1742,7 +1796,8 @@ def run_db_updater(self): ## Database Updater - # Peptide to TaxaFunc + ## Peptide Annotator + # MAG tab def run_peptide2taxafunc(self): db_path = f'''{self.lineEdit_db_path.text()}''' final_peptide_path = f'''{self.lineEdit_final_peptide_path.text()}''' @@ -1767,6 +1822,30 @@ def run_peptide2taxafunc(self): except Exception as e: self.logger.write_log(f'run_peptide2taxafunc error: {e}', 'e') QMessageBox.warning(self.MainWindow, 'Warning', f'Error: {e}') + # MetaLab2.3 tab + def run_metalab_maxq_annotate(self): + pepTaxa_file = f'''{self.lineEdit_metalab_anno_peptides_report.text()}''' + peptide_file = f'''{self.lineEdit_metalab_anno_built_in_taxa.text()}''' + functions_file = f'''{self.lineEdit_metalab_anno_functions.text()}''' + otf_save_path = f'''{self.lineEdit_metalab_anno_otf_save_path.text()}''' + print(f'pepTaxa_file:\n{pepTaxa_file} \npeptide_file:\n{peptide_file} \nfunctions_file:\n{functions_file} \notf_save_path:\n{otf_save_path}') + + if pepTaxa_file == '' or peptide_file == '' or functions_file == '' or otf_save_path == '': + QMessageBox.warning(self.MainWindow, 'Warning', 'Please set all above paths') + return None + try: + self.logger.write_log(f'run_metalab_maxq_annotate: pepTaxa_file:{pepTaxa_file} peptide_file:{peptide_file} functions_file:{functions_file} otf_save_path:{otf_save_path}') + + from MetaX.utils.metalab2otf import MetaLab2OTF + def metalab_main_wrapper(): + instance = MetaLab2OTF(pepTaxa_file, peptide_file, functions_file, otf_save_path) + return instance.main() + + self.run_in_new_window(metalab_main_wrapper, show_msg=True) + except Exception as e: + error_message = traceback.format_exc() + self.logger.write_log(f'Error when run_metalab_maxq_annotate: {error_message}', 'e') + QMessageBox.warning(self.MainWindow, 'Error', error_message) #### TaxaFuncAnalyzer #### @@ -1855,6 
+1934,11 @@ def show_toolButton_db_path_help(self): msg_box.addButton(QMessageBox.Cancel) switch_button.clicked.connect(self.swith_stack_page_dbuilder) msg_box.exec_() + + def show_toolButton_metalab_res_folder_help(self): + QMessageBox.information(self.MainWindow, 'MetaLab Result Folder Help', 'Select the folder of MetaLab v2.3 result.\n\n make sure it contains [maxquant_search] folder.') + + def show_pushButton_preprocessing_help(self): msg_box = QMessageBox(parent=self.MainWindow) msg_box.setWindowTitle('Preprocessing Help') diff --git a/utils/MetaX_GUI/MainWindow.ui b/utils/MetaX_GUI/MainWindow.ui index ebab4ec..f300b98 100644 --- a/utils/MetaX_GUI/MainWindow.ui +++ b/utils/MetaX_GUI/MainWindow.ui @@ -46,7 +46,7 @@ Qt::LeftToRight - 3 + 0 @@ -240,7 +240,7 @@ 0 0 477 - 377 + 373 @@ -1315,7 +1315,7 @@ - 1 + 0 @@ -7152,109 +7152,281 @@ - - - - Output Save Path - - - - - - - GO - - - - - - - ? - - - - - - - - - - - - - 3 - - - 1.000000000000000 - - - 0.050000000000000 - - - 1.000000000000000 - - - - - - - ? - - - - - - - Peptide Table - - - - - - Database - - - - - - - LCA Threshold - - - - - - - Open - - - - - - - - - - Open - - - - - - - ? - - - - - - - Open + + + 1 + + + MAG + + + + + + + + + ? + + + + + + + + + + Database + + + + + + + LCA Threshold + + + + + + + Open + + + + + + + ? + + + + + + + Peptide Table + + + + + + + Open + + + + + + + ? + + + + + + + + + + Open + + + + + + + 3 + + + 1.000000000000000 + + + 0.050000000000000 + + + 1.000000000000000 + + + + + + + OTFs Save To + + + + + + + GO + + + + + + + + MetaLab v2.3 + + + + + + GO + + + + + + + 0 + + + + + 0 + 0 + 943 + 332 + + + + Set Rsults Folder + + + + + + Open + + + + + + + + + + MetaLab Resul Folder Wich contain "maxquant_search" filder + + + MetaLab 2.3 Result Folder + + + + + + + ? 
+ + + + + + + + + 0 + 0 + 943 + 332 + + + + Set Path + + + + + + In the maxquant_search/taxonomy_analysis/ + + + BuiltIn.pepTaxa.csv + + + + + + + + + + Open + + + + + + + + + + In the maxquant_search/combined/txt/ + + + peptides_report.txt + + + + + + + In the maxquant_search/functional_annotation/ + + + functions.tsv + + + + + + + + + + Open + + + + + + + Open + + + + + + + Path to Save Output + + + OTFs Save To + + + + + + + + + + Open + + + + + + + + + - + @@ -7754,7 +7926,7 @@ 0 0 1021 - 21 + 23 @@ -7862,14 +8034,6 @@ - toolButton__final_peptide_help - lineEdit_final_peptide_path - pushButton_get_final_peptide_path - lineEdit_peptide2taxafunc_outpath - pushButton_get_taxafunc_save_path - toolButton_lca_threshould_help - doubleSpinBox_LCA_threshold - pushButton_run_peptide2taxafunc comboBox_taxa_level_to_stast toolButton_meta_table_help comboBox_function_to_stast @@ -7879,9 +8043,6 @@ toolButton_taxafunc_table_help listWidget_table_list pushButton_view_table - toolButton_db_path_help - lineEdit_db_path - pushButton_get_db_path diff --git a/utils/MetaX_GUI/Ui_MainWindow.py b/utils/MetaX_GUI/Ui_MainWindow.py index a37b0d2..a01aa12 100644 --- a/utils/MetaX_GUI/Ui_MainWindow.py +++ b/utils/MetaX_GUI/Ui_MainWindow.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Form implementation generated from reading ui file 'c:\Users\Qing\OneDrive - University of Ottawa\code\TaxaFunc\MetaX\utils\MetaX_GUI\MainWindow.ui' +# Form implementation generated from reading ui file 'c:\Users\max\OneDrive - University of Ottawa\code\TaxaFunc\MetaX\utils\MetaX_GUI\MainWindow.ui' # # Created by: PyQt5 UI code generator 5.15.9 # @@ -145,7 +145,7 @@ def setupUi(self, metaX_main): self.toolBox_2.setMaximumSize(QtCore.QSize(1677, 16777215)) self.toolBox_2.setObjectName("toolBox_2") self.page_2 = QtWidgets.QWidget() - self.page_2.setGeometry(QtCore.QRect(0, 0, 477, 377)) + self.page_2.setGeometry(QtCore.QRect(0, 0, 477, 373)) self.page_2.setObjectName("page_2") self.gridLayout_27 = 
QtWidgets.QGridLayout(self.page_2) self.gridLayout_27.setObjectName("gridLayout_27") @@ -3773,55 +3773,134 @@ def setupUi(self, metaX_main): self.widget_Peptide2taxafunc.setObjectName("widget_Peptide2taxafunc") self.gridLayout_3 = QtWidgets.QGridLayout(self.widget_Peptide2taxafunc) self.gridLayout_3.setObjectName("gridLayout_3") - self.label_7 = QtWidgets.QLabel(self.widget_Peptide2taxafunc) - self.label_7.setObjectName("label_7") - self.gridLayout_3.addWidget(self.label_7, 3, 0, 1, 1) - self.pushButton_run_peptide2taxafunc = QtWidgets.QPushButton(self.widget_Peptide2taxafunc) - self.pushButton_run_peptide2taxafunc.setObjectName("pushButton_run_peptide2taxafunc") - self.gridLayout_3.addWidget(self.pushButton_run_peptide2taxafunc, 5, 0, 1, 4) - self.toolButton_lca_threshould_help = QtWidgets.QToolButton(self.widget_Peptide2taxafunc) - self.toolButton_lca_threshould_help.setObjectName("toolButton_lca_threshould_help") - self.gridLayout_3.addWidget(self.toolButton_lca_threshould_help, 4, 1, 1, 1) - self.lineEdit_peptide2taxafunc_outpath = QtWidgets.QLineEdit(self.widget_Peptide2taxafunc) + self.tabWidget_6 = QtWidgets.QTabWidget(self.widget_Peptide2taxafunc) + self.tabWidget_6.setObjectName("tabWidget_6") + self.tab_17 = QtWidgets.QWidget() + self.tab_17.setObjectName("tab_17") + self.gridLayout_42 = QtWidgets.QGridLayout(self.tab_17) + self.gridLayout_42.setObjectName("gridLayout_42") + self.lineEdit_final_peptide_path = QtWidgets.QLineEdit(self.tab_17) + self.lineEdit_final_peptide_path.setObjectName("lineEdit_final_peptide_path") + self.gridLayout_42.addWidget(self.lineEdit_final_peptide_path, 1, 2, 1, 1) + self.toolButton_db_path_help = QtWidgets.QToolButton(self.tab_17) + self.toolButton_db_path_help.setObjectName("toolButton_db_path_help") + self.gridLayout_42.addWidget(self.toolButton_db_path_help, 0, 1, 1, 1) + self.lineEdit_peptide2taxafunc_outpath = QtWidgets.QLineEdit(self.tab_17) 
self.lineEdit_peptide2taxafunc_outpath.setObjectName("lineEdit_peptide2taxafunc_outpath") - self.gridLayout_3.addWidget(self.lineEdit_peptide2taxafunc_outpath, 3, 2, 1, 1) - self.lineEdit_db_path = QtWidgets.QLineEdit(self.widget_Peptide2taxafunc) + self.gridLayout_42.addWidget(self.lineEdit_peptide2taxafunc_outpath, 2, 2, 1, 1) + self.label_5 = QtWidgets.QLabel(self.tab_17) + self.label_5.setObjectName("label_5") + self.gridLayout_42.addWidget(self.label_5, 0, 0, 1, 1) + self.label_8 = QtWidgets.QLabel(self.tab_17) + self.label_8.setObjectName("label_8") + self.gridLayout_42.addWidget(self.label_8, 3, 0, 1, 1) + self.pushButton_get_db_path = QtWidgets.QPushButton(self.tab_17) + self.pushButton_get_db_path.setObjectName("pushButton_get_db_path") + self.gridLayout_42.addWidget(self.pushButton_get_db_path, 0, 3, 1, 1) + self.toolButton_lca_threshould_help = QtWidgets.QToolButton(self.tab_17) + self.toolButton_lca_threshould_help.setObjectName("toolButton_lca_threshould_help") + self.gridLayout_42.addWidget(self.toolButton_lca_threshould_help, 3, 1, 1, 1) + self.label_6 = QtWidgets.QLabel(self.tab_17) + self.label_6.setObjectName("label_6") + self.gridLayout_42.addWidget(self.label_6, 1, 0, 1, 1) + self.pushButton_get_final_peptide_path = QtWidgets.QPushButton(self.tab_17) + self.pushButton_get_final_peptide_path.setObjectName("pushButton_get_final_peptide_path") + self.gridLayout_42.addWidget(self.pushButton_get_final_peptide_path, 1, 3, 1, 1) + self.toolButton__final_peptide_help = QtWidgets.QToolButton(self.tab_17) + self.toolButton__final_peptide_help.setObjectName("toolButton__final_peptide_help") + self.gridLayout_42.addWidget(self.toolButton__final_peptide_help, 1, 1, 1, 1) + self.lineEdit_db_path = QtWidgets.QLineEdit(self.tab_17) self.lineEdit_db_path.setObjectName("lineEdit_db_path") - self.gridLayout_3.addWidget(self.lineEdit_db_path, 1, 2, 1, 1) - self.doubleSpinBox_LCA_threshold = QtWidgets.QDoubleSpinBox(self.widget_Peptide2taxafunc) + 
self.gridLayout_42.addWidget(self.lineEdit_db_path, 0, 2, 1, 1) + self.pushButton_get_taxafunc_save_path = QtWidgets.QPushButton(self.tab_17) + self.pushButton_get_taxafunc_save_path.setObjectName("pushButton_get_taxafunc_save_path") + self.gridLayout_42.addWidget(self.pushButton_get_taxafunc_save_path, 2, 3, 1, 1) + self.doubleSpinBox_LCA_threshold = QtWidgets.QDoubleSpinBox(self.tab_17) self.doubleSpinBox_LCA_threshold.setDecimals(3) self.doubleSpinBox_LCA_threshold.setMaximum(1.0) self.doubleSpinBox_LCA_threshold.setSingleStep(0.05) self.doubleSpinBox_LCA_threshold.setProperty("value", 1.0) self.doubleSpinBox_LCA_threshold.setObjectName("doubleSpinBox_LCA_threshold") - self.gridLayout_3.addWidget(self.doubleSpinBox_LCA_threshold, 4, 2, 1, 2) - self.toolButton__final_peptide_help = QtWidgets.QToolButton(self.widget_Peptide2taxafunc) - self.toolButton__final_peptide_help.setObjectName("toolButton__final_peptide_help") - self.gridLayout_3.addWidget(self.toolButton__final_peptide_help, 2, 1, 1, 1) - self.label_6 = QtWidgets.QLabel(self.widget_Peptide2taxafunc) - self.label_6.setObjectName("label_6") - self.gridLayout_3.addWidget(self.label_6, 2, 0, 1, 1) - self.label_5 = QtWidgets.QLabel(self.widget_Peptide2taxafunc) - self.label_5.setObjectName("label_5") - self.gridLayout_3.addWidget(self.label_5, 1, 0, 1, 1) - self.label_8 = QtWidgets.QLabel(self.widget_Peptide2taxafunc) - self.label_8.setObjectName("label_8") - self.gridLayout_3.addWidget(self.label_8, 4, 0, 1, 1) - self.pushButton_get_final_peptide_path = QtWidgets.QPushButton(self.widget_Peptide2taxafunc) - self.pushButton_get_final_peptide_path.setObjectName("pushButton_get_final_peptide_path") - self.gridLayout_3.addWidget(self.pushButton_get_final_peptide_path, 2, 3, 1, 1) - self.lineEdit_final_peptide_path = QtWidgets.QLineEdit(self.widget_Peptide2taxafunc) - self.lineEdit_final_peptide_path.setObjectName("lineEdit_final_peptide_path") - self.gridLayout_3.addWidget(self.lineEdit_final_peptide_path, 2, 2, 
1, 1) - self.pushButton_get_db_path = QtWidgets.QPushButton(self.widget_Peptide2taxafunc) - self.pushButton_get_db_path.setObjectName("pushButton_get_db_path") - self.gridLayout_3.addWidget(self.pushButton_get_db_path, 1, 3, 1, 1) - self.toolButton_db_path_help = QtWidgets.QToolButton(self.widget_Peptide2taxafunc) - self.toolButton_db_path_help.setObjectName("toolButton_db_path_help") - self.gridLayout_3.addWidget(self.toolButton_db_path_help, 1, 1, 1, 1) - self.pushButton_get_taxafunc_save_path = QtWidgets.QPushButton(self.widget_Peptide2taxafunc) - self.pushButton_get_taxafunc_save_path.setObjectName("pushButton_get_taxafunc_save_path") - self.gridLayout_3.addWidget(self.pushButton_get_taxafunc_save_path, 3, 3, 1, 1) + self.gridLayout_42.addWidget(self.doubleSpinBox_LCA_threshold, 3, 2, 1, 2) + self.label_7 = QtWidgets.QLabel(self.tab_17) + self.label_7.setObjectName("label_7") + self.gridLayout_42.addWidget(self.label_7, 2, 0, 1, 1) + self.pushButton_run_peptide2taxafunc = QtWidgets.QPushButton(self.tab_17) + self.pushButton_run_peptide2taxafunc.setObjectName("pushButton_run_peptide2taxafunc") + self.gridLayout_42.addWidget(self.pushButton_run_peptide2taxafunc, 4, 0, 1, 4) + self.tabWidget_6.addTab(self.tab_17, "") + self.tab_18 = QtWidgets.QWidget() + self.tab_18.setObjectName("tab_18") + self.gridLayout_43 = QtWidgets.QGridLayout(self.tab_18) + self.gridLayout_43.setObjectName("gridLayout_43") + self.pushButton_run_metalab_maxq_annotate = QtWidgets.QPushButton(self.tab_18) + self.pushButton_run_metalab_maxq_annotate.setObjectName("pushButton_run_metalab_maxq_annotate") + self.gridLayout_43.addWidget(self.pushButton_run_metalab_maxq_annotate, 2, 0, 1, 3) + self.toolBox_metalab_res_anno = QtWidgets.QToolBox(self.tab_18) + self.toolBox_metalab_res_anno.setObjectName("toolBox_metalab_res_anno") + self.page_3 = QtWidgets.QWidget() + self.page_3.setGeometry(QtCore.QRect(0, 0, 943, 332)) + self.page_3.setObjectName("page_3") + self.gridLayout_45 = 
QtWidgets.QGridLayout(self.page_3) + self.gridLayout_45.setObjectName("gridLayout_45") + self.pushButton_open_metalab_res_folder = QtWidgets.QPushButton(self.page_3) + self.pushButton_open_metalab_res_folder.setObjectName("pushButton_open_metalab_res_folder") + self.gridLayout_45.addWidget(self.pushButton_open_metalab_res_folder, 0, 3, 1, 1) + self.lineEdit_metalab_res_folder = QtWidgets.QLineEdit(self.page_3) + self.lineEdit_metalab_res_folder.setObjectName("lineEdit_metalab_res_folder") + self.gridLayout_45.addWidget(self.lineEdit_metalab_res_folder, 0, 2, 1, 1) + self.label_161 = QtWidgets.QLabel(self.page_3) + self.label_161.setObjectName("label_161") + self.gridLayout_45.addWidget(self.label_161, 0, 0, 1, 1) + self.toolButton_metalab_res_folder_help = QtWidgets.QToolButton(self.page_3) + self.toolButton_metalab_res_folder_help.setObjectName("toolButton_metalab_res_folder_help") + self.gridLayout_45.addWidget(self.toolButton_metalab_res_folder_help, 0, 1, 1, 1) + self.toolBox_metalab_res_anno.addItem(self.page_3, "") + self.page_4 = QtWidgets.QWidget() + self.page_4.setGeometry(QtCore.QRect(0, 0, 943, 332)) + self.page_4.setObjectName("page_4") + self.gridLayout_44 = QtWidgets.QGridLayout(self.page_4) + self.gridLayout_44.setObjectName("gridLayout_44") + self.label_metalab_anno_built_in_taxa = QtWidgets.QLabel(self.page_4) + self.label_metalab_anno_built_in_taxa.setObjectName("label_metalab_anno_built_in_taxa") + self.gridLayout_44.addWidget(self.label_metalab_anno_built_in_taxa, 1, 0, 1, 1) + self.lineEdit_metalab_anno_functions = QtWidgets.QLineEdit(self.page_4) + self.lineEdit_metalab_anno_functions.setObjectName("lineEdit_metalab_anno_functions") + self.gridLayout_44.addWidget(self.lineEdit_metalab_anno_functions, 2, 1, 1, 1) + self.pushButton_open_metalab_anno_functions = QtWidgets.QPushButton(self.page_4) + self.pushButton_open_metalab_anno_functions.setObjectName("pushButton_open_metalab_anno_functions") + 
self.gridLayout_44.addWidget(self.pushButton_open_metalab_anno_functions, 2, 2, 1, 1) + self.lineEdit_metalab_anno_peptides_report = QtWidgets.QLineEdit(self.page_4) + self.lineEdit_metalab_anno_peptides_report.setObjectName("lineEdit_metalab_anno_peptides_report") + self.gridLayout_44.addWidget(self.lineEdit_metalab_anno_peptides_report, 0, 1, 1, 1) + self.label_metalab_anno_peptides_report = QtWidgets.QLabel(self.page_4) + self.label_metalab_anno_peptides_report.setObjectName("label_metalab_anno_peptides_report") + self.gridLayout_44.addWidget(self.label_metalab_anno_peptides_report, 0, 0, 1, 1) + self.label_metalab_anno_functions = QtWidgets.QLabel(self.page_4) + self.label_metalab_anno_functions.setObjectName("label_metalab_anno_functions") + self.gridLayout_44.addWidget(self.label_metalab_anno_functions, 2, 0, 1, 1) + self.lineEdit_metalab_anno_built_in_taxa = QtWidgets.QLineEdit(self.page_4) + self.lineEdit_metalab_anno_built_in_taxa.setObjectName("lineEdit_metalab_anno_built_in_taxa") + self.gridLayout_44.addWidget(self.lineEdit_metalab_anno_built_in_taxa, 1, 1, 1, 1) + self.pushButton_open_metalab_anno_built_in_taxa = QtWidgets.QPushButton(self.page_4) + self.pushButton_open_metalab_anno_built_in_taxa.setObjectName("pushButton_open_metalab_anno_built_in_taxa") + self.gridLayout_44.addWidget(self.pushButton_open_metalab_anno_built_in_taxa, 1, 2, 1, 1) + self.pushButton_open_metalab_anno_peptides_report = QtWidgets.QPushButton(self.page_4) + self.pushButton_open_metalab_anno_peptides_report.setObjectName("pushButton_open_metalab_anno_peptides_report") + self.gridLayout_44.addWidget(self.pushButton_open_metalab_anno_peptides_report, 0, 2, 1, 1) + self.label_metalab_anno_otf_save_path = QtWidgets.QLabel(self.page_4) + self.label_metalab_anno_otf_save_path.setObjectName("label_metalab_anno_otf_save_path") + self.gridLayout_44.addWidget(self.label_metalab_anno_otf_save_path, 3, 0, 1, 1) + self.lineEdit_metalab_anno_otf_save_path = QtWidgets.QLineEdit(self.page_4) 
+ self.lineEdit_metalab_anno_otf_save_path.setObjectName("lineEdit_metalab_anno_otf_save_path") + self.gridLayout_44.addWidget(self.lineEdit_metalab_anno_otf_save_path, 3, 1, 1, 1) + self.pushButton_open_metalab_anno_otf_save_path = QtWidgets.QPushButton(self.page_4) + self.pushButton_open_metalab_anno_otf_save_path.setObjectName("pushButton_open_metalab_anno_otf_save_path") + self.gridLayout_44.addWidget(self.pushButton_open_metalab_anno_otf_save_path, 3, 2, 1, 1) + self.toolBox_metalab_res_anno.addItem(self.page_4, "") + self.gridLayout_43.addWidget(self.toolBox_metalab_res_anno, 0, 0, 1, 3) + self.tabWidget_6.addTab(self.tab_18, "") + self.gridLayout_3.addWidget(self.tabWidget_6, 1, 0, 1, 1) self.label_47 = QtWidgets.QLabel(self.widget_Peptide2taxafunc) sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Fixed) sizePolicy.setHorizontalStretch(0) @@ -3836,7 +3915,7 @@ def setupUi(self, metaX_main): self.label_47.setFont(font) self.label_47.setAlignment(QtCore.Qt.AlignCenter) self.label_47.setObjectName("label_47") - self.gridLayout_3.addWidget(self.label_47, 0, 0, 1, 4) + self.gridLayout_3.addWidget(self.label_47, 0, 0, 1, 1) self.gridLayout_21.addWidget(self.widget_Peptide2taxafunc, 0, 0, 1, 1) self.stackedWidget.addWidget(self.page_pep_to_taxafunc) self.page_dbbuilder = QtWidgets.QWidget() @@ -4082,7 +4161,7 @@ def setupUi(self, metaX_main): self.statusbar.setObjectName("statusbar") metaX_main.setStatusBar(self.statusbar) self.menuBar = QtWidgets.QMenuBar(metaX_main) - self.menuBar.setGeometry(QtCore.QRect(0, 0, 1021, 21)) + self.menuBar.setGeometry(QtCore.QRect(0, 0, 1021, 23)) self.menuBar.setObjectName("menuBar") self.menuTools = QtWidgets.QMenu(self.menuBar) self.menuTools.setObjectName("menuTools") @@ -4139,12 +4218,14 @@ def setupUi(self, metaX_main): self.retranslateUi(metaX_main) self.stackedWidget.setCurrentIndex(0) - self.tabWidget_TaxaFuncAnalyzer.setCurrentIndex(3) + 
self.tabWidget_TaxaFuncAnalyzer.setCurrentIndex(0) self.toolBox_2.setCurrentIndex(0) - self.tabWidget_4.setCurrentIndex(1) + self.tabWidget_4.setCurrentIndex(0) self.tabWidget_3.setCurrentIndex(0) self.tabWidget.setCurrentIndex(0) self.tabWidget_2.setCurrentIndex(0) + self.tabWidget_6.setCurrentIndex(1) + self.toolBox_metalab_res_anno.setCurrentIndex(0) self.tabWidget_5.setCurrentIndex(0) self.checkBox_co_expression_in_condition.clicked['bool'].connect(self.comboBox_co_expression_condition_meta.setEnabled) # type: ignore self.checkBox_co_expression_in_condition.clicked['bool'].connect(self.comboBox_co_expression_condition_group.setEnabled) # type: ignore @@ -4172,14 +4253,6 @@ def setupUi(self, metaX_main): self.checkBox_pca_if_show_lable.clicked['bool'].connect(self.checkBox_sunburst_show_all_lables.setEnabled) # type: ignore self.checkBox_pca_if_show_lable.clicked['bool'].connect(self.doubleSpinBox_basic_pca_label_font_transparency.setEnabled) # type: ignore QtCore.QMetaObject.connectSlotsByName(metaX_main) - metaX_main.setTabOrder(self.toolButton__final_peptide_help, self.lineEdit_final_peptide_path) - metaX_main.setTabOrder(self.lineEdit_final_peptide_path, self.pushButton_get_final_peptide_path) - metaX_main.setTabOrder(self.pushButton_get_final_peptide_path, self.lineEdit_peptide2taxafunc_outpath) - metaX_main.setTabOrder(self.lineEdit_peptide2taxafunc_outpath, self.pushButton_get_taxafunc_save_path) - metaX_main.setTabOrder(self.pushButton_get_taxafunc_save_path, self.toolButton_lca_threshould_help) - metaX_main.setTabOrder(self.toolButton_lca_threshould_help, self.doubleSpinBox_LCA_threshold) - metaX_main.setTabOrder(self.doubleSpinBox_LCA_threshold, self.pushButton_run_peptide2taxafunc) - metaX_main.setTabOrder(self.pushButton_run_peptide2taxafunc, self.comboBox_taxa_level_to_stast) metaX_main.setTabOrder(self.comboBox_taxa_level_to_stast, self.toolButton_meta_table_help) metaX_main.setTabOrder(self.toolButton_meta_table_help, 
self.comboBox_function_to_stast) metaX_main.setTabOrder(self.comboBox_function_to_stast, self.pushButton_get_meta_path) @@ -4188,9 +4261,6 @@ def setupUi(self, metaX_main): metaX_main.setTabOrder(self.pushButton_get_taxafunc_path, self.toolButton_taxafunc_table_help) metaX_main.setTabOrder(self.toolButton_taxafunc_table_help, self.listWidget_table_list) metaX_main.setTabOrder(self.listWidget_table_list, self.pushButton_view_table) - metaX_main.setTabOrder(self.pushButton_view_table, self.toolButton_db_path_help) - metaX_main.setTabOrder(self.toolButton_db_path_help, self.lineEdit_db_path) - metaX_main.setTabOrder(self.lineEdit_db_path, self.pushButton_get_db_path) def retranslateUi(self, metaX_main): _translate = QtCore.QCoreApplication.translate @@ -4738,17 +4808,38 @@ def retranslateUi(self, metaX_main): self.tabWidget_TaxaFuncAnalyzer.setTabText(self.tabWidget_TaxaFuncAnalyzer.indexOf(self.tab_others_stats), _translate("metaX_main", "Taxa-Func Link")) self.pushButton_view_table.setText(_translate("metaX_main", "View Table")) self.tabWidget_TaxaFuncAnalyzer.setTabText(self.tabWidget_TaxaFuncAnalyzer.indexOf(self.tab_table_review), _translate("metaX_main", "Table Review")) - self.label_7.setText(_translate("metaX_main", "Output Save Path")) - self.pushButton_run_peptide2taxafunc.setText(_translate("metaX_main", "GO")) - self.toolButton_lca_threshould_help.setText(_translate("metaX_main", "?")) - self.toolButton__final_peptide_help.setText(_translate("metaX_main", "?")) - self.label_6.setText(_translate("metaX_main", "Peptide Table")) + self.toolButton_db_path_help.setText(_translate("metaX_main", "?")) self.label_5.setText(_translate("metaX_main", "Database")) self.label_8.setText(_translate("metaX_main", "LCA Threshold")) - self.pushButton_get_final_peptide_path.setText(_translate("metaX_main", "Open")) self.pushButton_get_db_path.setText(_translate("metaX_main", "Open")) - self.toolButton_db_path_help.setText(_translate("metaX_main", "?")) + 
self.toolButton_lca_threshould_help.setText(_translate("metaX_main", "?")) + self.label_6.setText(_translate("metaX_main", "Peptide Table")) + self.pushButton_get_final_peptide_path.setText(_translate("metaX_main", "Open")) + self.toolButton__final_peptide_help.setText(_translate("metaX_main", "?")) self.pushButton_get_taxafunc_save_path.setText(_translate("metaX_main", "Open")) + self.label_7.setText(_translate("metaX_main", "OTFs Save To")) + self.pushButton_run_peptide2taxafunc.setText(_translate("metaX_main", "GO")) + self.tabWidget_6.setTabText(self.tabWidget_6.indexOf(self.tab_17), _translate("metaX_main", "MAG")) + self.pushButton_run_metalab_maxq_annotate.setText(_translate("metaX_main", "GO")) + self.pushButton_open_metalab_res_folder.setText(_translate("metaX_main", "Open")) + self.label_161.setToolTip(_translate("metaX_main", "MetaLab Resul Folder Wich contain \"maxquant_search\" filder")) + self.label_161.setText(_translate("metaX_main", "MetaLab 2.3 Result Folder")) + self.toolButton_metalab_res_folder_help.setText(_translate("metaX_main", "?")) + self.toolBox_metalab_res_anno.setItemText(self.toolBox_metalab_res_anno.indexOf(self.page_3), _translate("metaX_main", "Set Rsults Folder")) + self.label_metalab_anno_built_in_taxa.setToolTip(_translate("metaX_main", "In the maxquant_search/taxonomy_analysis/")) + self.label_metalab_anno_built_in_taxa.setText(_translate("metaX_main", "BuiltIn.pepTaxa.csv")) + self.pushButton_open_metalab_anno_functions.setText(_translate("metaX_main", "Open")) + self.label_metalab_anno_peptides_report.setToolTip(_translate("metaX_main", "In the maxquant_search/combined/txt/")) + self.label_metalab_anno_peptides_report.setText(_translate("metaX_main", "peptides_report.txt")) + self.label_metalab_anno_functions.setToolTip(_translate("metaX_main", "In the maxquant_search/functional_annotation/")) + self.label_metalab_anno_functions.setText(_translate("metaX_main", "functions.tsv")) + 
self.pushButton_open_metalab_anno_built_in_taxa.setText(_translate("metaX_main", "Open")) + self.pushButton_open_metalab_anno_peptides_report.setText(_translate("metaX_main", "Open")) + self.label_metalab_anno_otf_save_path.setToolTip(_translate("metaX_main", "Path to Save Output")) + self.label_metalab_anno_otf_save_path.setText(_translate("metaX_main", "OTFs Save To")) + self.pushButton_open_metalab_anno_otf_save_path.setText(_translate("metaX_main", "Open")) + self.toolBox_metalab_res_anno.setItemText(self.toolBox_metalab_res_anno.indexOf(self.page_4), _translate("metaX_main", "Set Path")) + self.tabWidget_6.setTabText(self.tabWidget_6.indexOf(self.tab_18), _translate("metaX_main", "MetaLab v2.3")) self.label_47.setText(_translate("metaX_main", "Peptide Annotator")) self.label_48.setText(_translate("metaX_main", "Database Builder")) self.label.setText(_translate("metaX_main", "MGnify Database Type")) diff --git a/utils/metalab2otf.py b/utils/metalab2otf.py new file mode 100644 index 0000000..2b9c04e --- /dev/null +++ b/utils/metalab2otf.py @@ -0,0 +1,248 @@ +# This script is used to convert the MetaLab 2.3 results to OTF table. 
# input:
#   peptide_file:   maxquant_search/combined/txt/peptides_report.txt -> for the pep2pro_dict
#   pepTaxa_file:   maxquant_search/taxonomy_analysis/BuiltIn.pepTaxa.csv -> for the peptide taxonomy and intensity
#   functions_file: maxquant_search/functional_annotation/functions.tsv
# output:
#   - OTF.tsv

import os
from collections import Counter
from typing import Dict, List, Optional

import pandas as pd

try:
    from tqdm import tqdm
except ImportError:  # progress bars are cosmetic; the conversion itself must not depend on them
    tqdm = None


class MetaLab2OTF:
    """Convert MetaLab 2.3 (MaxQuant workflow) result files into an OTF table.

    The three inputs are the files named in the module header. Call
    :meth:`main` to run the whole conversion; it returns the merged DataFrame
    and, when a save path is known, writes it as a tab-separated file.
    """

    # (prefix, column) pairs used to build the "d__...|p__...|...|s__..." taxon string.
    _TAXA_RANKS = (
        ('d', 'Superkingdom'), ('p', 'Phylum'), ('c', 'Class'), ('o', 'Order'),
        ('f', 'Family'), ('g', 'Genus'), ('s', 'Species'),
    )

    # Function columns extracted from functions.tsv when they are present.
    _FUNC_COLUMNS = (
        'Preferred name', 'Gene_Ontology_id', 'Gene_Ontology_name',
        'Gene_Ontology_namespace', 'EC_id', 'EC_de',
        'EC_an', 'EC_ca', 'KEGG_ko', 'KEGG_Pathway_Entry', 'KEGG_Pathway_Name',
        'KEGG_Module', 'KEGG_Reaction', 'KEGG_rclass', 'BRITE', 'KEGG_TC',
        'CAZy', 'BiGG_Reaction', 'COG accession', 'COG category', 'COG name',
        'NOG accession', 'NOG category', 'NOG name',
    )

    def __init__(self, peptide_file, pepTaxa_file, functions_file, save_path: Optional[str] = None):
        """Store the input/output paths and validate them with :meth:`check_files`."""
        self.peptide_file = peptide_file
        self.pepTaxa_file = pepTaxa_file
        self.functions_file = functions_file
        self.save_path = save_path
        self.check_files()

        self.pep2pro_dict: Dict[str, List[str]] = {}  # AAAAAPEAPVCIGR: ['HT14A_GL0083014', 'V1.CD54-0_GL0054240']
        self.pepTaxa_df: Optional[pd.DataFrame] = None  # peptide taxonomy dataframe
        self.df_anno: Optional[pd.DataFrame] = None  # protein annotation dataframe, index is the protein name, each column is a function
        self.func_list: List[str] = []
        self.anno_protein_list: List[str] = []

    @staticmethod
    def _ensure_parent_dir(path):
        """Create the parent directory of *path* when it does not exist yet."""
        save_dir = os.path.dirname(path)
        # dirname is '' for a bare file name (i.e. the current directory);
        # os.makedirs('') would raise, and there is nothing to create anyway.
        if save_dir and not os.path.isdir(save_dir):
            os.makedirs(save_dir, exist_ok=True)
            print(f'Created the directory: {save_dir}')

    @staticmethod
    def _progress(iterable, desc):
        """Wrap *iterable* in a tqdm progress bar when tqdm is installed."""
        return iterable if tqdm is None else tqdm(iterable, desc=desc)

    def check_files(self):
        """Verify the three input files exist and prepare the output directory.

        Raises:
            FileNotFoundError: if any input file is missing.
        """
        for file in (self.peptide_file, self.pepTaxa_file, self.functions_file):
            if not os.path.isfile(file):
                raise FileNotFoundError(f'{file} is not found!')
        print('All files are found!')

        # check the save_path parent directory exists
        if self.save_path:
            self._ensure_parent_dir(self.save_path)

    def create_pep2pro_dict(self):
        """Build ``self.pep2pro_dict``: peptide sequence -> list of protein names.

        Only the ``Sequence`` and ``Proteins`` columns are read. A peptide with
        an empty ``Proteins`` cell maps to an empty list.
        """
        print('Creating the peptide to proteins dictionary...')
        df = pd.read_csv(
            self.peptide_file, sep='\t',
            usecols=['Sequence', 'Proteins'],
            dtype={'Sequence': str, 'Proteins': str},
        )

        # Missing cells become '' so every value is a real list of names
        # (a NaN here would break the later iteration and ';'.join).
        proteins = df['Proteins'].fillna('')
        self.pep2pro_dict = {
            sequence: [name for name in joined.split(';') if name]
            for sequence, joined in zip(df['Sequence'], proteins)
        }

        print(f'Total number of peptides: {len(self.pep2pro_dict)}')

    def format_taxonomy(self, row):
        """Return the ``d__..|p__..|c__..|o__..|f__..|g__..|s__..`` string for *row*.

        *row* is any mapping (dict or Series) holding the seven rank columns.
        A missing rank is rendered as an empty name; names are otherwise kept
        verbatim.
        """
        parts = []
        for prefix, column in self._TAXA_RANKS:
            value = row[column]
            parts.append(f"{prefix}__{'' if pd.isna(value) else value}")
        return '|'.join(parts)

    def create_pepTaxa_df(self):
        '''
        Read the peptide taxonomy CSV and store a simplified DataFrame in
        ``self.pepTaxa_df``.

        input columns used:
            Sequence, Rank, Superkingdom, Phylum, Class, Order, Family, Genus,
            Species and every column whose name contains "LFQ intensity"

        result columns:
            Sequence, LCA_level, Taxon, Taxon_prop, then one "Intensity ..."
            column per "LFQ intensity ..." input column

            | Sequence         | LCA_level | Taxon                                | Taxon_prop | Intensity F1 |
            |------------------|-----------|--------------------------------------|------------|--------------|
            | AAAAAKDVIELAK    | genus     | d__Bacteria|p__Bacteroidetes|c__...  | 1          | 100          |
            | AAAAAPEAPVCIGR   | species   | d__Bacteria|p__Firmicutes|c__...     | 1          | 0            |
        '''
        print('Reading the peptide taxonomy file...')

        pepTaxa_df = pd.read_csv(self.pepTaxa_file)
        print(f'The number of peptides: {len(pepTaxa_df)}')

        # print the rank distribution
        rank_df = pepTaxa_df['Rank'].value_counts().rename_axis('Rank').reset_index(name='Count')
        rank_df['Percentage'] = rank_df['Count'] / rank_df['Count'].sum() * 100
        print(rank_df)

        rank_columns = [column for _, column in self._TAXA_RANKS]
        samples_list = [col for col in pepTaxa_df.columns if "LFQ intensity" in col]

        # explicit copy so the column assignments below never touch pepTaxa_df
        df_pep_taxa = pepTaxa_df[['Sequence', 'Rank'] + rank_columns + samples_list].copy()

        # Plain dict records are far cheaper than a row-wise DataFrame.apply.
        records = df_pep_taxa[rank_columns].to_dict('records')
        df_pep_taxa['Taxon'] = [
            self.format_taxonomy(record)
            for record in self._progress(records, "Formatting taxonomy")
        ]
        # A missing Rank is left as-is instead of crashing on .lower().
        df_pep_taxa['LCA_level'] = df_pep_taxa['Rank'].map(
            lambda rank: rank.lower().replace('superkingdom', 'domain') if isinstance(rank, str) else rank
        )
        df_pep_taxa['Taxon_prop'] = 1

        # keep only the output columns, samples at the end
        df_pep_taxa = df_pep_taxa[['Sequence', 'LCA_level', 'Taxon', 'Taxon_prop'] + samples_list]
        df_pep_taxa.columns = [col.replace("LFQ intensity", "Intensity") for col in df_pep_taxa.columns]

        self.pepTaxa_df = df_pep_taxa
        print('Peptide taxonomy dataframe is created!')

    def create_df_anno(self):
        '''
        Read the functional annotation file into ``self.df_anno``: protein name
        as the index, one column per available function. Also fills
        ``self.func_list`` and ``self.anno_protein_list``.
        '''
        print('Reading the functions file...')
        df_anno = pd.read_csv(self.functions_file, sep='\t')
        df_anno.set_index('Name', inplace=True)  # "Name" is the protein name

        # keep only the known function columns that this file actually has
        available_columns = [col for col in self._FUNC_COLUMNS if col in df_anno.columns]
        self.func_list = available_columns
        df_anno = df_anno[available_columns]

        # A duplicated protein name would make `.at` return a Series instead of
        # a scalar in get_func_dict; keep the first annotation of each protein.
        df_anno = df_anno[~df_anno.index.duplicated(keep='first')]

        print(f'The number of proteins: {len(df_anno)}')
        self.df_anno = df_anno
        self.anno_protein_list = df_anno.index.tolist()

    def get_func_dict(self, protein_list):
        """Collect, per function, the annotation of every protein in *protein_list*.

        Returns a dict ``{function: [annotation, ...]}`` with one entry per
        protein, in order. A protein absent from the annotation table, or a
        null annotation, is recorded as ``'-'``.
        """
        df_anno = self.df_anno
        funcs = self.func_list  # local alias: looked up once, used in the inner loop
        known_proteins = df_anno.index

        func_dict = {func: [] for func in funcs}
        for protein in protein_list:
            annotated = protein in known_proteins
            for func in funcs:
                value = df_anno.at[protein, func] if annotated else '-'
                func_dict[func].append(value if pd.notnull(value) else '-')
        return func_dict

    def stats_fun(self, func_dict):
        '''
        Find the most common annotation of each function and its proportion.

        input:
            func_dict: {'Preferred name': ['tccB'], 'Gene_Ontology_id': ['-'], ...}
        return:
            {'Preferred name': ('tccB', 1.0), 'Gene_Ontology_id': ('-', 1.0), ...}

        An empty annotation list (peptide without any protein) yields
        ('-', 0.0): there is no annotation and no protein supporting one.
        '''
        stats = {}
        for func_type, anno_list in func_dict.items():
            if not anno_list:
                stats[func_type] = ('-', 0.0)
                continue
            most_common, count_most_common = Counter(anno_list).most_common(1)[0]
            stats[func_type] = (most_common, count_most_common / len(anno_list))
        return stats

    def get_func_res_dict_from_pep(self, pep_seq: str):
        """Return the flat annotation record of one peptide.

        The record holds ``'Proteins'`` (names joined by ';') and, for every
        function, the most common annotation plus ``'<function>_prop'``. A
        peptide unknown to ``self.pep2pro_dict`` is treated as having no
        proteins.
        """
        protein_list = self.pep2pro_dict.get(pep_seq, [])
        function_results = self.stats_fun(self.get_func_dict(protein_list))

        re_out = {'Proteins': ";".join(protein_list)}
        for function, (result, proportion) in function_results.items():
            re_out[function] = result
            re_out[f'{function}_prop'] = proportion
        return re_out

    def run_pep2taxafunc(self) -> pd.DataFrame:
        """Annotate every peptide of ``self.pepTaxa_df`` and return the merged table.

        The result is ``self.pepTaxa_df`` followed by ``Proteins`` and, per
        function, the annotation and ``_prop`` columns (spaces in the column
        names replaced by underscores).
        """
        print('Processing peptides to taxonomy and functional annotation...')
        df_pep_taxa = self.pepTaxa_df.copy()

        # One list of plain dicts -> one DataFrame; building a Series per row
        # through DataFrame.apply is much slower for the same result.
        sequences = df_pep_taxa['Sequence'].tolist()
        records = [
            self.get_func_res_dict_from_pep(sequence)
            for sequence in self._progress(sequences, "Processing peptides")
        ]
        columns = ['Proteins']
        for func in self.func_list:
            columns += [func, f'{func}_prop']
        df_func_re = pd.DataFrame(records, index=df_pep_taxa.index, columns=columns)

        print('Merging the final dataframe...')
        # replace the space with underscore
        df_func_re.columns = df_func_re.columns.str.replace(' ', '_')
        # fill the NaN values with '-'
        df_func_re = df_func_re.fillna('-')

        # merge the peptide intensity dataframe with the functional annotation dataframe
        return pd.concat([df_pep_taxa, df_func_re], axis=1)

    def main(self, save_path: Optional[str] = None):
        """Run the full conversion and return the OTF DataFrame.

        When *save_path* (or, failing that, ``self.save_path``) is set, the
        table is also written there as a tab-separated file; the parent
        directory is created if needed.
        """
        self.create_pep2pro_dict()
        self.create_pepTaxa_df()
        self.create_df_anno()
        df_re = self.run_pep2taxafunc()

        save_path = save_path if save_path else self.save_path  # fall back to the path given at construction
        if save_path:
            # check_files only prepared self.save_path; an override needs the same care
            self._ensure_parent_dir(save_path)
            df_re.to_csv(save_path, sep='\t', index=False)
            print(f'OTFs table is saved to: {save_path}')
        return df_re


if __name__ == '__main__':
    path = "./Maxquant_workflow"
    pepTaxa_file = f"{path}/maxquant_search/taxonomy_analysis/BuiltIn.pepTaxa.csv"
    peptide_file = f"{path}/maxquant_search/combined/txt/peptides_report.txt"
    functions_file = f"{path}/maxquant_search/functional_annotation/functions.tsv"
    save_path = f"{path}/OTF.tsv"

    m2o = MetaLab2OTF(peptide_file, pepTaxa_file, functions_file, save_path)
    m2o.main()