diff --git a/Docs/ChangeLog.md b/Docs/ChangeLog.md
index 3492142..aa6db50 100644
--- a/Docs/ChangeLog.md
+++ b/Docs/ChangeLog.md
@@ -1,3 +1,8 @@
+# Version: 1.107.0
+## Date: 2024-06-15
+### Changes:
+- New: MetaX now supports building the OTFs Table from the MetaLab v2.3 MaxQuant output files.
+
# Version: 1.106.1
## Date: 2024-06-09
### Changes:
diff --git a/README.md b/README.md
index b20aea9..2e96299 100644
--- a/README.md
+++ b/README.md
@@ -6,6 +6,8 @@ MetaX also features statistical modules and plotting tools for ana
![abstract](./Docs/MetaX_Cookbook.assets/abstract.png)
+## Taxa-Functions Linkage
+
## Download
### `Desktop Version(Recommended)`:
diff --git a/utils/GUI.py b/utils/GUI.py
index 98bf43d..23dc0ff 100644
--- a/utils/GUI.py
+++ b/utils/GUI.py
@@ -219,7 +219,10 @@ def __init__(self, MainWindow):
self.lineEdit_db_path = self.make_line_edit_drag_drop(self.lineEdit_db_path, 'file')
self.lineEdit_final_peptide_path = self.make_line_edit_drag_drop(self.lineEdit_final_peptide_path, 'file')
self.lineEdit_peptide2taxafunc_outpath = self.make_line_edit_drag_drop(self.lineEdit_peptide2taxafunc_outpath, 'folder', 'OTF.tsv')
-
+ self.lineEdit_metalab_anno_peptides_report = self.make_line_edit_drag_drop(self.lineEdit_metalab_anno_peptides_report, 'file')
+ self.lineEdit_metalab_anno_built_in_taxa = self.make_line_edit_drag_drop(self.lineEdit_metalab_anno_built_in_taxa, 'file')
+ self.lineEdit_metalab_anno_functions = self.make_line_edit_drag_drop(self.lineEdit_metalab_anno_functions, 'file')
+ self.lineEdit_metalab_anno_otf_save_path = self.make_line_edit_drag_drop(self.lineEdit_metalab_anno_otf_save_path, 'folder', 'OTF.tsv')
# set ComboBox eanble searchable
self.make_related_comboboxes_searchable()
@@ -233,11 +236,18 @@ def __init__(self, MainWindow):
# set button click event
- # peptide2taxafunc
+ # peptideAnnotator MAG
self.pushButton_get_db_path.clicked.connect(self.set_lineEdit_db_path)
self.pushButton_get_final_peptide_path.clicked.connect(self.set_lineEdit_final_peptide_path)
self.pushButton_get_taxafunc_save_path.clicked.connect(self.set_lineEdit_peptide2taxafunc_outpath)
self.pushButton_run_peptide2taxafunc.clicked.connect(self.run_peptide2taxafunc)
+ # peptideAnnotator MetaLab2.3
+ self.pushButton_open_metalab_res_folder.clicked.connect(self.set_lineEdit_metalab_res_folder)
+ self.pushButton_open_metalab_anno_peptides_report.clicked.connect(self.set_lineEdit_metalab_anno_peptides_report_path)
+ self.pushButton_open_metalab_anno_built_in_taxa.clicked.connect(self.set_lineEdit_metalab_anno_built_in_taxa_path)
+ self.pushButton_open_metalab_anno_functions.clicked.connect(self.set_lineEdit_metalab_anno_functions_path)
+ self.pushButton_open_metalab_anno_otf_save_path.clicked.connect(self.set_lineEdit_metalab_anno_otf_save_path)
+ self.pushButton_run_metalab_maxq_annotate.clicked.connect(self.run_metalab_maxq_annotate)
## help button click event
self.toolButton_db_path_help.clicked.connect(self.show_toolButton_db_path_help)
@@ -247,7 +257,7 @@ def __init__(self, MainWindow):
self.pushButton_func_threshold_help.clicked.connect(self.show_func_threshold_help)
self.toolButton_db_update_built_in_help.clicked.connect(self.show_toolButton_db_update_built_in_help)
self.toolButton_db_update_table_help.clicked.connect(self.show_toolButton_db_update_table_help)
-
+ self.toolButton_metalab_res_folder_help.clicked.connect(self.show_toolButton_metalab_res_folder_help)
@@ -1431,7 +1441,7 @@ def show_message(self,message,title='Information'):
QApplication.processEvents()
-
+ ## peptideAnnotator MAG tab
def set_lineEdit_db_path(self):
db_path = QFileDialog.getOpenFileName(self.MainWindow, 'Select Database', self.last_path, 'sqlite3 (*.db)')[0]
self.last_path = os.path.dirname(db_path)
@@ -1448,7 +1458,50 @@ def set_lineEdit_peptide2taxafunc_outpath(self):
peptide2taxafunc_outpath = QFileDialog.getSaveFileName(self.MainWindow, 'Save Operational Taxa-Functions (OTF) Table', os.path.join(self.last_path, 'OTF.tsv'), 'tsv (*.tsv)')[0]
self.last_path = os.path.dirname(peptide2taxafunc_outpath)
self.lineEdit_peptide2taxafunc_outpath.setText(peptide2taxafunc_outpath)
+ ## peptideAnnotator MAG tab end
+
+ ## peptideAnnotator MetaLab2.3 tab
+ def set_lineEdit_metalab_res_folder(self):
+ metalab_res_folder = QFileDialog.getExistingDirectory(self.MainWindow, 'Select MetaLab Result Folder', self.last_path)
+ self.last_path = metalab_res_folder
+ # check if the folder contains MetaLab result files
+ peptide_file = os.path.join(metalab_res_folder, 'maxquant_search/combined/txt/peptides_report.txt')
+ pepTaxa_file = os.path.join(metalab_res_folder, 'maxquant_search/taxonomy_analysis/BuiltIn.pepTaxa.csv')
+ functions_file = os.path.join(metalab_res_folder, 'maxquant_search/functional_annotation/functions.tsv')
+ for file in [peptide_file, pepTaxa_file, functions_file]:
+ if not os.path.exists(file):
+ QMessageBox.warning(self.MainWindow, "Warning", f"MetaLab result folder does not contain the required file:\n{file}")
+ return
+
+ # set the path to lineEdit
+ self.lineEdit_metalab_res_folder.setText(metalab_res_folder)
+ self.lineEdit_metalab_anno_peptides_report.setText(peptide_file)
+ self.lineEdit_metalab_anno_built_in_taxa.setText(pepTaxa_file)
+ self.lineEdit_metalab_anno_functions.setText(functions_file)
+ # switch to MetaLab Annotated set path tab
+ self.toolBox_metalab_res_anno.setCurrentIndex(1)
+
+ def set_lineEdit_metalab_anno_peptides_report_path(self):
+ metalab_anno_peptides_report_path = QFileDialog.getOpenFileName(self.MainWindow, 'Select MetaLab Annotated Peptides Report', self.last_path, 'txt (*.txt);;All Files (*)')[0]
+ self.last_path = os.path.dirname(metalab_anno_peptides_report_path)
+ self.lineEdit_metalab_anno_peptides_report.setText(metalab_anno_peptides_report_path)
+
+ def set_lineEdit_metalab_anno_built_in_taxa_path(self):
+ metalab_anno_built_in_taxa_path = QFileDialog.getOpenFileName(self.MainWindow, 'Select MetaLab Annotated Built-in Taxa', self.last_path, 'CSV Files (*.csv);;All Files (*)')[0]
+ self.lineEdit_metalab_anno_built_in_taxa.setText(metalab_anno_built_in_taxa_path)
+ self.last_path = os.path.dirname(metalab_anno_built_in_taxa_path)
+ def set_lineEdit_metalab_anno_functions_path(self):
+ metalab_anno_functions_path = QFileDialog.getOpenFileName(self.MainWindow, 'Select MetaLab Annotated Functions', self.last_path, 'TSV Files (*.tsv);;All Files (*)')[0]
+ self.lineEdit_metalab_anno_functions.setText(metalab_anno_functions_path)
+ self.last_path = os.path.dirname(metalab_anno_functions_path)
+
+ def set_lineEdit_metalab_anno_otf_save_path(self):
+ metalab_anno_otf_save_path = QFileDialog.getSaveFileName(self.MainWindow, 'Save MetaLab Annotated OTF Table', os.path.join(self.last_path, 'OTF.tsv'), 'tsv (*.tsv)')[0]
+ self.last_path = os.path.dirname(metalab_anno_otf_save_path)
+ self.lineEdit_metalab_anno_otf_save_path.setText(metalab_anno_otf_save_path)
+
+ ## peptideAnnotator MetaLab2.3 tab end
def load_example_for_analyzer(self):
current_path = os.path.dirname(os.path.abspath(__file__))
@@ -1649,6 +1702,7 @@ def run_after_set_multi_tables(self):
print("\n---------------------------------- Set Multi Table End ----------------------------------\n")
# go to basic analysis tab and the first tab
+ self.stackedWidget.setCurrentIndex(0) # go to page_analyzer
self.tabWidget_TaxaFuncAnalyzer.setCurrentIndex(3)
self.tabWidget_4.setCurrentIndex(0)
self.pushButton_set_multi_table.setEnabled(True)
@@ -1742,7 +1796,8 @@ def run_db_updater(self):
## Database Updater
- # Peptide to TaxaFunc
+ ## Peptide Annotator
+ # MAG tab
def run_peptide2taxafunc(self):
db_path = f'''{self.lineEdit_db_path.text()}'''
final_peptide_path = f'''{self.lineEdit_final_peptide_path.text()}'''
@@ -1767,6 +1822,30 @@ def run_peptide2taxafunc(self):
except Exception as e:
self.logger.write_log(f'run_peptide2taxafunc error: {e}', 'e')
QMessageBox.warning(self.MainWindow, 'Warning', f'Error: {e}')
+ # MetaLab2.3 tab
+ def run_metalab_maxq_annotate(self):
+ pepTaxa_file = f'''{self.lineEdit_metalab_anno_peptides_report.text()}'''
+ peptide_file = f'''{self.lineEdit_metalab_anno_built_in_taxa.text()}'''
+ functions_file = f'''{self.lineEdit_metalab_anno_functions.text()}'''
+ otf_save_path = f'''{self.lineEdit_metalab_anno_otf_save_path.text()}'''
+ print(f'pepTaxa_file:\n{pepTaxa_file} \npeptide_file:\n{peptide_file} \nfunctions_file:\n{functions_file} \notf_save_path:\n{otf_save_path}')
+
+ if pepTaxa_file == '' or peptide_file == '' or functions_file == '' or otf_save_path == '':
+ QMessageBox.warning(self.MainWindow, 'Warning', 'Please set all above paths')
+ return None
+ try:
+ self.logger.write_log(f'run_metalab_maxq_annotate: pepTaxa_file:{pepTaxa_file} peptide_file:{peptide_file} functions_file:{functions_file} otf_save_path:{otf_save_path}')
+
+ from MetaX.utils.metalab2otf import MetaLab2OTF
+ def metalab_main_wrapper():
+ instance = MetaLab2OTF(pepTaxa_file, peptide_file, functions_file, otf_save_path)
+ return instance.main()
+
+ self.run_in_new_window(metalab_main_wrapper, show_msg=True)
+ except Exception as e:
+ error_message = traceback.format_exc()
+ self.logger.write_log(f'Error when run_metalab_maxq_annotate: {error_message}', 'e')
+ QMessageBox.warning(self.MainWindow, 'Error', error_message)
#### TaxaFuncAnalyzer ####
@@ -1855,6 +1934,11 @@ def show_toolButton_db_path_help(self):
msg_box.addButton(QMessageBox.Cancel)
switch_button.clicked.connect(self.swith_stack_page_dbuilder)
msg_box.exec_()
+
+ def show_toolButton_metalab_res_folder_help(self):
+ QMessageBox.information(self.MainWindow, 'MetaLab Result Folder Help', 'Select the folder of MetaLab v2.3 result.\n\n make sure it contains [maxquant_search] folder.')
+
+
def show_pushButton_preprocessing_help(self):
msg_box = QMessageBox(parent=self.MainWindow)
msg_box.setWindowTitle('Preprocessing Help')
diff --git a/utils/MetaX_GUI/MainWindow.ui b/utils/MetaX_GUI/MainWindow.ui
index ebab4ec..f300b98 100644
--- a/utils/MetaX_GUI/MainWindow.ui
+++ b/utils/MetaX_GUI/MainWindow.ui
@@ -46,7 +46,7 @@
Qt::LeftToRight
- 3
+ 0
@@ -240,7 +240,7 @@
0
0
477
- 377
+ 373
@@ -1315,7 +1315,7 @@
-
- 1
+ 0
@@ -7152,109 +7152,281 @@
-
-
-
-
-
- Output Save Path
-
-
-
- -
-
-
- GO
-
-
-
- -
-
-
- ?
-
-
-
- -
-
-
- -
-
-
- -
-
-
- 3
-
-
- 1.000000000000000
-
-
- 0.050000000000000
-
-
- 1.000000000000000
-
-
-
- -
-
-
- ?
-
-
-
- -
-
-
- Peptide Table
-
-
-
-
-
-
- Database
-
-
-
- -
-
-
- LCA Threshold
-
-
-
- -
-
-
- Open
-
-
-
- -
-
-
- -
-
-
- Open
-
-
-
- -
-
-
- ?
-
-
-
- -
-
-
- Open
+
+
+ 1
+
+
+ MAG
+
+
+
-
+
+
+ -
+
+
+ ?
+
+
+
+ -
+
+
+ -
+
+
+ Database
+
+
+
+ -
+
+
+ LCA Threshold
+
+
+
+ -
+
+
+ Open
+
+
+
+ -
+
+
+ ?
+
+
+
+ -
+
+
+ Peptide Table
+
+
+
+ -
+
+
+ Open
+
+
+
+ -
+
+
+ ?
+
+
+
+ -
+
+
+ -
+
+
+ Open
+
+
+
+ -
+
+
+ 3
+
+
+ 1.000000000000000
+
+
+ 0.050000000000000
+
+
+ 1.000000000000000
+
+
+
+ -
+
+
+ OTFs Save To
+
+
+
+ -
+
+
+ GO
+
+
+
+
+
+
+
+ MetaLab v2.3
+
+
+ -
+
+
+ GO
+
+
+
+ -
+
+
+ 0
+
+
+
+
+ 0
+ 0
+ 943
+ 332
+
+
+
+ Set Results Folder
+
+
+
-
+
+
+ Open
+
+
+
+ -
+
+
+ -
+
+
+ MetaLab result folder which contains the "maxquant_search" folder
+
+
+ MetaLab 2.3 Result Folder
+
+
+
+ -
+
+
+ ?
+
+
+
+
+
+
+
+
+ 0
+ 0
+ 943
+ 332
+
+
+
+ Set Path
+
+
+ -
+
+
+ In the maxquant_search/taxonomy_analysis/
+
+
+ BuiltIn.pepTaxa.csv
+
+
+
+ -
+
+
+ -
+
+
+ Open
+
+
+
+ -
+
+
+ -
+
+
+ In the maxquant_search/combined/txt/
+
+
+ peptides_report.txt
+
+
+
+ -
+
+
+ In the maxquant_search/functional_annotation/
+
+
+ functions.tsv
+
+
+
+ -
+
+
+ -
+
+
+ Open
+
+
+
+ -
+
+
+ Open
+
+
+
+ -
+
+
+ Path to Save Output
+
+
+ OTFs Save To
+
+
+
+ -
+
+
+ -
+
+
+ Open
+
+
+
+
+
+
+
+
+
- -
+
-
@@ -7754,7 +7926,7 @@
0
0
1021
- 21
+ 23
- toolButton__final_peptide_help
- lineEdit_final_peptide_path
- pushButton_get_final_peptide_path
- lineEdit_peptide2taxafunc_outpath
- pushButton_get_taxafunc_save_path
- toolButton_lca_threshould_help
- doubleSpinBox_LCA_threshold
- pushButton_run_peptide2taxafunc
comboBox_taxa_level_to_stast
toolButton_meta_table_help
comboBox_function_to_stast
@@ -7879,9 +8043,6 @@
toolButton_taxafunc_table_help
listWidget_table_list
pushButton_view_table
- toolButton_db_path_help
- lineEdit_db_path
- pushButton_get_db_path
diff --git a/utils/MetaX_GUI/Ui_MainWindow.py b/utils/MetaX_GUI/Ui_MainWindow.py
index a37b0d2..a01aa12 100644
--- a/utils/MetaX_GUI/Ui_MainWindow.py
+++ b/utils/MetaX_GUI/Ui_MainWindow.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-# Form implementation generated from reading ui file 'c:\Users\Qing\OneDrive - University of Ottawa\code\TaxaFunc\MetaX\utils\MetaX_GUI\MainWindow.ui'
+# Form implementation generated from reading ui file 'c:\Users\max\OneDrive - University of Ottawa\code\TaxaFunc\MetaX\utils\MetaX_GUI\MainWindow.ui'
#
# Created by: PyQt5 UI code generator 5.15.9
#
@@ -145,7 +145,7 @@ def setupUi(self, metaX_main):
self.toolBox_2.setMaximumSize(QtCore.QSize(1677, 16777215))
self.toolBox_2.setObjectName("toolBox_2")
self.page_2 = QtWidgets.QWidget()
- self.page_2.setGeometry(QtCore.QRect(0, 0, 477, 377))
+ self.page_2.setGeometry(QtCore.QRect(0, 0, 477, 373))
self.page_2.setObjectName("page_2")
self.gridLayout_27 = QtWidgets.QGridLayout(self.page_2)
self.gridLayout_27.setObjectName("gridLayout_27")
@@ -3773,55 +3773,134 @@ def setupUi(self, metaX_main):
self.widget_Peptide2taxafunc.setObjectName("widget_Peptide2taxafunc")
self.gridLayout_3 = QtWidgets.QGridLayout(self.widget_Peptide2taxafunc)
self.gridLayout_3.setObjectName("gridLayout_3")
- self.label_7 = QtWidgets.QLabel(self.widget_Peptide2taxafunc)
- self.label_7.setObjectName("label_7")
- self.gridLayout_3.addWidget(self.label_7, 3, 0, 1, 1)
- self.pushButton_run_peptide2taxafunc = QtWidgets.QPushButton(self.widget_Peptide2taxafunc)
- self.pushButton_run_peptide2taxafunc.setObjectName("pushButton_run_peptide2taxafunc")
- self.gridLayout_3.addWidget(self.pushButton_run_peptide2taxafunc, 5, 0, 1, 4)
- self.toolButton_lca_threshould_help = QtWidgets.QToolButton(self.widget_Peptide2taxafunc)
- self.toolButton_lca_threshould_help.setObjectName("toolButton_lca_threshould_help")
- self.gridLayout_3.addWidget(self.toolButton_lca_threshould_help, 4, 1, 1, 1)
- self.lineEdit_peptide2taxafunc_outpath = QtWidgets.QLineEdit(self.widget_Peptide2taxafunc)
+ self.tabWidget_6 = QtWidgets.QTabWidget(self.widget_Peptide2taxafunc)
+ self.tabWidget_6.setObjectName("tabWidget_6")
+ self.tab_17 = QtWidgets.QWidget()
+ self.tab_17.setObjectName("tab_17")
+ self.gridLayout_42 = QtWidgets.QGridLayout(self.tab_17)
+ self.gridLayout_42.setObjectName("gridLayout_42")
+ self.lineEdit_final_peptide_path = QtWidgets.QLineEdit(self.tab_17)
+ self.lineEdit_final_peptide_path.setObjectName("lineEdit_final_peptide_path")
+ self.gridLayout_42.addWidget(self.lineEdit_final_peptide_path, 1, 2, 1, 1)
+ self.toolButton_db_path_help = QtWidgets.QToolButton(self.tab_17)
+ self.toolButton_db_path_help.setObjectName("toolButton_db_path_help")
+ self.gridLayout_42.addWidget(self.toolButton_db_path_help, 0, 1, 1, 1)
+ self.lineEdit_peptide2taxafunc_outpath = QtWidgets.QLineEdit(self.tab_17)
self.lineEdit_peptide2taxafunc_outpath.setObjectName("lineEdit_peptide2taxafunc_outpath")
- self.gridLayout_3.addWidget(self.lineEdit_peptide2taxafunc_outpath, 3, 2, 1, 1)
- self.lineEdit_db_path = QtWidgets.QLineEdit(self.widget_Peptide2taxafunc)
+ self.gridLayout_42.addWidget(self.lineEdit_peptide2taxafunc_outpath, 2, 2, 1, 1)
+ self.label_5 = QtWidgets.QLabel(self.tab_17)
+ self.label_5.setObjectName("label_5")
+ self.gridLayout_42.addWidget(self.label_5, 0, 0, 1, 1)
+ self.label_8 = QtWidgets.QLabel(self.tab_17)
+ self.label_8.setObjectName("label_8")
+ self.gridLayout_42.addWidget(self.label_8, 3, 0, 1, 1)
+ self.pushButton_get_db_path = QtWidgets.QPushButton(self.tab_17)
+ self.pushButton_get_db_path.setObjectName("pushButton_get_db_path")
+ self.gridLayout_42.addWidget(self.pushButton_get_db_path, 0, 3, 1, 1)
+ self.toolButton_lca_threshould_help = QtWidgets.QToolButton(self.tab_17)
+ self.toolButton_lca_threshould_help.setObjectName("toolButton_lca_threshould_help")
+ self.gridLayout_42.addWidget(self.toolButton_lca_threshould_help, 3, 1, 1, 1)
+ self.label_6 = QtWidgets.QLabel(self.tab_17)
+ self.label_6.setObjectName("label_6")
+ self.gridLayout_42.addWidget(self.label_6, 1, 0, 1, 1)
+ self.pushButton_get_final_peptide_path = QtWidgets.QPushButton(self.tab_17)
+ self.pushButton_get_final_peptide_path.setObjectName("pushButton_get_final_peptide_path")
+ self.gridLayout_42.addWidget(self.pushButton_get_final_peptide_path, 1, 3, 1, 1)
+ self.toolButton__final_peptide_help = QtWidgets.QToolButton(self.tab_17)
+ self.toolButton__final_peptide_help.setObjectName("toolButton__final_peptide_help")
+ self.gridLayout_42.addWidget(self.toolButton__final_peptide_help, 1, 1, 1, 1)
+ self.lineEdit_db_path = QtWidgets.QLineEdit(self.tab_17)
self.lineEdit_db_path.setObjectName("lineEdit_db_path")
- self.gridLayout_3.addWidget(self.lineEdit_db_path, 1, 2, 1, 1)
- self.doubleSpinBox_LCA_threshold = QtWidgets.QDoubleSpinBox(self.widget_Peptide2taxafunc)
+ self.gridLayout_42.addWidget(self.lineEdit_db_path, 0, 2, 1, 1)
+ self.pushButton_get_taxafunc_save_path = QtWidgets.QPushButton(self.tab_17)
+ self.pushButton_get_taxafunc_save_path.setObjectName("pushButton_get_taxafunc_save_path")
+ self.gridLayout_42.addWidget(self.pushButton_get_taxafunc_save_path, 2, 3, 1, 1)
+ self.doubleSpinBox_LCA_threshold = QtWidgets.QDoubleSpinBox(self.tab_17)
self.doubleSpinBox_LCA_threshold.setDecimals(3)
self.doubleSpinBox_LCA_threshold.setMaximum(1.0)
self.doubleSpinBox_LCA_threshold.setSingleStep(0.05)
self.doubleSpinBox_LCA_threshold.setProperty("value", 1.0)
self.doubleSpinBox_LCA_threshold.setObjectName("doubleSpinBox_LCA_threshold")
- self.gridLayout_3.addWidget(self.doubleSpinBox_LCA_threshold, 4, 2, 1, 2)
- self.toolButton__final_peptide_help = QtWidgets.QToolButton(self.widget_Peptide2taxafunc)
- self.toolButton__final_peptide_help.setObjectName("toolButton__final_peptide_help")
- self.gridLayout_3.addWidget(self.toolButton__final_peptide_help, 2, 1, 1, 1)
- self.label_6 = QtWidgets.QLabel(self.widget_Peptide2taxafunc)
- self.label_6.setObjectName("label_6")
- self.gridLayout_3.addWidget(self.label_6, 2, 0, 1, 1)
- self.label_5 = QtWidgets.QLabel(self.widget_Peptide2taxafunc)
- self.label_5.setObjectName("label_5")
- self.gridLayout_3.addWidget(self.label_5, 1, 0, 1, 1)
- self.label_8 = QtWidgets.QLabel(self.widget_Peptide2taxafunc)
- self.label_8.setObjectName("label_8")
- self.gridLayout_3.addWidget(self.label_8, 4, 0, 1, 1)
- self.pushButton_get_final_peptide_path = QtWidgets.QPushButton(self.widget_Peptide2taxafunc)
- self.pushButton_get_final_peptide_path.setObjectName("pushButton_get_final_peptide_path")
- self.gridLayout_3.addWidget(self.pushButton_get_final_peptide_path, 2, 3, 1, 1)
- self.lineEdit_final_peptide_path = QtWidgets.QLineEdit(self.widget_Peptide2taxafunc)
- self.lineEdit_final_peptide_path.setObjectName("lineEdit_final_peptide_path")
- self.gridLayout_3.addWidget(self.lineEdit_final_peptide_path, 2, 2, 1, 1)
- self.pushButton_get_db_path = QtWidgets.QPushButton(self.widget_Peptide2taxafunc)
- self.pushButton_get_db_path.setObjectName("pushButton_get_db_path")
- self.gridLayout_3.addWidget(self.pushButton_get_db_path, 1, 3, 1, 1)
- self.toolButton_db_path_help = QtWidgets.QToolButton(self.widget_Peptide2taxafunc)
- self.toolButton_db_path_help.setObjectName("toolButton_db_path_help")
- self.gridLayout_3.addWidget(self.toolButton_db_path_help, 1, 1, 1, 1)
- self.pushButton_get_taxafunc_save_path = QtWidgets.QPushButton(self.widget_Peptide2taxafunc)
- self.pushButton_get_taxafunc_save_path.setObjectName("pushButton_get_taxafunc_save_path")
- self.gridLayout_3.addWidget(self.pushButton_get_taxafunc_save_path, 3, 3, 1, 1)
+ self.gridLayout_42.addWidget(self.doubleSpinBox_LCA_threshold, 3, 2, 1, 2)
+ self.label_7 = QtWidgets.QLabel(self.tab_17)
+ self.label_7.setObjectName("label_7")
+ self.gridLayout_42.addWidget(self.label_7, 2, 0, 1, 1)
+ self.pushButton_run_peptide2taxafunc = QtWidgets.QPushButton(self.tab_17)
+ self.pushButton_run_peptide2taxafunc.setObjectName("pushButton_run_peptide2taxafunc")
+ self.gridLayout_42.addWidget(self.pushButton_run_peptide2taxafunc, 4, 0, 1, 4)
+ self.tabWidget_6.addTab(self.tab_17, "")
+ self.tab_18 = QtWidgets.QWidget()
+ self.tab_18.setObjectName("tab_18")
+ self.gridLayout_43 = QtWidgets.QGridLayout(self.tab_18)
+ self.gridLayout_43.setObjectName("gridLayout_43")
+ self.pushButton_run_metalab_maxq_annotate = QtWidgets.QPushButton(self.tab_18)
+ self.pushButton_run_metalab_maxq_annotate.setObjectName("pushButton_run_metalab_maxq_annotate")
+ self.gridLayout_43.addWidget(self.pushButton_run_metalab_maxq_annotate, 2, 0, 1, 3)
+ self.toolBox_metalab_res_anno = QtWidgets.QToolBox(self.tab_18)
+ self.toolBox_metalab_res_anno.setObjectName("toolBox_metalab_res_anno")
+ self.page_3 = QtWidgets.QWidget()
+ self.page_3.setGeometry(QtCore.QRect(0, 0, 943, 332))
+ self.page_3.setObjectName("page_3")
+ self.gridLayout_45 = QtWidgets.QGridLayout(self.page_3)
+ self.gridLayout_45.setObjectName("gridLayout_45")
+ self.pushButton_open_metalab_res_folder = QtWidgets.QPushButton(self.page_3)
+ self.pushButton_open_metalab_res_folder.setObjectName("pushButton_open_metalab_res_folder")
+ self.gridLayout_45.addWidget(self.pushButton_open_metalab_res_folder, 0, 3, 1, 1)
+ self.lineEdit_metalab_res_folder = QtWidgets.QLineEdit(self.page_3)
+ self.lineEdit_metalab_res_folder.setObjectName("lineEdit_metalab_res_folder")
+ self.gridLayout_45.addWidget(self.lineEdit_metalab_res_folder, 0, 2, 1, 1)
+ self.label_161 = QtWidgets.QLabel(self.page_3)
+ self.label_161.setObjectName("label_161")
+ self.gridLayout_45.addWidget(self.label_161, 0, 0, 1, 1)
+ self.toolButton_metalab_res_folder_help = QtWidgets.QToolButton(self.page_3)
+ self.toolButton_metalab_res_folder_help.setObjectName("toolButton_metalab_res_folder_help")
+ self.gridLayout_45.addWidget(self.toolButton_metalab_res_folder_help, 0, 1, 1, 1)
+ self.toolBox_metalab_res_anno.addItem(self.page_3, "")
+ self.page_4 = QtWidgets.QWidget()
+ self.page_4.setGeometry(QtCore.QRect(0, 0, 943, 332))
+ self.page_4.setObjectName("page_4")
+ self.gridLayout_44 = QtWidgets.QGridLayout(self.page_4)
+ self.gridLayout_44.setObjectName("gridLayout_44")
+ self.label_metalab_anno_built_in_taxa = QtWidgets.QLabel(self.page_4)
+ self.label_metalab_anno_built_in_taxa.setObjectName("label_metalab_anno_built_in_taxa")
+ self.gridLayout_44.addWidget(self.label_metalab_anno_built_in_taxa, 1, 0, 1, 1)
+ self.lineEdit_metalab_anno_functions = QtWidgets.QLineEdit(self.page_4)
+ self.lineEdit_metalab_anno_functions.setObjectName("lineEdit_metalab_anno_functions")
+ self.gridLayout_44.addWidget(self.lineEdit_metalab_anno_functions, 2, 1, 1, 1)
+ self.pushButton_open_metalab_anno_functions = QtWidgets.QPushButton(self.page_4)
+ self.pushButton_open_metalab_anno_functions.setObjectName("pushButton_open_metalab_anno_functions")
+ self.gridLayout_44.addWidget(self.pushButton_open_metalab_anno_functions, 2, 2, 1, 1)
+ self.lineEdit_metalab_anno_peptides_report = QtWidgets.QLineEdit(self.page_4)
+ self.lineEdit_metalab_anno_peptides_report.setObjectName("lineEdit_metalab_anno_peptides_report")
+ self.gridLayout_44.addWidget(self.lineEdit_metalab_anno_peptides_report, 0, 1, 1, 1)
+ self.label_metalab_anno_peptides_report = QtWidgets.QLabel(self.page_4)
+ self.label_metalab_anno_peptides_report.setObjectName("label_metalab_anno_peptides_report")
+ self.gridLayout_44.addWidget(self.label_metalab_anno_peptides_report, 0, 0, 1, 1)
+ self.label_metalab_anno_functions = QtWidgets.QLabel(self.page_4)
+ self.label_metalab_anno_functions.setObjectName("label_metalab_anno_functions")
+ self.gridLayout_44.addWidget(self.label_metalab_anno_functions, 2, 0, 1, 1)
+ self.lineEdit_metalab_anno_built_in_taxa = QtWidgets.QLineEdit(self.page_4)
+ self.lineEdit_metalab_anno_built_in_taxa.setObjectName("lineEdit_metalab_anno_built_in_taxa")
+ self.gridLayout_44.addWidget(self.lineEdit_metalab_anno_built_in_taxa, 1, 1, 1, 1)
+ self.pushButton_open_metalab_anno_built_in_taxa = QtWidgets.QPushButton(self.page_4)
+ self.pushButton_open_metalab_anno_built_in_taxa.setObjectName("pushButton_open_metalab_anno_built_in_taxa")
+ self.gridLayout_44.addWidget(self.pushButton_open_metalab_anno_built_in_taxa, 1, 2, 1, 1)
+ self.pushButton_open_metalab_anno_peptides_report = QtWidgets.QPushButton(self.page_4)
+ self.pushButton_open_metalab_anno_peptides_report.setObjectName("pushButton_open_metalab_anno_peptides_report")
+ self.gridLayout_44.addWidget(self.pushButton_open_metalab_anno_peptides_report, 0, 2, 1, 1)
+ self.label_metalab_anno_otf_save_path = QtWidgets.QLabel(self.page_4)
+ self.label_metalab_anno_otf_save_path.setObjectName("label_metalab_anno_otf_save_path")
+ self.gridLayout_44.addWidget(self.label_metalab_anno_otf_save_path, 3, 0, 1, 1)
+ self.lineEdit_metalab_anno_otf_save_path = QtWidgets.QLineEdit(self.page_4)
+ self.lineEdit_metalab_anno_otf_save_path.setObjectName("lineEdit_metalab_anno_otf_save_path")
+ self.gridLayout_44.addWidget(self.lineEdit_metalab_anno_otf_save_path, 3, 1, 1, 1)
+ self.pushButton_open_metalab_anno_otf_save_path = QtWidgets.QPushButton(self.page_4)
+ self.pushButton_open_metalab_anno_otf_save_path.setObjectName("pushButton_open_metalab_anno_otf_save_path")
+ self.gridLayout_44.addWidget(self.pushButton_open_metalab_anno_otf_save_path, 3, 2, 1, 1)
+ self.toolBox_metalab_res_anno.addItem(self.page_4, "")
+ self.gridLayout_43.addWidget(self.toolBox_metalab_res_anno, 0, 0, 1, 3)
+ self.tabWidget_6.addTab(self.tab_18, "")
+ self.gridLayout_3.addWidget(self.tabWidget_6, 1, 0, 1, 1)
self.label_47 = QtWidgets.QLabel(self.widget_Peptide2taxafunc)
sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Fixed)
sizePolicy.setHorizontalStretch(0)
@@ -3836,7 +3915,7 @@ def setupUi(self, metaX_main):
self.label_47.setFont(font)
self.label_47.setAlignment(QtCore.Qt.AlignCenter)
self.label_47.setObjectName("label_47")
- self.gridLayout_3.addWidget(self.label_47, 0, 0, 1, 4)
+ self.gridLayout_3.addWidget(self.label_47, 0, 0, 1, 1)
self.gridLayout_21.addWidget(self.widget_Peptide2taxafunc, 0, 0, 1, 1)
self.stackedWidget.addWidget(self.page_pep_to_taxafunc)
self.page_dbbuilder = QtWidgets.QWidget()
@@ -4082,7 +4161,7 @@ def setupUi(self, metaX_main):
self.statusbar.setObjectName("statusbar")
metaX_main.setStatusBar(self.statusbar)
self.menuBar = QtWidgets.QMenuBar(metaX_main)
- self.menuBar.setGeometry(QtCore.QRect(0, 0, 1021, 21))
+ self.menuBar.setGeometry(QtCore.QRect(0, 0, 1021, 23))
self.menuBar.setObjectName("menuBar")
self.menuTools = QtWidgets.QMenu(self.menuBar)
self.menuTools.setObjectName("menuTools")
@@ -4139,12 +4218,14 @@ def setupUi(self, metaX_main):
self.retranslateUi(metaX_main)
self.stackedWidget.setCurrentIndex(0)
- self.tabWidget_TaxaFuncAnalyzer.setCurrentIndex(3)
+ self.tabWidget_TaxaFuncAnalyzer.setCurrentIndex(0)
self.toolBox_2.setCurrentIndex(0)
- self.tabWidget_4.setCurrentIndex(1)
+ self.tabWidget_4.setCurrentIndex(0)
self.tabWidget_3.setCurrentIndex(0)
self.tabWidget.setCurrentIndex(0)
self.tabWidget_2.setCurrentIndex(0)
+ self.tabWidget_6.setCurrentIndex(1)
+ self.toolBox_metalab_res_anno.setCurrentIndex(0)
self.tabWidget_5.setCurrentIndex(0)
self.checkBox_co_expression_in_condition.clicked['bool'].connect(self.comboBox_co_expression_condition_meta.setEnabled) # type: ignore
self.checkBox_co_expression_in_condition.clicked['bool'].connect(self.comboBox_co_expression_condition_group.setEnabled) # type: ignore
@@ -4172,14 +4253,6 @@ def setupUi(self, metaX_main):
self.checkBox_pca_if_show_lable.clicked['bool'].connect(self.checkBox_sunburst_show_all_lables.setEnabled) # type: ignore
self.checkBox_pca_if_show_lable.clicked['bool'].connect(self.doubleSpinBox_basic_pca_label_font_transparency.setEnabled) # type: ignore
QtCore.QMetaObject.connectSlotsByName(metaX_main)
- metaX_main.setTabOrder(self.toolButton__final_peptide_help, self.lineEdit_final_peptide_path)
- metaX_main.setTabOrder(self.lineEdit_final_peptide_path, self.pushButton_get_final_peptide_path)
- metaX_main.setTabOrder(self.pushButton_get_final_peptide_path, self.lineEdit_peptide2taxafunc_outpath)
- metaX_main.setTabOrder(self.lineEdit_peptide2taxafunc_outpath, self.pushButton_get_taxafunc_save_path)
- metaX_main.setTabOrder(self.pushButton_get_taxafunc_save_path, self.toolButton_lca_threshould_help)
- metaX_main.setTabOrder(self.toolButton_lca_threshould_help, self.doubleSpinBox_LCA_threshold)
- metaX_main.setTabOrder(self.doubleSpinBox_LCA_threshold, self.pushButton_run_peptide2taxafunc)
- metaX_main.setTabOrder(self.pushButton_run_peptide2taxafunc, self.comboBox_taxa_level_to_stast)
metaX_main.setTabOrder(self.comboBox_taxa_level_to_stast, self.toolButton_meta_table_help)
metaX_main.setTabOrder(self.toolButton_meta_table_help, self.comboBox_function_to_stast)
metaX_main.setTabOrder(self.comboBox_function_to_stast, self.pushButton_get_meta_path)
@@ -4188,9 +4261,6 @@ def setupUi(self, metaX_main):
metaX_main.setTabOrder(self.pushButton_get_taxafunc_path, self.toolButton_taxafunc_table_help)
metaX_main.setTabOrder(self.toolButton_taxafunc_table_help, self.listWidget_table_list)
metaX_main.setTabOrder(self.listWidget_table_list, self.pushButton_view_table)
- metaX_main.setTabOrder(self.pushButton_view_table, self.toolButton_db_path_help)
- metaX_main.setTabOrder(self.toolButton_db_path_help, self.lineEdit_db_path)
- metaX_main.setTabOrder(self.lineEdit_db_path, self.pushButton_get_db_path)
def retranslateUi(self, metaX_main):
_translate = QtCore.QCoreApplication.translate
@@ -4738,17 +4808,38 @@ def retranslateUi(self, metaX_main):
self.tabWidget_TaxaFuncAnalyzer.setTabText(self.tabWidget_TaxaFuncAnalyzer.indexOf(self.tab_others_stats), _translate("metaX_main", "Taxa-Func Link"))
self.pushButton_view_table.setText(_translate("metaX_main", "View Table"))
self.tabWidget_TaxaFuncAnalyzer.setTabText(self.tabWidget_TaxaFuncAnalyzer.indexOf(self.tab_table_review), _translate("metaX_main", "Table Review"))
- self.label_7.setText(_translate("metaX_main", "Output Save Path"))
- self.pushButton_run_peptide2taxafunc.setText(_translate("metaX_main", "GO"))
- self.toolButton_lca_threshould_help.setText(_translate("metaX_main", "?"))
- self.toolButton__final_peptide_help.setText(_translate("metaX_main", "?"))
- self.label_6.setText(_translate("metaX_main", "Peptide Table"))
+ self.toolButton_db_path_help.setText(_translate("metaX_main", "?"))
self.label_5.setText(_translate("metaX_main", "Database"))
self.label_8.setText(_translate("metaX_main", "LCA Threshold"))
- self.pushButton_get_final_peptide_path.setText(_translate("metaX_main", "Open"))
self.pushButton_get_db_path.setText(_translate("metaX_main", "Open"))
- self.toolButton_db_path_help.setText(_translate("metaX_main", "?"))
+ self.toolButton_lca_threshould_help.setText(_translate("metaX_main", "?"))
+ self.label_6.setText(_translate("metaX_main", "Peptide Table"))
+ self.pushButton_get_final_peptide_path.setText(_translate("metaX_main", "Open"))
+ self.toolButton__final_peptide_help.setText(_translate("metaX_main", "?"))
self.pushButton_get_taxafunc_save_path.setText(_translate("metaX_main", "Open"))
+ self.label_7.setText(_translate("metaX_main", "OTFs Save To"))
+ self.pushButton_run_peptide2taxafunc.setText(_translate("metaX_main", "GO"))
+ self.tabWidget_6.setTabText(self.tabWidget_6.indexOf(self.tab_17), _translate("metaX_main", "MAG"))
+ self.pushButton_run_metalab_maxq_annotate.setText(_translate("metaX_main", "GO"))
+ self.pushButton_open_metalab_res_folder.setText(_translate("metaX_main", "Open"))
+ self.label_161.setToolTip(_translate("metaX_main", "MetaLab Resul Folder Wich contain \"maxquant_search\" filder"))
+ self.label_161.setText(_translate("metaX_main", "MetaLab 2.3 Result Folder"))
+ self.toolButton_metalab_res_folder_help.setText(_translate("metaX_main", "?"))
+ self.toolBox_metalab_res_anno.setItemText(self.toolBox_metalab_res_anno.indexOf(self.page_3), _translate("metaX_main", "Set Rsults Folder"))
+ self.label_metalab_anno_built_in_taxa.setToolTip(_translate("metaX_main", "In the maxquant_search/taxonomy_analysis/"))
+ self.label_metalab_anno_built_in_taxa.setText(_translate("metaX_main", "BuiltIn.pepTaxa.csv"))
+ self.pushButton_open_metalab_anno_functions.setText(_translate("metaX_main", "Open"))
+ self.label_metalab_anno_peptides_report.setToolTip(_translate("metaX_main", "In the maxquant_search/combined/txt/"))
+ self.label_metalab_anno_peptides_report.setText(_translate("metaX_main", "peptides_report.txt"))
+ self.label_metalab_anno_functions.setToolTip(_translate("metaX_main", "In the maxquant_search/functional_annotation/"))
+ self.label_metalab_anno_functions.setText(_translate("metaX_main", "functions.tsv"))
+ self.pushButton_open_metalab_anno_built_in_taxa.setText(_translate("metaX_main", "Open"))
+ self.pushButton_open_metalab_anno_peptides_report.setText(_translate("metaX_main", "Open"))
+ self.label_metalab_anno_otf_save_path.setToolTip(_translate("metaX_main", "Path to Save Output"))
+ self.label_metalab_anno_otf_save_path.setText(_translate("metaX_main", "OTFs Save To"))
+ self.pushButton_open_metalab_anno_otf_save_path.setText(_translate("metaX_main", "Open"))
+ self.toolBox_metalab_res_anno.setItemText(self.toolBox_metalab_res_anno.indexOf(self.page_4), _translate("metaX_main", "Set Path"))
+ self.tabWidget_6.setTabText(self.tabWidget_6.indexOf(self.tab_18), _translate("metaX_main", "MetaLab v2.3"))
self.label_47.setText(_translate("metaX_main", "Peptide Annotator"))
self.label_48.setText(_translate("metaX_main", "Database Builder"))
self.label.setText(_translate("metaX_main", "MGnify Database Type"))
diff --git a/utils/metalab2otf.py b/utils/metalab2otf.py
new file mode 100644
index 0000000..2b9c04e
--- /dev/null
+++ b/utils/metalab2otf.py
@@ -0,0 +1,248 @@
+# This script converts the MetaLab 2.3 results into an OTF table.
+# input:
+# peptide_file: maxquant_search/combined/txt/peptides_report.txt -> for the pep2pro_dict
+# pepTaxa_file: maxquant_search/taxonomy_analysis/BuiltIn.pepTaxa.csv -> for the peptide taxonomy and intensity
+# functions_file: maxquant_search/functional_annotation/functions.tsv
+# output:
+# - OTF.tsv
+
+import pandas as pd
+from tqdm import tqdm
+from typing import Optional, Dict, List
+from collections import Counter
+import os
+
+class MetaLab2OTF:
+    """Convert MetaLab 2.3 (MaxQuant workflow) result files into an OTF table.
+
+    Each output row is one peptide with its taxonomy and sample intensities (from
+    `pepTaxa_file`), its proteins (from `peptide_file`) and, for every function column
+    of `functions_file`, the most common annotation among those proteins together with
+    the proportion of the proteins that carry it.
+    """
+
+    # (prefix, column) pairs used to build the `Taxon` string, highest rank first
+    _TAXA_LEVELS = (('d', 'Superkingdom'), ('p', 'Phylum'), ('c', 'Class'), ('o', 'Order'),
+                    ('f', 'Family'), ('g', 'Genus'), ('s', 'Species'))
+
+    def __init__(self, peptide_file, pepTaxa_file, functions_file, save_path: Optional[str] = None):
+        """Store the input/output paths and validate them with `check_files`.
+
+        `peptide_file` and `functions_file` are read as tab-separated tables, `pepTaxa_file`
+        as a comma-separated one; `save_path` is the optional default output path of `main`.
+        Raises FileNotFoundError if one of the three input files does not exist.
+        """
+        self.peptide_file = peptide_file
+        self.pepTaxa_file = pepTaxa_file
+        self.functions_file = functions_file
+        self.save_path = save_path
+        self.check_files()
+
+        self.pep2pro_dict: Dict[str, List[str]] = {}  # AAAAAPEAPVCIGR: ['HT14A_GL0083014', 'V1.CD54-0_GL0054240']
+        self.pepTaxa_df: Optional[pd.DataFrame] = None  # peptide taxonomy dataframe
+        self.df_anno: Optional[pd.DataFrame] = None  # protein annotation dataframe, index is the protein name, each column is a function
+        self.func_list: List[str] = []  # function columns found in the functions file
+        self.anno_protein_list: List[str] = []  # protein names of the functions file
+
+    def check_files(self):
+        """Raise FileNotFoundError if an input file is missing; create the output directory if needed."""
+        for file in (self.peptide_file, self.pepTaxa_file, self.functions_file):
+            if not os.path.isfile(file):
+                raise FileNotFoundError(f'{file} is not found!')
+        print('All files are found!')
+
+        if self.save_path:
+            save_dir = os.path.dirname(self.save_path)
+            # dirname is '' for a bare file name (current directory); os.makedirs('') would fail
+            if save_dir and not os.path.isdir(save_dir):
+                os.makedirs(save_dir, exist_ok=True)
+                print(f'Created the directory: {save_dir}')
+
+    def create_pep2pro_dict(self):
+        """Build `self.pep2pro_dict` (peptide sequence -> list of protein names) from the peptide file."""
+        print('Creating the peptide to proteins dictionary...')
+        df = pd.read_csv(self.peptide_file, sep='\t')
+        # a NaN `Proteins` cell becomes []; empty fragments (e.g. after a trailing ';') are dropped
+        proteins = df['Proteins'].fillna('').astype(str).str.split(';')
+        proteins = [[name for name in names if name] for names in proteins]
+        self.pep2pro_dict = dict(zip(df['Sequence'], proteins))
+        print(f'Total number of peptides: {len(self.pep2pro_dict)}')
+
+    def format_taxonomy(self, row):
+        """Return the `d__..|p__..|c__..|o__..|f__..|g__..|s__..` string of one taxonomy row.
+
+        `row` is indexed by column name (Series or dict); a missing rank stays empty, e.g. `s__`.
+        """
+        parts = []
+        for prefix, column in self._TAXA_LEVELS:
+            value = row[column]
+            # Test each value for NaN instead of deleting the text 'nan' from the joined
+            # string, which also damaged names containing it (e.g. 'Lachnanaerobaculum').
+            parts.append(f"{prefix}__{'' if pd.isna(value) else value}")
+        return '|'.join(parts)
+
+    def create_pepTaxa_df(self):
+        '''
+        Read the peptide taxonomy CSV and store in `self.pepTaxa_df` a simplified DataFrame
+        with the essential `taxonomy`, `rank information` and `intensity values`.
+
+        input:
+        | Peptide id | Sequence | LCA | Rank | Superkingdom | Kingdom | Phylum | Class | Order | Family | Genus | Species |LFQ intensity F1|
+        |------------|--------------------|-----------------|---------|--------------|---------|--------------|-------------|--------------|----------------|------------|------------------|----------------|
+        | 1 | AAAAAKDVIELAK | Bacteroides | Genus | Bacteria | | Bacteroidetes| Bacteroidia | Bacteroidales| Bacteroidaceae | Bacteroides| |100 |
+        | 2 | AAAAAPEAPVCIGR | Blautia sp. YL58| Species | Bacteria | | Firmicutes | Clostridia | Eubacteriales| Lachnospiraceae| Blautia | Blautia sp. YL58 |0 |
+        | 3 | AAAAAQHHLYGTTSGK | Bacteroides | Genus | Bacteria | | Bacteroidetes| Bacteroidia | Bacteroidales| Bacteroidaceae | Bacteroides| |200 |
+
+        result (`self.pepTaxa_df`):
+        | Sequence | LCA_level | Taxon | Taxon_prop |Intensity F1| ...
+        |---------------------|-----------|-------------------------------------------------------|------------|------------|----
+        | 0 AAAAAKDVIELAK | genus | d__Bacteria|p__Bacteroidetes|c__Bacteroidia|o_... | 1 |100 | ...
+        | 1 AAAAAPEAPVCIGR | species | d__Bacteria|p__Firmicutes|c__Clostridia|o__Eub... | 1 |0 | ...
+        | 2 AAAAAQHHLYGTTSGK | genus | d__Bacteria|p__Bacteroidetes|c__Bacteroidia|o_... | 1 |200 | ...
+        '''
+        print('Reading the peptide taxonomy file...')
+        pepTaxa_df = pd.read_csv(self.pepTaxa_file)
+        print(f'The number of peptides: {len(pepTaxa_df)}')
+
+        rank_df = pepTaxa_df[['Rank']].value_counts().reset_index()
+        rank_df.columns = ['Rank', 'Count']
+        rank_df['Percentage'] = rank_df['Count'] / rank_df['Count'].sum() * 100
+        print(rank_df)  # print the rank distribution
+
+        taxa_cols = [column for _, column in self._TAXA_LEVELS]
+        samples_list = [col for col in pepTaxa_df.columns if "LFQ intensity" in col]
+        # copy so that the column assignments below do not touch `pepTaxa_df`
+        df_pep_taxa = pepTaxa_df[['Sequence', 'Rank'] + taxa_cols + samples_list].copy()
+
+        taxa_rows = df_pep_taxa[taxa_cols].to_dict('records')
+        df_pep_taxa['Taxon'] = [self.format_taxonomy(row) for row in tqdm(taxa_rows, desc="Formatting taxonomy")]
+        # a missing rank becomes '' here; calling `.lower()` on the NaN itself would raise
+        ranks = df_pep_taxa['Rank'].fillna('').astype(str).str.lower()
+        df_pep_taxa['LCA_level'] = ranks.str.replace('superkingdom', 'domain', regex=False)
+        df_pep_taxa['Taxon_prop'] = 1
+
+        # keep only the output columns, samples at the end, renamed to "Intensity ..."
+        df_pep_taxa = df_pep_taxa[['Sequence', 'LCA_level', 'Taxon', 'Taxon_prop'] + samples_list]
+        df_pep_taxa.columns = [col.replace("LFQ intensity", "Intensity") for col in df_pep_taxa.columns]
+        self.pepTaxa_df = df_pep_taxa
+        print('Peptide taxonomy dataframe is created!')
+
+    def create_df_anno(self):
+        '''
+        Read the functional annotation file into `self.df_anno` (protein name as the index, each
+        column is a function) and fill `self.func_list` and `self.anno_protein_list`.
+        '''
+        print('Reading the functions file...')
+        df_anno = pd.read_csv(self.functions_file, sep='\t')
+        df_anno.set_index('Name', inplace=True)  # "Name" is the protein name
+
+        extract_list = [
+            'Preferred name', 'Gene_Ontology_id', 'Gene_Ontology_name',
+            'Gene_Ontology_namespace', 'EC_id', 'EC_de',
+            'EC_an', 'EC_ca', 'KEGG_ko', 'KEGG_Pathway_Entry', 'KEGG_Pathway_Name',
+            'KEGG_Module', 'KEGG_Reaction', 'KEGG_rclass', 'BRITE', 'KEGG_TC',
+            'CAZy', 'BiGG_Reaction', 'COG accession', 'COG category', 'COG name',
+            'NOG accession', 'NOG category', 'NOG name'
+        ]
+
+        # keep only the function columns that exist in this file
+        available_columns = [col for col in extract_list if col in df_anno.columns]
+        self.func_list = available_columns
+        df_anno = df_anno[available_columns]
+        # `.at` gives a scalar only for a unique label: keep the first row of a repeated protein
+        df_anno = df_anno[~df_anno.index.duplicated(keep='first')]
+        print(f'The number of proteins: {len(df_anno)}')
+        self.df_anno = df_anno
+        self.anno_protein_list = df_anno.index.tolist()
+
+    def get_func_dict(self, protein_list):
+        """Collect, for each function column, the annotation of every protein in `protein_list`.
+
+        Returns {function: [annotation, ...]} in the order of `protein_list`; '-' stands for
+        a protein absent from `self.df_anno` or an empty annotation.
+        """
+        df_anno = self.df_anno
+        funcs = self.func_list  # save as local variable to avoid multiple lookups in the loop
+        func_dict = {func: [] for func in funcs}
+        for protein in protein_list:
+            annotated = protein in df_anno.index
+            for func in funcs:
+                value = df_anno.at[protein, func] if annotated else None
+                func_dict[func].append(value if pd.notnull(value) else '-')
+        return func_dict
+
+    def stats_fun(self, func_dict):
+        '''
+        Find the most common annotation of each function and its proportion.
+        input:
+            func_dict: {'Preferred name': ['tccB', 'tccB'], 'EC_id': ['-', '3.1.1.1'], ...}
+        return:
+            {'Preferred name': ('tccB', 1.0), 'EC_id': ('-', 0.5), ...}
+            An empty annotation list (peptide without protein) gives ('-', 0.0).
+        '''
+        stats = {}
+        for func_type, anno_list in func_dict.items():
+            if anno_list:
+                most_common, count_most_common = Counter(anno_list).most_common(1)[0]
+                stats[func_type] = (most_common, count_most_common / len(anno_list))
+            else:
+                stats[func_type] = ('-', 0.0)  # nothing to count; most_common(1) would be empty
+        return stats
+
+    def get_func_res_dict_from_pep(self, pep_seq: str):
+        """Return the output fields of one peptide: `Proteins`, then for each function its most
+        common annotation (`<function>`) and the proportion of it (`<function>_prop`).
+        A peptide missing from `self.pep2pro_dict` is treated as having no protein.
+        """
+        protein_list = self.pep2pro_dict.get(pep_seq, [])
+        function_results = self.stats_fun(self.get_func_dict(protein_list))
+        re_out = {'Proteins': ";".join(protein_list)}
+        for function, (result, proportion) in function_results.items():
+            re_out[function] = result
+            re_out[f'{function}_prop'] = proportion
+        return re_out
+
+    def run_pep2taxafunc(self) -> pd.DataFrame:
+        """Return `self.pepTaxa_df` extended with the protein and function columns of each peptide.
+
+        Uses the data prepared by `create_pep2pro_dict`, `create_pepTaxa_df` and `create_df_anno`.
+        """
+        print('Processing peptides to taxonomy and functional annotation...')
+        df_pep_taxa = self.pepTaxa_df.copy()
+        # one dict per peptide and a single DataFrame build: no Series per row, and an empty table works
+        records = [self.get_func_res_dict_from_pep(peptide)
+                   for peptide in tqdm(df_pep_taxa['Sequence'], desc="Processing peptides")]
+        df_func_re = pd.DataFrame(records, index=df_pep_taxa.index)
+
+        print('Merging the final dataframe...')
+        df_func_re.columns = [str(col).replace(' ', '_') for col in df_func_re.columns]
+        # fill the NaN values with '-'
+        df_func_re = df_func_re.fillna('-')
+        # both frames share the same index, so the columns are placed side by side
+        return pd.concat([df_pep_taxa, df_func_re], axis=1)
+
+    def main(self, save_path: Optional[str] = None):
+        """Run the whole conversion and return the OTF DataFrame.
+
+        The table is also written as TSV to `save_path` or, if that is not given, to the
+        `save_path` of the constructor; nothing is written when both are empty.
+        """
+        self.create_pep2pro_dict()
+        self.create_pepTaxa_df()
+        self.create_df_anno()
+        df_re = self.run_pep2taxafunc()
+        save_path = save_path or self.save_path
+        if save_path:
+            df_re.to_csv(save_path, sep='\t', index=False)
+            print(f'OTFs table is saved to: {save_path}')
+        return df_re
+
+if __name__ == '__main__':
+    # Example: convert a local MetaLab 2.3 MaxQuant result folder into OTF.tsv.
+    workflow_dir = "./Maxquant_workflow"
+    MetaLab2OTF(
+        peptide_file=f"{workflow_dir}/maxquant_search/combined/txt/peptides_report.txt",
+        pepTaxa_file=f"{workflow_dir}/maxquant_search/taxonomy_analysis/BuiltIn.pepTaxa.csv",
+        functions_file=f"{workflow_dir}/maxquant_search/functional_annotation/functions.tsv",
+        save_path=f"{workflow_dir}/OTF.tsv",
+    ).main()
\ No newline at end of file
diff --git a/utils/version.py b/utils/version.py
index 1ba3211..16dd769 100644
--- a/utils/version.py
+++ b/utils/version.py
@@ -1,2 +1,2 @@
-__version__ = '1.106.1'
+__version__ = '1.107.0'
API_version = '1'
\ No newline at end of file