diff --git a/.gitignore b/.gitignore index ba16070..c28577a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,7 @@ .vscode/ .idea/ .trunk/ -local_tests/ +.local_tests/ *.pyc *.db .gitignore diff --git a/Docs/ChangeLog.md b/Docs/ChangeLog.md index 5a239dc..521a444 100644 --- a/Docs/ChangeLog.md +++ b/Docs/ChangeLog.md @@ -1,3 +1,29 @@ +# Version: 1.118.2 +## Date: 2024-11-8 +### Changes: +- Change: changed the "Sum normalization" to "Percentages Scaling" in the data preprocessing part to avoid the confusion with the normalization method. + + +# Version: 1.118.1 +## Date: 2024-11-7 +### Changes: +- New: added the 'half_same_trends' option to extract the table in which each group has the same trend (all positive or all negative non-NA values) and at least 50% of the values are non-NA, from the result of group-control in condition. +- Change: refined the layout of the setting window. + + +# Version: 1.118.0 +## Date: 2024-11-7 +### Changes: +- New: added a debug console to run any python code in the MetaX to modify the object (FOR DEVELOPER ONLY). +- Change: Load some settings from the last profile when opening MetaX so that the user doesn't need to set them every time. +- Fix: Fixed the bug where the mean calculation was incorrect when plotting the mean of a heatmap with a sub meta selected. + +# Version: 1.117.2 +## Date: 2024-11-5 +### Changes: +- Change: add a column of focus list to the taxa-function network table. 
+ + # Version: 1.117.1 ## Date: 2024-11-5 ### Changes: diff --git a/metax/gui/main_gui.py b/metax/gui/main_gui.py index bf0bdf5..cf0a19f 100644 --- a/metax/gui/main_gui.py +++ b/metax/gui/main_gui.py @@ -86,6 +86,7 @@ from metax.gui.metax_gui.extended_combo_box import ExtendedComboBox from metax.gui.metax_gui.show_plt import ExportablePlotDialog from metax.gui.metax_gui.input_window import InputWindow + from metax.gui.metax_gui.command_window import CommandWindow from metax.gui.metax_gui.user_agreement_dialog import UserAgreementDialog from metax.gui.metax_gui.settings_widget import SettingsWidget from metax.gui.metax_gui.cmap_combo_box import CmapComboBox @@ -129,6 +130,7 @@ from .metax_gui.extended_combo_box import ExtendedComboBox from .metax_gui.show_plt import ExportablePlotDialog from .metax_gui.input_window import InputWindow + from .metax_gui.command_window import CommandWindow from .metax_gui.user_agreement_dialog import UserAgreementDialog from .metax_gui.settings_widget import SettingsWidget from .metax_gui.cmap_combo_box import CmapComboBox @@ -234,6 +236,7 @@ def __init__(self, MainWindow): self.actionSave_As.setIcon(qta.icon('mdi.content-save')) self.actionExport_Log_File.setIcon(qta.icon('mdi.export')) self.actionHide_Show_Console.setIcon(qta.icon('mdi.console')) + self.actionDebug_Console.setIcon(qta.icon('fa5b.dev')) self.actionAny_Table_Mode.setIcon(qta.icon('mdi.table')) self.actionCheck_Update.setIcon(qta.icon('mdi.update')) self.actionSettings.setIcon(qta.icon('mdi.cog')) @@ -251,6 +254,7 @@ def __init__(self, MainWindow): self.actionExport_Log_File.triggered.connect(self.export_log_file) self.console_visible = False self.actionHide_Show_Console.triggered.connect(self.show_hide_console) + self.actionDebug_Console.triggered.connect(self.show_command_line_window) self.actionAny_Table_Mode.triggered.connect(self.set_any_table_mode) self.actionCheck_Update.triggered.connect(lambda: self.check_update(show_message=True, manual_check_trigger=True)) 
self.actionSettings.triggered.connect(self.show_settings_window) @@ -713,6 +717,14 @@ def change_event_comboBox_condition_group(comboBox, group_name): def show_settings_window(self): + def get_stat_mean_by_zero_dominant(): + if hasattr(self, 'tfa.stat_mean_by_zero_dominant'): + return self.tfa.stat_mean_by_zero_dominant + elif self.settings.contains("stat_mean_by_zero_dominant") and self.settings.value("stat_mean_by_zero_dominant", type=bool): + return True + else: + return False + if self.settings_dialog is None: self.settings_dialog = QDialog(self.MainWindow) self.settings_dialog.setWindowTitle("Settings") @@ -725,6 +737,7 @@ def show_settings_window(self): parent=self.settings_dialog, update_branch=self.update_branch, auto_check_update=self.auto_check_update, + stat_mean_by_zero_dominant = get_stat_mean_by_zero_dominant(), QSettings=self.settings, ) settings_widget.update_mode_changed.connect(self.on_update_mode_changed) @@ -741,7 +754,10 @@ def show_settings_window(self): self.settings_dialog.setLayout(layout) self.settings_dialog.show() - + + def show_command_line_window(self): + self.command_window = CommandWindow(self.MainWindow, main_gui=self) + self.command_window.show() # handle the update mode changed from settings window def on_update_mode_changed(self, mode): @@ -768,11 +784,11 @@ def on_html_theme_changed(self, theme): def on_stat_mean_by_zero_dominant_changed(self, mode): # chcek if self.tfa exists - if not hasattr(self, 'tfa'): + if not hasattr(self.tfa, 'stat_mean_by_zero_dominant'): print("Please load the data first.") return - - self.tfa.stat_mean_by_zero_dominant = mode + self.tfa.stat_mean_by_zero_dominant = mode + self.settings.setValue("stat_mean_by_zero_dominant", mode) print(f"Stat mean by zero dominant changed to: {mode}") def on_protein_infer_method_changed(self, method): @@ -1798,8 +1814,11 @@ def run_after_set_multi_tables(self): # add "protein" "Custom" to comboBoxs to plot self.add_or_remove_protein_custom_label() - + #set 
stat_mean_by_zero_dominant mode by QSettings + if self.settings.contains("stat_mean_by_zero_dominant"): + self.tfa.stat_mean_by_zero_dominant = self.settings.value("stat_mean_by_zero_dominant", type=bool) + # add tables to table dict if self.table_dict == {}: if self.tfa.any_df_mode: @@ -2655,7 +2674,7 @@ def set_multi_table(self, restore_taxafunc=False, saved_obj=None): "Standard Scaling (Z-Score)": "zscore", "Min-Max Scaling": "minmax", "Pareto Scaling": "pareto", - "Normalization by sum": "sum", + "Percentages Scaling": "percentage", } normalize_method = normalize_dict[normalize_method] transform_method = transform_dict[transform_method] @@ -3616,10 +3635,13 @@ def plot_basic_list(self, plot_type='heatmap'): df = dft else: df = dft.loc[self.basic_heatmap_list] - + # Done for creating the dataframe for the heatmap # try: if plot_type == 'heatmap': + df, sample_to_group_dict = self.tfa.BasicStats.get_df_by_mean_and_submeta(df = df, + sub_meta = sub_meta, + plot_mean = plot_mean) if row_cluster or (scale =='row'): df = self.delete_zero_rows(df) if col_cluster or (scale =='col'): @@ -3634,14 +3656,14 @@ def plot_basic_list(self, plot_type='heatmap'): return else: pass - + # plot heatmap self.show_message(f'Plotting {plot_type}...') HeatmapPlot(self.tfa, **self.heatmap_params_dict).plot_basic_heatmap(df=df, title=title, fig_size=(int(width), int(height)), scale=scale, row_cluster=row_cluster, col_cluster=col_cluster, cmap=cmap, rename_taxa=rename_taxa, font_size=font_size, - show_all_labels=show_all_labels, rename_sample=rename_sample, - plot_mean = plot_mean, sub_meta = sub_meta, return_type = 'fig') + show_all_labels=show_all_labels, return_type = 'fig', + sample_to_group_dict = sample_to_group_dict) elif plot_type == 'bar': @@ -5586,7 +5608,9 @@ def plot_tflink_heatmap(self, return_type = 'fig'): QMessageBox.warning(self.MainWindow, 'Warning', 'No data!, please reselect!') return None - + df, sample_to_group_dict = self.tfa.BasicStats.get_df_by_mean_and_submeta(df 
= df, + sub_meta = sub_meta, + plot_mean = plot_mean) if row_cluster or (scale == 'row'): df = self.delete_zero_rows(df) @@ -5598,8 +5622,7 @@ def plot_tflink_heatmap(self, return_type = 'fig'): fig_res = HeatmapPlot(self.tfa, **self.heatmap_params_dict).plot_basic_heatmap(df=df, title=title, fig_size=(int(width), int(height)), scale=scale, row_cluster=row_cluster, col_cluster=col_cluster, cmap=cmap, rename_taxa=rename_taxa, font_size=font_size, show_all_labels=show_all_labels, - rename_sample=rename_sample, sub_meta=sub_meta, - plot_mean=plot_mean, return_type = return_type + return_type = return_type, sample_to_group_dict = sample_to_group_dict ) if return_type == 'table': @@ -5632,8 +5655,11 @@ def delete_zero_columns(self, dataframe): zero_columns = dataframe.columns[(dataframe == 0).all(axis=0)] if not zero_columns.empty: dataframe = dataframe.drop(zero_columns, axis=1) - # add group name to zero_columns - zero_columns = [f'{i} ({self.tfa.get_group_of_a_sample(i)})' for i in zero_columns] + # show the message with group name + try: # add group name to zero_columns if possible + zero_columns = [f'{i} ({self.tfa.get_group_of_a_sample(i)})' for i in zero_columns] + except Exception: + print('The column name is not a sample name, Skip adding group name to the column name!') col_str = '\n'.join(zero_columns) if len(zero_columns) > 10: # use InputWindow to show the deleted rows diff --git a/metax/gui/metax_gui/command_window.py b/metax/gui/metax_gui/command_window.py new file mode 100644 index 0000000..a114c3b --- /dev/null +++ b/metax/gui/metax_gui/command_window.py @@ -0,0 +1,105 @@ +import sys +import io +from contextlib import redirect_stdout +from PyQt5.QtWidgets import QApplication, QMainWindow, QTextEdit, QVBoxLayout, QWidget, QPushButton +from PyQt5.QtCore import Qt, QEvent +from PyQt5.QtGui import QTextCursor + +class PlainTextEditor(QTextEdit): + def insertFromMimeData(self, source): + if source.hasText(): + self.insertPlainText(source.text()) + +class 
OutputRedirector(io.StringIO): + def __init__(self, output_widget): + super().__init__() + self.output_widget = output_widget + + def write(self, string): + super().write(string) + self.output_widget.append(string) # Append output to QTextEdit + + def flush(self): + pass + +class CommandWindow(QMainWindow): + def __init__(self, parent=None, main_gui=None): + super(CommandWindow, self).__init__(parent) + self.main_gui = main_gui # Ensure main_gui is properly handled if None + self.initUI() + self.local_context = {'metax': main_gui} if main_gui else {} + self.history = [] + self.history_index = 0 + + def initUI(self): + self.setWindowTitle('Debug Console') + self.resize(900, 600) + self.central_widget = QWidget() + self.setCentralWidget(self.central_widget) + layout = QVBoxLayout(self.central_widget) + + self.output = QTextEdit() + self.output.setReadOnly(True) + layout.addWidget(self.output) + + self.input = PlainTextEditor() + self.input.setFixedHeight(100) + layout.addWidget(self.input) + + self.sendButton = QPushButton("Send") + self.sendButton.clicked.connect(self.process_command) + layout.addWidget(self.sendButton) + + self.input.installEventFilter(self) + + def process_command(self): + command = self.input.toPlainText().strip() + if command: + self.output.append(f"> {command}") + self.input.clear() + self.history.append(command) + self.history_index = len(self.history) + + redirector = OutputRedirector(self.output) # create the output redirector that appends to the output widget + with redirect_stdout(redirector): # send anything printed by the command to the redirector + try: + # first try to evaluate the command as an expression (NOTE: this runs arbitrary entered code) + result = eval(command, globals(), self.local_context) + if result is not None: # show the result if there is one + print(result) + except SyntaxError: + # not a valid expression: fall back to executing the command as statement(s) + try: + exec(command, globals(), self.local_context) + except Exception as e: + self.output.append(f"Error: {str(e)}") + except Exception as e: + self.output.append(f"Error: {str(e)}") + + def eventFilter(self, source, event): + if source == self.input and event.type() == QEvent.KeyPress: + if event.key() == 
Qt.Key_Return: + if event.modifiers() & Qt.ShiftModifier: + self.input.insertPlainText('\n') + return True + else: + self.process_command() + return True + elif event.key() == Qt.Key_Up or event.key() == Qt.Key_Down: + if self.history_index > 0 and event.key() == Qt.Key_Up: + self.history_index -= 1 + self.input.setText(self.history[self.history_index]) + self.input.moveCursor(QTextCursor.End) + return True + elif self.history_index < len(self.history) - 1 and event.key() == Qt.Key_Down: + self.history_index += 1 + self.input.setText(self.history[self.history_index]) + self.input.moveCursor(QTextCursor.End) + return True + return super(CommandWindow, self).eventFilter(source, event) + +if __name__ == "__main__": + app = QApplication(sys.argv) + win = CommandWindow() + win.show() + sys.exit(app.exec_()) diff --git a/metax/gui/metax_gui/main_window.ui b/metax/gui/metax_gui/main_window.ui index c6e3adb..68359a1 100644 --- a/metax/gui/metax_gui/main_window.ui +++ b/metax/gui/metax_gui/main_window.ui @@ -46,7 +46,7 @@ Qt::LeftToRight - 3 + 2 false @@ -245,8 +245,8 @@ 0 0 - 391 - 80 + 528 + 595 @@ -976,7 +976,7 @@ - Normalization by sum + Percentages Scaling @@ -2789,8 +2789,8 @@ 0 0 - 1016 - 184 + 621 + 152 @@ -3806,7 +3806,7 @@ 0 0 - 878 + 1020 128 @@ -4495,10 +4495,10 @@ - 3 + 1 - 30.000000000000000 + 99.000000000000000 @@ -4602,6 +4602,13 @@ 16777215 + + - 'all_sig': DataFrame containing all significant rows across all groups, Non-significant values are replaced with NA. +- 'half_same_trends': DataFrame containing rows where each group has the same trend (all positive or all negative non-NA values) and at least 50% of the values are non-NA. +- 'no_na': DataFrame containing rows with no NA values in each group. +- 'same_trends': DataFrame containing rows with no NA values, and all values in each group follow the same trend (all positive or all negative). 
+ + all_sig @@ -4612,6 +4619,11 @@ no_na + + + half_same_trends + + same_trends @@ -4828,7 +4840,7 @@ QTabWidget::Triangular - 2 + 3 @@ -5700,8 +5712,8 @@ 0 0 - 535 - 94 + 996 + 164 @@ -5906,7 +5918,7 @@ 1 - 20.000000000000000 + 99.000000000000000 @@ -7481,8 +7493,8 @@ 0 0 - 620 - 65 + 1016 + 166 @@ -9302,8 +9314,8 @@ 0 0 - 383 - 68 + 1016 + 179 @@ -10470,8 +10482,12 @@ Dev + + + + @@ -10549,6 +10565,11 @@ Tutorial + + + Debug Console + + comboBox_taxa_level_to_stast diff --git a/metax/gui/metax_gui/setting_window.ui b/metax/gui/metax_gui/setting_window.ui index 3851027..28ae700 100644 --- a/metax/gui/metax_gui/setting_window.ui +++ b/metax/gui/metax_gui/setting_window.ui @@ -17,61 +17,15 @@ - 1 + 0 - - - - 0 - 0 - 748 - 340 - - - - General - - - - - - - - Auto Check Update - - - true - - - - - - - Stable - - - true - - - - - - - Beta - - - - - - - 0 0 748 - 340 + 367 @@ -739,7 +693,7 @@ 0 0 748 - 340 + 367 @@ -778,6 +732,37 @@ + + + + + + Auto Check Update + + + true + + + + + + + Stable + + + true + + + + + + + Beta + + + + + diff --git a/metax/gui/metax_gui/settings_widget.py b/metax/gui/metax_gui/settings_widget.py index 53ebfb7..ceb9146 100644 --- a/metax/gui/metax_gui/settings_widget.py +++ b/metax/gui/metax_gui/settings_widget.py @@ -11,7 +11,7 @@ class SettingsWidget(QWidget): protein_infer_method_changed = pyqtSignal(str) stat_mean_by_zero_dominant_changed = pyqtSignal(bool) - def __init__(self, parent=None, update_branch="main", auto_check_update=True, QSettings=None): + def __init__(self, parent=None, update_branch="main", auto_check_update=True, stat_mean_by_zero_dominant=False, QSettings=None): super().__init__(parent) self.update_mode = update_branch self.auto_check_update = auto_check_update @@ -22,7 +22,7 @@ def __init__(self, parent=None, update_branch="main", auto_check_update=True, QS self.ui = Ui_Settings() self.ui.setupUi(self) - self.init_ui(self.update_mode, self.auto_check_update, QSettings) + self.init_ui(self.update_mode, 
self.auto_check_update, stat_mean_by_zero_dominant, QSettings) # resize the window, 800 as default self.resize(800, 400) @@ -70,13 +70,16 @@ def __init__(self, parent=None, update_branch="main", auto_check_update=True, QS self.ui.checkBox_stat_mean_by_zero_dominant.stateChanged.connect(self.handle_stat_mean_by_zero_dominant_changed) - def init_ui(self, update_mode, auto_check_update, QSettings=None): + def init_ui(self, update_mode, auto_check_update, stat_mean_by_zero_dominant, QSettings=None,): if update_mode == "main": self.ui.radioButton_update_stable.setChecked(True) elif update_mode == "dev": self.ui.radioButton_update_beta.setChecked(True) self.ui.checkBox_auto_check_update.setChecked(auto_check_update) + # set the default values for stat_mean_by_zero_dominant + self.ui.checkBox_stat_mean_by_zero_dominant.setChecked(stat_mean_by_zero_dominant) + if QSettings: method = QSettings.value('protein_infer_greedy_mode', 'fast') selected_method = 'normal' if method == 'greedy' else 'fast' diff --git a/metax/gui/metax_gui/ui_main_window.py b/metax/gui/metax_gui/ui_main_window.py index 0e713a3..f4a0cfc 100644 --- a/metax/gui/metax_gui/ui_main_window.py +++ b/metax/gui/metax_gui/ui_main_window.py @@ -147,7 +147,7 @@ def setupUi(self, metaX_main): self.toolBox_2.setMaximumSize(QtCore.QSize(1677, 16777215)) self.toolBox_2.setObjectName("toolBox_2") self.page_2 = QtWidgets.QWidget() - self.page_2.setGeometry(QtCore.QRect(0, 0, 391, 80)) + self.page_2.setGeometry(QtCore.QRect(0, 0, 528, 595)) self.page_2.setObjectName("page_2") self.gridLayout_27 = QtWidgets.QGridLayout(self.page_2) self.gridLayout_27.setObjectName("gridLayout_27") @@ -1411,7 +1411,7 @@ def setupUi(self, metaX_main): self.scrollArea_2.setWidgetResizable(True) self.scrollArea_2.setObjectName("scrollArea_2") self.scrollAreaWidgetContents_2 = QtWidgets.QWidget() - self.scrollAreaWidgetContents_2.setGeometry(QtCore.QRect(0, 0, 1016, 184)) + self.scrollAreaWidgetContents_2.setGeometry(QtCore.QRect(0, 0, 621, 
152)) self.scrollAreaWidgetContents_2.setObjectName("scrollAreaWidgetContents_2") self.gridLayout_50 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_2) self.gridLayout_50.setObjectName("gridLayout_50") @@ -1967,7 +1967,7 @@ def setupUi(self, metaX_main): self.scrollArea_cross_heatmap_settings.setWidgetResizable(True) self.scrollArea_cross_heatmap_settings.setObjectName("scrollArea_cross_heatmap_settings") self.scrollAreaWidgetContents_3 = QtWidgets.QWidget() - self.scrollAreaWidgetContents_3.setGeometry(QtCore.QRect(0, 0, 878, 128)) + self.scrollAreaWidgetContents_3.setGeometry(QtCore.QRect(0, 0, 1020, 128)) self.scrollAreaWidgetContents_3.setObjectName("scrollAreaWidgetContents_3") self.gridLayout_38 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_3) self.gridLayout_38.setObjectName("gridLayout_38") @@ -2263,8 +2263,8 @@ def setupUi(self, metaX_main): self.doubleSpinBox_max_log2fc_heatmap.setEnabled(False) self.doubleSpinBox_max_log2fc_heatmap.setMinimumSize(QtCore.QSize(0, 0)) self.doubleSpinBox_max_log2fc_heatmap.setMaximumSize(QtCore.QSize(16777215, 16777215)) - self.doubleSpinBox_max_log2fc_heatmap.setDecimals(3) - self.doubleSpinBox_max_log2fc_heatmap.setProperty("value", 30.0) + self.doubleSpinBox_max_log2fc_heatmap.setDecimals(1) + self.doubleSpinBox_max_log2fc_heatmap.setProperty("value", 99.0) self.doubleSpinBox_max_log2fc_heatmap.setObjectName("doubleSpinBox_max_log2fc_heatmap") self.horizontalLayout_12.addWidget(self.doubleSpinBox_max_log2fc_heatmap) self.gridLayout_51.addLayout(self.horizontalLayout_12, 3, 2, 1, 1) @@ -2309,6 +2309,7 @@ def setupUi(self, metaX_main): self.comboBox_cross_3_level_plot_df_type.addItem("") self.comboBox_cross_3_level_plot_df_type.addItem("") self.comboBox_cross_3_level_plot_df_type.addItem("") + self.comboBox_cross_3_level_plot_df_type.addItem("") self.gridLayout_51.addWidget(self.comboBox_cross_3_level_plot_df_type, 3, 4, 1, 1) self.checkBox_cross_3_level_plot_remove_zero_col = 
QtWidgets.QCheckBox(self.scrollAreaWidgetContents_3) sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Preferred, QtWidgets.QSizePolicy.Fixed) @@ -2901,7 +2902,7 @@ def setupUi(self, metaX_main): self.scrollArea_3.setWidgetResizable(True) self.scrollArea_3.setObjectName("scrollArea_3") self.scrollAreaWidgetContents_4 = QtWidgets.QWidget() - self.scrollAreaWidgetContents_4.setGeometry(QtCore.QRect(0, 0, 535, 94)) + self.scrollAreaWidgetContents_4.setGeometry(QtCore.QRect(0, 0, 996, 164)) self.scrollAreaWidgetContents_4.setObjectName("scrollAreaWidgetContents_4") self.gridLayout_68 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_4) self.gridLayout_68.setObjectName("gridLayout_68") @@ -3014,7 +3015,7 @@ def setupUi(self, metaX_main): sizePolicy.setHeightForWidth(self.doubleSpinBox_deseq2_log2fc_max.sizePolicy().hasHeightForWidth()) self.doubleSpinBox_deseq2_log2fc_max.setSizePolicy(sizePolicy) self.doubleSpinBox_deseq2_log2fc_max.setDecimals(1) - self.doubleSpinBox_deseq2_log2fc_max.setProperty("value", 20.0) + self.doubleSpinBox_deseq2_log2fc_max.setProperty("value", 99.0) self.doubleSpinBox_deseq2_log2fc_max.setObjectName("doubleSpinBox_deseq2_log2fc_max") self.gridLayout_53.addWidget(self.doubleSpinBox_deseq2_log2fc_max, 0, 7, 1, 1) self.label_156 = QtWidgets.QLabel(self.scrollAreaWidgetContents_4) @@ -3847,7 +3848,7 @@ def setupUi(self, metaX_main): self.scrollArea_5.setWidgetResizable(True) self.scrollArea_5.setObjectName("scrollArea_5") self.scrollAreaWidgetContents_6 = QtWidgets.QWidget() - self.scrollAreaWidgetContents_6.setGeometry(QtCore.QRect(0, 0, 620, 65)) + self.scrollAreaWidgetContents_6.setGeometry(QtCore.QRect(0, 0, 1016, 166)) self.scrollAreaWidgetContents_6.setObjectName("scrollAreaWidgetContents_6") self.gridLayout_57 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_6) self.gridLayout_57.setObjectName("gridLayout_57") @@ -4856,7 +4857,7 @@ def setupUi(self, metaX_main): self.scrollArea_7.setWidgetResizable(True) 
self.scrollArea_7.setObjectName("scrollArea_7") self.scrollAreaWidgetContents_8 = QtWidgets.QWidget() - self.scrollAreaWidgetContents_8.setGeometry(QtCore.QRect(0, 0, 383, 68)) + self.scrollAreaWidgetContents_8.setGeometry(QtCore.QRect(0, 0, 1016, 179)) self.scrollAreaWidgetContents_8.setObjectName("scrollAreaWidgetContents_8") self.gridLayout_66 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_8) self.gridLayout_66.setObjectName("gridLayout_66") @@ -5464,6 +5465,8 @@ def setupUi(self, metaX_main): self.actionSettings.setObjectName("actionSettings") self.actionTutorial = QtWidgets.QAction(metaX_main) self.actionTutorial.setObjectName("actionTutorial") + self.actionDebug_Console = QtWidgets.QAction(metaX_main) + self.actionDebug_Console.setObjectName("actionDebug_Console") self.menuTools.addAction(self.actionTaxaFuncAnalyzer) self.menuTools.addAction(self.actionPeptide_to_TaxaFunc) self.menuTools.addAction(self.actionDatabase_Builder) @@ -5475,8 +5478,12 @@ def setupUi(self, metaX_main): self.menuOthers.addAction(self.actionRestore_From) self.menuOthers.addAction(self.actionSave_As) self.menuDev.addAction(self.actionExport_Log_File) + self.menuDev.addSeparator() self.menuDev.addAction(self.actionHide_Show_Console) + self.menuDev.addAction(self.actionDebug_Console) + self.menuDev.addSeparator() self.menuDev.addAction(self.actionAny_Table_Mode) + self.menuDev.addSeparator() self.menuDev.addAction(self.actionSettings) self.menuBar.addAction(self.menuTools.menuAction()) self.menuBar.addAction(self.menuOthers.menuAction()) @@ -5485,10 +5492,10 @@ def setupUi(self, metaX_main): self.retranslateUi(metaX_main) self.stackedWidget.setCurrentIndex(0) - self.tabWidget_TaxaFuncAnalyzer.setCurrentIndex(3) + self.tabWidget_TaxaFuncAnalyzer.setCurrentIndex(2) self.toolBox_2.setCurrentIndex(0) self.tabWidget_4.setCurrentIndex(0) - self.tabWidget_3.setCurrentIndex(2) + self.tabWidget_3.setCurrentIndex(3) self.tabWidget.setCurrentIndex(1) self.tabWidget_2.setCurrentIndex(1) 
self.tabWidget_6.setCurrentIndex(0) @@ -5632,7 +5639,7 @@ def retranslateUi(self, metaX_main): self.comboBox_set_data_normalization.setItemText(3, _translate("metaX_main", "Min-Max Scaling")) self.comboBox_set_data_normalization.setItemText(4, _translate("metaX_main", "Pareto Scaling")) self.comboBox_set_data_normalization.setItemText(5, _translate("metaX_main", "Mean centering")) - self.comboBox_set_data_normalization.setItemText(6, _translate("metaX_main", "Normalization by sum")) + self.comboBox_set_data_normalization.setItemText(6, _translate("metaX_main", "Percentages Scaling")) self.comboBox_remove_batch_effect.setItemText(0, _translate("metaX_main", "None")) self.label_41.setText(_translate("metaX_main", "Data Normalization")) self.label_43.setText(_translate("metaX_main", "Batch Effect Correction")) @@ -5865,9 +5872,15 @@ def retranslateUi(self, metaX_main): self.label_139.setText(_translate("metaX_main", "To")) self.label_58.setText(_translate("metaX_main", "Top Number")) self.label_141.setText(_translate("metaX_main", "Plot Type")) + self.comboBox_cross_3_level_plot_df_type.setToolTip(_translate("metaX_main", "- \'all_sig\': DataFrame containing all significant rows across all groups, Non-significant values are replaced with NA.\n" +"- \'half_same_trends\': DataFrame containing rows where each group has the same trend (all positive or all negative non-NA values) and at least 50% of the values are non-NA.\n" +"- \'no_na\': DataFrame containing rows with no NA values in each group.\n" +"- \'same_trends\': DataFrame containing rows with no NA values, and all values in each group follow the same trend (all positive or all negative).\n" +"")) self.comboBox_cross_3_level_plot_df_type.setItemText(0, _translate("metaX_main", "all_sig")) self.comboBox_cross_3_level_plot_df_type.setItemText(1, _translate("metaX_main", "no_na")) - self.comboBox_cross_3_level_plot_df_type.setItemText(2, _translate("metaX_main", "same_trends")) + 
self.comboBox_cross_3_level_plot_df_type.setItemText(2, _translate("metaX_main", "half_same_trends")) + self.comboBox_cross_3_level_plot_df_type.setItemText(3, _translate("metaX_main", "same_trends")) self.checkBox_cross_3_level_plot_remove_zero_col.setText(_translate("metaX_main", "Remove Zero Col")) self.label_30.setText(_translate("metaX_main", "with")) self.comboBox_top_heatmap_scale_method.setItemText(0, _translate("metaX_main", "maxmin")) @@ -6295,3 +6308,4 @@ def retranslateUi(self, metaX_main): self.actionAny_Table_Mode.setText(_translate("metaX_main", "Any Table Mode")) self.actionSettings.setText(_translate("metaX_main", "Settings")) self.actionTutorial.setText(_translate("metaX_main", "Tutorial")) + self.actionDebug_Console.setText(_translate("metaX_main", "Debug Console")) diff --git a/metax/gui/metax_gui/ui_setting_window.py b/metax/gui/metax_gui/ui_setting_window.py index a9108fb..83c9c45 100644 --- a/metax/gui/metax_gui/ui_setting_window.py +++ b/metax/gui/metax_gui/ui_setting_window.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Form implementation generated from reading ui file 'c:\Users\Qing\OneDrive - University of Ottawa\code\TaxaFunc\MetaX\metax\gui\metax_gui\setting_window.ui' +# Form implementation generated from reading ui file 'c:\Users\max\OneDrive - University of Ottawa\code\TaxaFunc\MetaX\metax\gui\metax_gui\setting_window.ui' # # Created by: PyQt5 UI code generator 5.15.9 # @@ -19,28 +19,8 @@ def setupUi(self, Settings): self.gridLayout.setObjectName("gridLayout") self.toolBox = QtWidgets.QToolBox(Settings) self.toolBox.setObjectName("toolBox") - self.page = QtWidgets.QWidget() - self.page.setGeometry(QtCore.QRect(0, 0, 748, 340)) - self.page.setObjectName("page") - self.gridLayout_3 = QtWidgets.QGridLayout(self.page) - self.gridLayout_3.setObjectName("gridLayout_3") - self.gridLayout_2 = QtWidgets.QGridLayout() - self.gridLayout_2.setObjectName("gridLayout_2") - self.checkBox_auto_check_update = QtWidgets.QCheckBox(self.page) - 
self.checkBox_auto_check_update.setChecked(True) - self.checkBox_auto_check_update.setObjectName("checkBox_auto_check_update") - self.gridLayout_2.addWidget(self.checkBox_auto_check_update, 0, 0, 1, 1) - self.radioButton_update_stable = QtWidgets.QRadioButton(self.page) - self.radioButton_update_stable.setChecked(True) - self.radioButton_update_stable.setObjectName("radioButton_update_stable") - self.gridLayout_2.addWidget(self.radioButton_update_stable, 0, 1, 1, 1) - self.radioButton_update_beta = QtWidgets.QRadioButton(self.page) - self.radioButton_update_beta.setObjectName("radioButton_update_beta") - self.gridLayout_2.addWidget(self.radioButton_update_beta, 0, 2, 1, 1) - self.gridLayout_3.addLayout(self.gridLayout_2, 0, 0, 1, 1) - self.toolBox.addItem(self.page, "") self.page_2 = QtWidgets.QWidget() - self.page_2.setGeometry(QtCore.QRect(0, 0, 748, 340)) + self.page_2.setGeometry(QtCore.QRect(0, 0, 748, 367)) self.page_2.setObjectName("page_2") self.gridLayout_4 = QtWidgets.QGridLayout(self.page_2) self.gridLayout_4.setObjectName("gridLayout_4") @@ -303,7 +283,7 @@ def setupUi(self, Settings): self.gridLayout_4.addLayout(self.gridLayout_12, 2, 3, 1, 1) self.toolBox.addItem(self.page_2, "") self.page_3 = QtWidgets.QWidget() - self.page_3.setGeometry(QtCore.QRect(0, 0, 748, 340)) + self.page_3.setGeometry(QtCore.QRect(0, 0, 748, 367)) self.page_3.setObjectName("page_3") self.gridLayout_11 = QtWidgets.QGridLayout(self.page_3) self.gridLayout_11.setObjectName("gridLayout_11") @@ -321,20 +301,30 @@ def setupUi(self, Settings): self.label_24.setObjectName("label_24") self.gridLayout_10.addWidget(self.label_24, 0, 0, 1, 1) self.gridLayout_11.addLayout(self.gridLayout_10, 1, 1, 1, 1) + self.gridLayout_2 = QtWidgets.QGridLayout() + self.gridLayout_2.setObjectName("gridLayout_2") + self.checkBox_auto_check_update = QtWidgets.QCheckBox(self.page_3) + self.checkBox_auto_check_update.setChecked(True) + 
self.checkBox_auto_check_update.setObjectName("checkBox_auto_check_update") + self.gridLayout_2.addWidget(self.checkBox_auto_check_update, 0, 0, 1, 1) + self.radioButton_update_stable = QtWidgets.QRadioButton(self.page_3) + self.radioButton_update_stable.setChecked(True) + self.radioButton_update_stable.setObjectName("radioButton_update_stable") + self.gridLayout_2.addWidget(self.radioButton_update_stable, 0, 1, 1, 1) + self.radioButton_update_beta = QtWidgets.QRadioButton(self.page_3) + self.radioButton_update_beta.setObjectName("radioButton_update_beta") + self.gridLayout_2.addWidget(self.radioButton_update_beta, 0, 2, 1, 1) + self.gridLayout_11.addLayout(self.gridLayout_2, 0, 0, 1, 2) self.toolBox.addItem(self.page_3, "") self.gridLayout.addWidget(self.toolBox, 0, 0, 1, 1) self.retranslateUi(Settings) - self.toolBox.setCurrentIndex(1) + self.toolBox.setCurrentIndex(0) QtCore.QMetaObject.connectSlotsByName(Settings) def retranslateUi(self, Settings): _translate = QtCore.QCoreApplication.translate Settings.setWindowTitle(_translate("Settings", "Settings")) - self.checkBox_auto_check_update.setText(_translate("Settings", "Auto Check Update")) - self.radioButton_update_stable.setText(_translate("Settings", "Stable")) - self.radioButton_update_beta.setText(_translate("Settings", "Beta")) - self.toolBox.setItemText(self.toolBox.indexOf(self.page), _translate("Settings", "General")) self.label_4.setText(_translate("Settings", "Taxa-Functions Link Network")) self.label_11.setText(_translate("Settings", "Line Width")) self.label_15.setToolTip(_translate("Settings", "The larger the value the greater the repulsion")) @@ -418,4 +408,7 @@ def retranslateUi(self, Settings): self.comboBox_protein_infer_greedy_mode.setItemText(0, _translate("Settings", "fast")) self.comboBox_protein_infer_greedy_mode.setItemText(1, _translate("Settings", "normal")) self.label_24.setText(_translate("Settings", "Greedy Mode in Razor Method")) + 
self.checkBox_auto_check_update.setText(_translate("Settings", "Auto Check Update")) + self.radioButton_update_stable.setText(_translate("Settings", "Stable")) + self.radioButton_update_beta.setText(_translate("Settings", "Beta")) self.toolBox.setItemText(self.toolBox.indexOf(self.page_3), _translate("Settings", "Others")) diff --git a/metax/taxafunc_analyzer/analyzer_utils/basic_stats.py b/metax/taxafunc_analyzer/analyzer_utils/basic_stats.py index 5772024..b0fda3f 100644 --- a/metax/taxafunc_analyzer/analyzer_utils/basic_stats.py +++ b/metax/taxafunc_analyzer/analyzer_utils/basic_stats.py @@ -6,6 +6,26 @@ class BasicStats: def __init__(self, tfa): self.tfa = tfa + def _get_mean_by_zero_dominant(self, df: pd.DataFrame) -> pd.Series: + """ + Optimized function to calculate the mean of non-zero values in each row if the number of zero values + is less than half of the total values; otherwise, return 0. + + Args: + df (pd.DataFrame): Input DataFrame. + + Returns: + pd.Series: A Series with mean values based on the zero-dominant condition. + """ + # 计算每行的零值数量 + zero_counts = (df == 0).sum(axis=1) + # 判断每行零值是否超过一半,超过的行直接设为0 + mean_series = pd.Series(0, index=df.index) + non_zero_rows = zero_counts <= (df.shape[1] / 2) + # 对非零主导的行计算非零均值 + mean_series[non_zero_rows] = df[non_zero_rows].replace(0, pd.NA).mean(axis=1, skipna=True) + return mean_series + # get a mean df by group def get_stats_mean_df_by_group(self, df: pd.DataFrame, condition: list|None = None, zero_dominant: bool|None = None) -> pd.DataFrame: """ @@ -20,31 +40,12 @@ def get_stats_mean_df_by_group(self, df: pd.DataFrame, condition: list|None = No pd.DataFrame: A DataFrame containing the mean values of the groups. """ - def get_mean_by_zero_dominant(df: pd.DataFrame) -> pd.Series: - """ - Optimized function to calculate the mean of non-zero values in each row if the number of zero values - is less than half of the total values; otherwise, return 0. - - Args: - df (pd.DataFrame): Input DataFrame. 
- - Returns: - pd.Series: A Series with mean values based on the zero-dominant condition. - """ - # 计算每行的零值数量 - zero_counts = (df == 0).sum(axis=1) - # 判断每行零值是否超过一半,超过的行直接设为0 - mean_series = pd.Series(0, index=df.index) - non_zero_rows = zero_counts <= (df.shape[1] / 2) - # 对非零主导的行计算非零均值 - mean_series[non_zero_rows] = df[non_zero_rows].replace(0, pd.NA).mean(axis=1, skipna=True) - return mean_series if zero_dominant is None: zero_dominant = self.tfa.stat_mean_by_zero_dominant print(f"Caculating mean by zero_dominant: [{zero_dominant}]") - mean_method = get_mean_by_zero_dominant if zero_dominant else lambda x: x.mean(axis=1) + mean_method = self._get_mean_by_zero_dominant if zero_dominant else lambda x: x.mean(axis=1) data = df.copy() @@ -187,7 +188,8 @@ def get_combined_sub_meta_df( sub_meta: str, rename_sample: bool = False, plot_mean: bool = False, - ) -> tuple[pd.DataFrame, list[str]]: + zero_dominant: bool|None = None + ) -> tuple[pd.DataFrame, dict[str, str]]: """ Combines the sub-meta information with the main meta information in the given DataFrame and returns the combined DataFrame and a list of sub-meta groups. @@ -196,9 +198,10 @@ def get_combined_sub_meta_df( sub_meta (str): The sub-meta information to be combined with the main meta information. rename_sample (bool, optional): Whether to rename the samples in the DataFrame. Defaults to False. plot_mean (bool, optional): Whether to plot the mean values. Defaults to False. + zero_dominant (bool, optional): Whether to calculate the mean of non-zero values in each group(return 0 if the >50% values are zero). Defaults to None. Returns: - tuple[pd.DataFrame, list[str]]: A tuple containing the combined DataFrame and a list of sub-meta groups. + tuple[pd.DataFrame, Dict[str, str]]: A tuple containing the combined DataFrame and a dictionary with the sample names as keys and the group names as values. 
""" if sub_meta != 'None': @@ -206,7 +209,14 @@ def get_combined_sub_meta_df( sub_groups = {sample: self.tfa.get_group_of_a_sample(sample, sub_meta) for sample in df.columns} # Combine samples with the same meta and sub-meta, and calculate the mean value - grouped_data = df.T.groupby([sample_groups, sub_groups]).mean().T + if zero_dominant is None: + zero_dominant = self.tfa.stat_mean_by_zero_dominant + print(f"Caculating mean by zero_dominant: [{zero_dominant}]") + + if zero_dominant: + grouped_data = df.T.groupby([sample_groups, sub_groups]).apply(lambda x: self._get_mean_by_zero_dominant(x.T)).T + else: + grouped_data = df.T.groupby([sample_groups, sub_groups]).mean().T # group_list is the sub-meta group group_list = [i[1] for i in grouped_data.columns] if not plot_mean else grouped_data.columns.tolist() @@ -222,7 +232,10 @@ def get_combined_sub_meta_df( else: group_list = [self.tfa.get_group_of_a_sample(i) for i in df.columns] if not plot_mean else df.columns.tolist() - return df, group_list + # create a group_dict, key is column name, value is the group name bsed on the group_list + sample_to_group_dict = {col: group_list[i] for i, col in enumerate(df.columns)} + + return df, sample_to_group_dict # Shapiro-Wilk Test def shapiro_test(self, df: pd.DataFrame, alpha=0.05) : @@ -246,3 +259,24 @@ def shapiro_test(self, df: pd.DataFrame, alpha=0.05) : shapiro_results[sample] = {'p_value': p, 'is_normal': p > alpha} return shapiro_results + + + + def get_df_by_mean_and_submeta(self, df, sub_meta:str = 'None', rename_sample:bool = True, plot_mean:bool = False): + """ + Prepares a DataFrame for baisc heatmap plotting. + Parameters: + df (pd.DataFrame): The input DataFrame containing the sample data. + sub_meta (str): The sub-metadata to be used for grouping. Default is 'None'. + rename_sample (bool): Whether to rename the samples. Default is True. + plot_mean (bool): Whether to plot the mean values. Default is False. 
+ Returns: + tuple: A tuple containing the processed DataFrame and the group list for each column. + """ + if plot_mean and sub_meta == 'None': #! if sub_meta is not None, plot_mean will be set to False + df = self.tfa.BasicStats.get_stats_mean_df_by_group(df) + sample_to_group_dict = {col: col for col in df.columns} + else: + df, sample_to_group_dict = self.tfa.BasicStats.get_combined_sub_meta_df(df=df, sub_meta=sub_meta, rename_sample=rename_sample, plot_mean=plot_mean) + + return df, sample_to_group_dict \ No newline at end of file diff --git a/metax/taxafunc_analyzer/analyzer_utils/cross_test.py b/metax/taxafunc_analyzer/analyzer_utils/cross_test.py index ecc9b9b..e042a0d 100644 --- a/metax/taxafunc_analyzer/analyzer_utils/cross_test.py +++ b/metax/taxafunc_analyzer/analyzer_utils/cross_test.py @@ -1,5 +1,6 @@ # T-Test , ANOVA, Tukey HSD, Deseq2 import pandas as pd +import numpy as np from statsmodels.stats.multicomp import pairwise_tukeyhsd from scipy.stats import f_oneway from scipy.stats import ttest_ind @@ -726,43 +727,94 @@ def extrcat_significant_fc_from_deseq2all(self, df, p_value=0.05, log2fc_min=1, return dft # return a dict of 3 dataframe: df_all, df_no_na, df_same_trends - def extrcat_significant_fc_from_all_3_levels(self, df, p_value=0.05, log2fc_min=1, log2fc_max=30, p_type='padj', df_type:str='deseq2') -> dict: - def filter_rows(group): - # 保留所有值都为正或者都为负的行 - return group[(group > 0).all(axis=1) | (group < 0).all(axis=1)] - - res_df_dict = {} - - first_level_values = df.columns.get_level_values(0).unique() - res_dict = {} - for value in first_level_values: # iterate over first level values - sub_df = df[value] - print(f"\nExtracting significant Stats from '{value}':") - if df_type == 'dunnett': - dft = self.extrcat_significant_stat_from_dunnett(sub_df, p_value=p_value, p_type=p_type) - elif df_type == 'deseq2': - dft = self.extrcat_significant_fc_from_deseq2all(sub_df, p_value=p_value, log2fc_min=log2fc_min, log2fc_max=log2fc_max, p_type=p_type) - 
else: - raise ValueError("df_type must be in ['dunnett', 'deseq2']") - - res_dict[value] = dft - df = pd.concat(res_dict, axis=1) - df_swapped = df.swaplevel(axis=1) - df_swapped = df_swapped.sort_index(axis=1) - print(f"\nTotal number of all_siginificant: [{df_swapped.shape[0]}]") - res_df_dict['all_sig'] = df_swapped - - df_no_na = df_swapped.groupby(level=0, axis=1).apply(lambda x: x.dropna()) - df_no_na = df_no_na.droplevel(1, axis=1) - print(f"Total number of no_na_in_one_group: [{df_no_na.shape[0]}]") - res_df_dict['no_na'] = df_no_na - - # Only keep rows that have all values positive or all values negative - df_same_trends = df_no_na.groupby(level=0, axis=1).apply(filter_rows) - # dropna level 0 index - df_same_trends.columns = df_same_trends.columns.droplevel(1) - print(f"Total number of same_trends_in_one_group: [{df_same_trends.shape[0]}]") - res_df_dict['same_trends'] = df_same_trends + def extrcat_significant_fc_from_all_3_levels(self, df, p_value=0.05, log2fc_min=1, log2fc_max=99, + p_type='padj', df_type:str='deseq2') -> dict: + """ + Extracts significant fold change data from a multi-level DataFrame and categorizes it based on different filtering criteria. + + Parameters: + ----------- + df : pd.DataFrame + A multi-level DataFrame containing statistical data for different groups. + + p_value : float, optional, default=0.05 + The threshold for significance based on p-values. Only rows with p-values below this threshold will be considered significant. + + log2fc_min : float, optional, default=1 + The minimum log2 fold change to consider a row significant. + + log2fc_max : float, optional, default=99 + The maximum log2 fold change to consider a row significant. + + p_type : str, optional, default='padj' + The type of p-value to use for filtering. Typically 'padj' or 'pvalue'. + + df_type : str, optional, default='deseq2' + Specifies the type of statistical method used. Must be either 'dunnett' or 'deseq2'. 
+ + Returns: + -------- + dict + A dictionary containing three DataFrames: + - 'all_sig': DataFrame containing all significant rows across all groups, Non-significant values are replaced with NA. + - 'half_same_trends': DataFrame containing rows where each group has the same trend (all positive or all negative non-NA values) + and at least 50% of the values are non-NA. + - 'no_na': DataFrame containing rows with no NA values in each group. + - 'same_trends': DataFrame containing rows with no NA values, and all values in each group follow the same trend (all positive or all negative). + """ + def filter_rows_with_same_trends_and_half_na(group): + # 筛选出所有非NA值都为正或都为负的行 + filtered = group[group.apply(lambda row: (row.dropna() > 0).all() or (row.dropna() < 0).all(), axis=1)] + # 检查每行非NA的值是否超过一半,若非NA值少于一半则将该行置为NA + filtered = filtered.apply(lambda x: x if x.notna().sum() > len(x) / 2 else pd.Series([np.nan] * len(x), index=x.index), axis=1) + return filtered + def filter_rows(group): + # 保留所有值都为正或者都为负的行, 且不包含NA + return group[(group > 0).all(axis=1) | (group < 0).all(axis=1)] + + res_df_dict = {} + + first_level_values = df.columns.get_level_values(0).unique() + res_dict = {} + for value in first_level_values: # iterate over first level values + sub_df = df[value] + print(f"\nExtracting significant Stats from '{value}':") + if df_type == 'dunnett': + dft = self.extrcat_significant_stat_from_dunnett(sub_df, p_value=p_value, p_type=p_type) + elif df_type == 'deseq2': + dft = self.extrcat_significant_fc_from_deseq2all(sub_df, p_value=p_value, log2fc_min=log2fc_min, log2fc_max=log2fc_max, p_type=p_type) + else: + raise ValueError("df_type must be in ['dunnett', 'deseq2']") - return res_df_dict \ No newline at end of file + res_dict[value] = dft + df = pd.concat(res_dict, axis=1) + df_swapped = df.swaplevel(axis=1) + df_swapped = df_swapped.sort_index(axis=1) + print(f"\nTotal number of all_siginificant: [{df_swapped.shape[0]}]") + res_df_dict['all_sig'] = df_swapped + #TODO 
extract half of the columns in each group has no na and same trends + + + # 按groupby(level=0)分组,过滤每组符合条件的行 + df_half = pd.concat([filter_rows_with_same_trends_and_half_na(group) for _, group in df_swapped.groupby(level=0, axis=1)], axis=1) + # df_half.columns = df_half.columns.droplevel(1) # 删除多余的层级 + df_half = df_half.dropna(how='all') # 删除所有值都为NA的行 + print(f"Total number of half_same_trends: [{df_half.shape[0]}]") + res_df_dict['half_same_trends'] = df_half + + + df_no_na = df_swapped.groupby(level=0, axis=1).apply(lambda x: x.dropna()) + df_no_na = df_no_na.droplevel(1, axis=1) + print(f"Total number of no_na: [{df_no_na.shape[0]}]") + res_df_dict['no_na'] = df_no_na + + # Only keep rows that have all values positive or all values negative + df_same_trends = df_no_na.groupby(level=0, axis=1).apply(filter_rows) + # dropna level 0 index + df_same_trends.columns = df_same_trends.columns.droplevel(1) + print(f"Total number of same_trends: [{df_same_trends.shape[0]}]") + res_df_dict['same_trends'] = df_same_trends + + + return res_df_dict \ No newline at end of file diff --git a/metax/taxafunc_analyzer/analyzer_utils/data_preprocessing.py b/metax/taxafunc_analyzer/analyzer_utils/data_preprocessing.py index 210e013..0efc2fc 100644 --- a/metax/taxafunc_analyzer/analyzer_utils/data_preprocessing.py +++ b/metax/taxafunc_analyzer/analyzer_utils/data_preprocessing.py @@ -118,7 +118,7 @@ def trace_shift(x): normalize_operations = { 'None': lambda x: x, 'mean': lambda x: x - x.mean(), - 'sum': lambda x: x / (x.sum() + epsilon), + 'percentage': lambda x: x / (x.sum() + epsilon) * 100, 'minmax': lambda x: (x - x.min()) / (x.max() - x.min()), 'zscore': lambda x: (x - x.mean()) / (x.std() + epsilon), 'pareto': lambda x: (x - x.mean()) / (np.sqrt(x.std() + epsilon)), @@ -143,7 +143,7 @@ def trace_shift(x): print(f'Data normalized by [{normalize_method}]') else: - raise ValueError('normalize_method must be in [None, mean, sum, minmax, zscore, pareto]') + raise 
ValueError(f'normalize_method must be in {list(normalize_operations.keys())}') # move the data to positive df_mat = df_mat - df_mat.min() @@ -637,7 +637,7 @@ def data_preprocess(self, df: pd.DataFrame, normalize_method: str|None = None, - `None`: No normalization. - `trace_shift`: Trace shift normalization inspired by DirectLFQ. - `mean`: Mean normalization. - - `sum`: Sum normalization. + - `percentage`: Percentage normalization, then *100. - `minmax`: Min-max normalization. - `zscore`: Z-score normalization. - `pareto`: Pareto scaling. diff --git a/metax/taxafunc_ploter/heatmap_plot.py b/metax/taxafunc_ploter/heatmap_plot.py index 0cdbd21..6a46d5f 100644 --- a/metax/taxafunc_ploter/heatmap_plot.py +++ b/metax/taxafunc_ploter/heatmap_plot.py @@ -349,18 +349,11 @@ def plot_basic_heatmap_of_test_res(self, df, top_number:int = 100, value_type:st def plot_basic_heatmap(self, df, title = 'Heatmap',fig_size:tuple|None = None, scale = None, col_cluster:bool = True, row_cluster:bool = True, cmap:str|None = None, rename_taxa:bool = True, font_size:int = 10, - show_all_labels:tuple = (False, False), rename_sample:bool = True, plot_mean:bool = False, - sub_meta: str = "None", scale_method:str = 'maxmin', return_type:str = 'fig' - ): - ''' - sub_meta is higher plot_mean, if sub_meta provided, plot_mean is False - ''' - - if plot_mean and sub_meta == 'None': # if sub_meta is not None, plot_mean is False - print('Plot the mean of the data, set rename_sample to False') - rename_sample = False - - df = self.tfa.BasicStats.get_stats_mean_df_by_group(df) + show_all_labels:tuple = (False, False), scale_method:str = 'maxmin', return_type:str = 'fig', + sample_to_group_dict:dict|None = None): + + # check if any row or column is all 0 + if (df == 0).all().any(): # remove all 0 rows row_num = len(df) df = df.loc[~(df==0).all(axis=1)] if row_cluster else df @@ -375,19 +368,12 @@ def plot_basic_heatmap(self, df, title = 'Heatmap',fig_size:tuple|None = None, col_cluster = False - mat = 
df.copy() - mat = self.scale_data(df = mat, scale_by = scale, method = scale_method) + df = self.scale_data(df = df, scale_by = scale, method = scale_method) # if index is Taxon, rename index if rename_taxa: - mat = self.rename_taxa(mat) + df = self.rename_taxa(df) - mat, group_list = self.tfa.BasicStats.get_combined_sub_meta_df(df=mat, sub_meta=sub_meta, rename_sample=rename_sample, plot_mean=plot_mean) - - # if only one column, remove col_cluster, set scale to None - if len(mat.columns) < 2: - col_cluster = False - # scale = None if return_type == 'table': sns_params = { @@ -396,16 +382,16 @@ def plot_basic_heatmap(self, df, title = 'Heatmap',fig_size:tuple|None = None, "method": self.linkage_method, "metric": self.distance_metric, } - fig = sns.clustermap(mat, **sns_params) + fig = sns.clustermap(df, **sns_params) # get the sorted dataframe if row_cluster and not col_cluster: - sorted_df = mat.iloc[fig.dendrogram_row.reordered_ind, :] + sorted_df = df.iloc[fig.dendrogram_row.reordered_ind, :] elif col_cluster and not row_cluster: - sorted_df = mat.iloc[:, fig.dendrogram_col.reordered_ind] + sorted_df = df.iloc[:, fig.dendrogram_col.reordered_ind] elif row_cluster and col_cluster: - sorted_df = mat.iloc[fig.dendrogram_row.reordered_ind, fig.dendrogram_col.reordered_ind] + sorted_df = df.iloc[fig.dendrogram_row.reordered_ind, fig.dendrogram_col.reordered_ind] else: - sorted_df = mat + sorted_df = df plt.close(fig.figure) return sorted_df @@ -414,8 +400,12 @@ def plot_basic_heatmap(self, df, title = 'Heatmap',fig_size:tuple|None = None, cmap = 'YlOrRd' if fig_size is None: fig_size = (30,30) + if sample_to_group_dict is not None: + group_list = [sample_to_group_dict.get(i, i) for i in df.columns] + color_list = self.assign_colors(group_list) + else: + color_list = None - color_list = self.assign_colors(group_list) sns_params = { # "center": 0, "cmap": cmap, @@ -428,11 +418,11 @@ def plot_basic_heatmap(self, df, title = 'Heatmap',fig_size:tuple|None = None, 
"row_cluster": row_cluster, "method": self.linkage_method, "metric": self.distance_metric, - "col_colors": color_list if not plot_mean else None, + "col_colors": color_list, "xticklabels": True if show_all_labels[0] else "auto", "yticklabels": True if show_all_labels[1] else "auto", } - fig = sns.clustermap(mat, **sns_params) + fig = sns.clustermap(df, **sns_params) fig.ax_heatmap.set_xticklabels( fig.ax_heatmap.get_xmajorticklabels(), @@ -490,7 +480,7 @@ def plot_heatmap_of_all_condition_res(self, df, pvalue:float = 0.05,scale:str|N - return_type (str): The type of the return value. Default is 'fig'. options: 'fig', 'table' - res_df_type (str): The type of the result DataFrame. Default is 'deseq2'. - p_type (str): The type of pvalue. Default is 'padj'. options: 'pvalue', 'padj' - - three_levels_df_type (str): The type of the three levels DataFrame. Default is 'same_trends'. options: 'all_sig', 'no_na', 'same_trends' + - three_levels_df_type (str): The type of the three levels DataFrame. Default is 'same_trends'. options: 'all_sig', 'no_na', 'half_same_trnds', 'same_trends' - show_col_colors (bool): Whether to show column colors. Default is True. - remove_zero_col (bool): Whether to remove zero columns. Default is True. diff --git a/metax/taxafunc_ploter/network_plot.py b/metax/taxafunc_ploter/network_plot.py index f857861..9c24810 100644 --- a/metax/taxafunc_ploter/network_plot.py +++ b/metax/taxafunc_ploter/network_plot.py @@ -113,6 +113,12 @@ def create_nodes_links( - categories (list): Categories for nodes, used for coloring in the graph. - cytoscape_df (DataFrame): DataFrame containing nodes and links for Cytoscape export. 
""" + def update_focus(row): + focus_taxa = 'Y' if row['taxa'] in focus_list else 'N' + focus_func = 'Y' if row['function'] in focus_list else 'N' + focus = 'Y' if focus_taxa == 'Y' or focus_func == 'Y' else 'N' + return pd.Series([focus_taxa, focus_func, focus]) + df = self.tfa.taxa_func_df.copy() if self.rename_taxa: print("Renaming taxa to last level") @@ -134,8 +140,10 @@ def create_nodes_links( network_df.columns = ['taxa', 'function'] + network_df.columns.tolist()[2:] taxa_dict = network_df.drop('function', axis=1).groupby('taxa').sum().to_dict() func_dict = network_df.drop('taxa', axis=1).groupby('function').sum().to_dict() - network_df['focus_taxa'] = network_df['taxa'].apply(lambda x: 'Y' if x in focus_list else 'N') - network_df['focus_func'] = network_df['function'].apply(lambda x: 'Y' if x in focus_list else 'N') + + network_df[['focus_taxa', 'focus_func', 'focus']] = network_df.apply(update_focus, axis=1) + + # cerate attributes_df attributes_taxa_df = pd.DataFrame(network_df[['taxa']]) attributes_taxa_df.drop_duplicates(inplace=True) @@ -157,7 +165,7 @@ def create_nodes_links( # concatenate the taxa and function attributes_df attributes_df = pd.concat([attributes_taxa_df, attributes_func_df]) - attributes_df['mean'] = attributes_df.drop(['node', 'focus', 'type'], axis=1).mean(axis=1) + attributes_df['mean_value'] = attributes_df.drop(['node', 'focus', 'type'], axis=1).mean(axis=1) # Done creating network_df and attributes_df for export to cytoscape df['mean'] = df.mean(axis=1) diff --git a/metax/taxafunc_ploter/sankey_plot.py b/metax/taxafunc_ploter/sankey_plot.py index 9913ca9..286db0f 100644 --- a/metax/taxafunc_ploter/sankey_plot.py +++ b/metax/taxafunc_ploter/sankey_plot.py @@ -44,6 +44,9 @@ def convert_df_by_group_for_sankey(self,df, sub_meta, plot_mean) -> dict: df_dict = {} # add all samples to the dict df['sum'] = df.sum(axis=1) + df = df[df['sum'] != 0] + if len(df) == 0: + raise ValueError('\n\nAll values are 0!\n\nDo you calculate mean 
values for each group by Zero-Domainat methods?\n\nPlease check the settings.') df_dict['All'] = self.df_to_sankey_df(df, value_col='sum') # add samples for each group to the dict for group, samples in group_dict.items(): @@ -55,9 +58,10 @@ def convert_df_by_group_for_sankey(self,df, sub_meta, plot_mean) -> dict: df_temp['sum'] = df_temp.sum(axis=1) # remove values that are 0 df_temp = df_temp[df_temp['sum'] != 0] + if len(df_temp) == 0: + continue df_temp = self.df_to_sankey_df(df_temp, value_col='sum') df_dict[group] = df_temp - return df_dict diff --git a/metax/utils/version.py b/metax/utils/version.py index f987022..d467f5e 100644 --- a/metax/utils/version.py +++ b/metax/utils/version.py @@ -1,2 +1,2 @@ -__version__ = '1.117.1' +__version__ = '1.118.2' API_version = '3' \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index b89d208..fd170fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "MetaXTools" -version = "1.117.1" +version = "1.118.2" description = "MetaXTools is a novel tool for linking peptide sequences with taxonomic and functional information in Metaproteomics." readme = "README_PyPi.md" license = { text = "NorthOmics" }