Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
byemaxx committed Sep 23, 2024
2 parents ac25654 + 9e23875 commit 4faa5d0
Show file tree
Hide file tree
Showing 10 changed files with 163 additions and 80 deletions.
11 changes: 11 additions & 0 deletions Docs/ChangeLog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
# Version: 1.114.4
## Date: 2024-09-23
### Changes:
- Fix: Fixed a bug where the option to create the taxa table and function table only from the OTFs did not work correctly.
- Change: Changed the result message shown after creating the Taxon-Function Table.

# Version: 1.114.3
## Date: 2024-09-22
### Changes:
- Fix: Fixed the razor method for summing peptide intensities to proteins; the resulting intensities were not stable across different runs.

# Version: 1.114.2
## Date: 2024-09-22
### Changes:
Expand Down
130 changes: 93 additions & 37 deletions metax/gui/main_gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -1852,46 +1852,102 @@ def run_after_set_multi_tables(self):


# Final message
outlier_detect_method = self.comboBox_outlier_detection.currentText()

if outlier_detect_method != 'None':
nan_stats_str = '\n\nLeft row after data preprocessing:\n'
for i, j in self.tfa.outlier_status.items():
if i not in ['peptide', 'custom']:
continue
if j:
nan_stats_str += f'{i}: [{j}]\n'
# print(nan_stats_str)
else:
nan_stats_str = ''

if self.tfa.any_df_mode:
num_item = self.tfa.custom_df.shape[0]
msg = f'Custom data is ready! \
\n{nan_stats_str}\
\n\nNumber of item: [{num_item}]'
msg = f"""<html>
<body>
<p>Custom data is ready!</p>
<p>{nan_stats_str}</p>
<p>Number of items: [{num_item}]</p>
</body>
</html>
"""
else:
msg = f'Operational Taxa-Functions (OTF) data is ready! \
\n{nan_stats_str}\
\n\nFunction: [{self.tfa.func_name}]\
\nNumber of peptide: [{num_peptide} ({num_peptide/self.tfa.original_df.shape[0]*100:.2f}% of all peptides)]\
\nNumber of function: [{num_func}]\
\nNumber of taxa: [{num_taxa}]\
\nNumber of taxa-function: [{num_taxa_func}]\
\nNumber of protein: [{num_protein}]'

print(f'\n----Multi Table Result----\n{msg}\n---------------------------\n')
self.logger.write_log(msg.replace('\n', ''))
QMessageBox.information(self.MainWindow, 'Information', msg )

print("\n---------------------------------- Set Multi Table End ----------------------------------\n")
# go to basic analysis tab and the first tab
self.stackedWidget.setCurrentIndex(0) # go to page_analyzer
self.tabWidget_TaxaFuncAnalyzer.setCurrentIndex(3)
self.tabWidget_4.setCurrentIndex(0)
self.pushButton_set_multi_table.setEnabled(True)


original_num_peptide = self.tfa.original_df.shape[0]

msg = f"""<html>
<head>
<style>
table {{
border-collapse: collapse;
width: 100%;
}}
th, td {{
border: 1px solid black;
padding: 8px;
text-align: left;
}}
h2 {{
text-align: center;
}}
</style>
</head>
<body>
<h2>Operational Taxa-Functions (OTF) data is ready!</h2>
<p>Taxa Level: <b>{self.tfa.taxa_level}</b></p>
<p>Function Category: <b>{self.tfa.func_name}</b></p>
<hr>
<table>
<tr>
<th>Category</th>
<th>Number</th>
<th>Used Peptides</th>
<th>% of All Peptides</th>
</tr>
<tr>
<td>Taxa</td>
<td>{num_taxa}</td>
<td>{self.tfa.peptide_num_used["taxa"]}</td>
<td>{self.tfa.peptide_num_used["taxa"] / original_num_peptide * 100:.2f}%</td>
</tr>
<tr>
<td>Functions</td>
<td>{num_func}</td>
<td>{self.tfa.peptide_num_used["func"]}</td>
<td>{self.tfa.peptide_num_used["func"] / original_num_peptide * 100:.2f}%</td>
</tr>
<tr>
<td>OTFs</td>
<td>{num_taxa_func}</td>
<td>{self.tfa.peptide_num_used["taxa_func"]}</td>
<td>{self.tfa.peptide_num_used["taxa_func"] / original_num_peptide * 100:.2f}%</td>
</tr>
<tr>
<td>Clean Peptides</td>
<td>{num_peptide}</td>
<td>-</td>
<td>{num_peptide / original_num_peptide * 100:.2f}%</td>
</tr>"""

# add protein number if protein df is not None
if num_protein != 'NA':
msg += f"""
<tr>
<td>Proteins</td>
<td>{num_protein}</td>
<td>{self.tfa.peptide_num_used["protein"]}</td>
<td>{self.tfa.peptide_num_used["protein"] / original_num_peptide * 100:.2f}%</td>
</tr>"""

# close the HTML
msg += """
</table>
</body>
</html>"""

msg_for_print = f'''
Taxa Level: {self.tfa.taxa_level}
Function Category: {self.tfa.func_name}
Number of Taxa: {num_taxa} (Peptides Used: {self.tfa.peptide_num_used["taxa"]})
Number of Functions: {num_func} (Peptides Used: {self.tfa.peptide_num_used["func"]})
Number of OTFs: {num_taxa_func} (Peptides Used: {self.tfa.peptide_num_used["taxa_func"]})
Number of Peptides: {num_peptide} ({num_peptide / original_num_peptide * 100:.2f}%)
'''

print(f'\n----Multi Table Result----\n{msg_for_print}\n---------------------------\n')
self.logger.write_log(msg_for_print.replace('\n', ''))
QMessageBox.information(self.MainWindow, 'Result', msg)


## Database builder by own Table
def show_toolButton_db_own_anno_help(self):
Expand Down
10 changes: 5 additions & 5 deletions metax/gui/metax_gui/main_window.ui
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@
<x>0</x>
<y>0</y>
<width>528</width>
<height>530</height>
<height>534</height>
</rect>
</property>
<attribute name="label">
Expand Down Expand Up @@ -1385,7 +1385,7 @@
<string>Create Taxa and Functions only from OTFs</string>
</property>
<property name="checked">
<bool>true</bool>
<bool>false</bool>
</property>
</widget>
</item>
Expand Down Expand Up @@ -5644,7 +5644,7 @@
<x>0</x>
<y>0</y>
<width>996</width>
<height>99</height>
<height>103</height>
</rect>
</property>
<layout class="QGridLayout" name="gridLayout_68">
Expand Down Expand Up @@ -7425,7 +7425,7 @@
<x>0</x>
<y>0</y>
<width>1016</width>
<height>101</height>
<height>105</height>
</rect>
</property>
<layout class="QGridLayout" name="gridLayout_57">
Expand Down Expand Up @@ -10240,7 +10240,7 @@
<x>0</x>
<y>0</y>
<width>1122</width>
<height>23</height>
<height>21</height>
</rect>
</property>
<widget class="QMenu" name="menuTools">
Expand Down
12 changes: 6 additions & 6 deletions metax/gui/metax_gui/ui_main_window.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

# Form implementation generated from reading ui file 'c:\Users\max\OneDrive - University of Ottawa\code\TaxaFunc\MetaX\metax\gui\metax_gui\main_window.ui'
# Form implementation generated from reading ui file 'c:\Users\Qing\OneDrive - University of Ottawa\code\TaxaFunc\MetaX\metax\gui\metax_gui\main_window.ui'
#
# Created by: PyQt5 UI code generator 5.15.9
#
Expand Down Expand Up @@ -147,7 +147,7 @@ def setupUi(self, metaX_main):
self.toolBox_2.setMaximumSize(QtCore.QSize(1677, 16777215))
self.toolBox_2.setObjectName("toolBox_2")
self.page_2 = QtWidgets.QWidget()
self.page_2.setGeometry(QtCore.QRect(0, 0, 528, 530))
self.page_2.setGeometry(QtCore.QRect(0, 0, 528, 534))
self.page_2.setObjectName("page_2")
self.gridLayout_27 = QtWidgets.QGridLayout(self.page_2)
self.gridLayout_27.setObjectName("gridLayout_27")
Expand Down Expand Up @@ -682,7 +682,7 @@ def setupUi(self, metaX_main):
self.gridLayout_17.addLayout(self.horizontalLayout, 1, 3, 1, 1)
self.checkBox_set_otf_taxa_and_func_only_from_otf = QtWidgets.QCheckBox(self.tab_set_taxa_func)
self.checkBox_set_otf_taxa_and_func_only_from_otf.setStatusTip("")
self.checkBox_set_otf_taxa_and_func_only_from_otf.setChecked(True)
self.checkBox_set_otf_taxa_and_func_only_from_otf.setChecked(False)
self.checkBox_set_otf_taxa_and_func_only_from_otf.setObjectName("checkBox_set_otf_taxa_and_func_only_from_otf")
self.gridLayout_17.addWidget(self.checkBox_set_otf_taxa_and_func_only_from_otf, 1, 4, 1, 1)
self.gridLayout_25.addLayout(self.gridLayout_17, 1, 0, 1, 1)
Expand Down Expand Up @@ -2879,7 +2879,7 @@ def setupUi(self, metaX_main):
self.scrollArea_3.setWidgetResizable(True)
self.scrollArea_3.setObjectName("scrollArea_3")
self.scrollAreaWidgetContents_4 = QtWidgets.QWidget()
self.scrollAreaWidgetContents_4.setGeometry(QtCore.QRect(0, 0, 996, 99))
self.scrollAreaWidgetContents_4.setGeometry(QtCore.QRect(0, 0, 996, 103))
self.scrollAreaWidgetContents_4.setObjectName("scrollAreaWidgetContents_4")
self.gridLayout_68 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_4)
self.gridLayout_68.setObjectName("gridLayout_68")
Expand Down Expand Up @@ -3825,7 +3825,7 @@ def setupUi(self, metaX_main):
self.scrollArea_5.setWidgetResizable(True)
self.scrollArea_5.setObjectName("scrollArea_5")
self.scrollAreaWidgetContents_6 = QtWidgets.QWidget()
self.scrollAreaWidgetContents_6.setGeometry(QtCore.QRect(0, 0, 1016, 101))
self.scrollAreaWidgetContents_6.setGeometry(QtCore.QRect(0, 0, 1016, 105))
self.scrollAreaWidgetContents_6.setObjectName("scrollAreaWidgetContents_6")
self.gridLayout_57 = QtWidgets.QGridLayout(self.scrollAreaWidgetContents_6)
self.gridLayout_57.setObjectName("gridLayout_57")
Expand Down Expand Up @@ -5345,7 +5345,7 @@ def setupUi(self, metaX_main):
self.statusbar.setObjectName("statusbar")
metaX_main.setStatusBar(self.statusbar)
self.menuBar = QtWidgets.QMenuBar(metaX_main)
self.menuBar.setGeometry(QtCore.QRect(0, 0, 1122, 23))
self.menuBar.setGeometry(QtCore.QRect(0, 0, 1122, 21))
self.menuBar.setObjectName("menuBar")
self.menuTools = QtWidgets.QMenu(self.menuBar)
self.menuTools.setObjectName("menuTools")
Expand Down
32 changes: 18 additions & 14 deletions metax/taxafunc_analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,7 @@ def __init__(
self.protein_df: Optional[pd.DataFrame] = None
self.any_df_mode = any_df_mode # if True, the consider the TaxaFunc df as other_df
self.custom_df: Optional[pd.DataFrame] = None # other df, any df that user want to add
self.outlier_status = {'peptide': None, 'taxa': None, 'func': None,
'taxa_func': None, 'protein': None, 'custom': None}
self.peptide_num_used = {'taxa': 0, 'func': 0, 'taxa_func': 0, 'protein': 0}

self.split_func_status:bool = False
self.split_func_sep:str = ''
Expand Down Expand Up @@ -498,7 +497,6 @@ def set_any_df_table(self,
data_preprocess_params: dict = {'normalize_method': None, 'transform_method': None,
'batch_meta': None, 'processing_order': None}):
df = self.original_df.copy()
self.outlier_status['custom'] = None # reset outlier_status
df =self.detect_and_handle_outliers(df=df, **outlier_params)
df = self.data_preprocess(df=df,df_name = 'custom', **data_preprocess_params)
# set index as first column
Expand Down Expand Up @@ -699,22 +697,22 @@ def set_multi_tables(self, level: str = 's', func_threshold:float = 1.00,
return

#! The following code is for the normal mode

# add 'peptide_num_threshold' to 'data_preprocess_params
data_preprocess_params['peptide_num_threshold'] = peptide_num_threshold

#2. sum the protein intensity
if sum_protein:
# data preprocess for peptide table
print("---Starting to create protein table---")
self.peptide_num_used['protein'] = 0
df_peptide_for_protein = self.detect_and_handle_outliers(df=self.original_df, **outlier_params)
self.protein_df = SumProteinIntensity(taxa_func_analyzer=self, df=df_peptide_for_protein).sum_protein_intensity( **sum_protein_params)
self.protein_df = self.data_preprocess(df=self.protein_df,df_name = 'protein',
**data_preprocess_params)


# reset outlier_status
self.outlier_status = {'peptide': None, 'taxa': None, 'func': None, 'taxa_func': None}
for df_name in ['taxa', 'func', 'taxa_func']:
self.peptide_num_used[df_name] = 0 # reset the peptide_num_used
# reset split_func status
self.split_func_status = split_func
self.split_func_sep = split_func_params['split_by']
Expand Down Expand Up @@ -767,27 +765,28 @@ def strip_taxa(x, level):
raise ValueError("Please input the correct taxa level (m, s, g, f, o, c, p, d, l)")


# extract 'taxa', sample intensity #! and 'peptide_num' fto avoid the duplicated items when handling outlier
df_taxa_pep = df_filtered_peptides[[self.peptide_col_name,'Taxon'] + self.sample_list]
# add column 'peptide_num' to df_taxa as 1
df_taxa_pep['peptide_num'] = 1

# if taxa_and_func_only_from_otf:
if True: # for testing
if not taxa_and_func_only_from_otf:
# extract 'taxa', sample intensity #! and 'peptide_col' to avoid the duplicated items when handling outlier
df_taxa_pep = df_filtered_peptides[[self.peptide_col_name,'Taxon'] + self.sample_list]
# add column 'peptide_num' to df_taxa as 1
df_taxa_pep['peptide_num'] = 1
# groupby 'Taxon' and sum the sample intensity
print("\n-----Starting to perform outlier detection and handling for [Peptide-Taxon] table...-----")
df_taxa_pep = self.detect_and_handle_outliers(df=df_taxa_pep, **outlier_params)
self.peptide_num_used['taxa'] = len(df_taxa_pep)
df_taxa = df_taxa_pep.groupby('Taxon').sum(numeric_only=True)
print("\n-----Starting to perform data pre-processing for Taxa table...-----")
df_taxa = self.data_preprocess(df=df_taxa,df_name = 'taxa', **data_preprocess_params)
self.taxa_df = df_taxa
#-----Taxa Table End-----

# create func table
df_func_pep = self.filter_peptides_by_taxa_func(df= self.original_df, func_threshold=func_threshold,
keep_unknow_func=keep_unknow_func, filter_taxa=False)
df_func_pep = df_func_pep[[self.peptide_col_name, self.func_name] + self.sample_list]
print("\n-----Starting to perform outlier detection and handling for [Peptide-Function] table...-----")
df_func_pep = self.detect_and_handle_outliers(df=df_func_pep, **outlier_params)
self.peptide_num_used['func'] = len(df_func_pep)
df_func_pep['peptide_num'] = 1
df_func = df_func_pep.groupby(self.func_name).sum(numeric_only=True)

Expand Down Expand Up @@ -826,6 +825,11 @@ def strip_taxa(x, level):
# ----- create taxa_func table -----
df_taxa_func = df_half_processed_peptides[[self.peptide_col_name, 'Taxon', self.func_name] + self.sample_list]
df_taxa_func['peptide_num'] = 1

for key in ['taxa_func', 'taxa', 'func']:
self.peptide_num_used[key] = len(df_taxa_func) if self.peptide_num_used[key] == 0 else self.peptide_num_used[key]


df_taxa_func = df_taxa_func.groupby(['Taxon', self.func_name], as_index=True).sum(numeric_only=True)

# split the function before data preprocess
Expand Down
6 changes: 1 addition & 5 deletions metax/taxafunc_analyzer/analyzer_utils/data_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -693,10 +693,6 @@ def data_preprocess(self, df: pd.DataFrame, normalize_method: str|None = None,
raise ValueError('processing_order must be in [outlier, batch, transform, normalize]')
print(f'\n{self._get_current_time()} -----Data preprocessing of {df_name.upper()} finished.-----\n')

if df_name in {'peptide', 'taxa', 'func', 'taxa_func', 'protein', 'custom'}:
left_row_num = len(df)
# self.tfa.outlier_status[df_name] = f'{left_row_num}/{original_row_num} ({left_row_num/original_row_num*100:.2f}%)'
self.tfa.outlier_status[df_name] = f'{left_row_num} ({left_row_num/len(self.tfa.original_df)*100:.2f}%)'


return df

Loading

0 comments on commit 4faa5d0

Please sign in to comment.