From 4322deed4365442945aa786dd003594667a6bb1f Mon Sep 17 00:00:00 2001 From: Qing Date: Sat, 10 Aug 2024 23:59:48 -0400 Subject: [PATCH] Fixed the bug where the function table was not filtered by taxa level. --- Docs/ChangeLog.md | 5 +++ metax/gui/main_gui.py | 5 ++- metax/taxafunc_analyzer/analyzer.py | 69 +++++++++++++++-------------- metax/utils/version.py | 2 +- 4 files changed, 45 insertions(+), 36 deletions(-) diff --git a/Docs/ChangeLog.md b/Docs/ChangeLog.md index 2000a57..6b68082 100644 --- a/Docs/ChangeLog.md +++ b/Docs/ChangeLog.md @@ -1,3 +1,8 @@ +# Version: 1.109.12 +## Date: 2024-08-10 +### Changes: +- Fix: The function table is now filtered by taxa level. + # Version: 1.109.11 ## Date: 2024-08-10 ### Changes: diff --git a/metax/gui/main_gui.py b/metax/gui/main_gui.py index 841ac00..2d48dbe 100644 --- a/metax/gui/main_gui.py +++ b/metax/gui/main_gui.py @@ -2594,11 +2594,12 @@ def set_multi_table(self, restore_taxafunc=False, saved_obj=None): 'outlier_handle_by_group': outlier_handle_by_group, 'processing_order': processing_order} - set_multi_table_params = {'level': taxa_level, f'func_threshold': func_threshold, + set_multi_table_params = {'level': taxa_level, 'func_threshold': func_threshold, 'data_preprocess_params': data_preprocess_params, 'processing_after_sum': processing_after_sum, 'peptide_num_threshold': peptide_num_threshold, - 'sum_protein': sum_protein, 'sum_protein_params': sum_protein_params} + 'sum_protein': sum_protein, 'sum_protein_params': sum_protein_params, + 'keep_unknow_func': False} def callback_after_set_multi_tables(result, success): if success: diff --git a/metax/taxafunc_analyzer/analyzer.py b/metax/taxafunc_analyzer/analyzer.py index f0b8f1a..2c79fb4 100644 --- a/metax/taxafunc_analyzer/analyzer.py +++ b/metax/taxafunc_analyzer/analyzer.py @@ -533,7 +533,8 @@ def set_multi_tables(self, level: str = 's', func_threshold:float = 1.00, 'by_sample': False, 'rank_method': 'unique_counts', 'greedy_method': 'heap', - } + },
keep_unknow_func: bool = False ): """ Example Usage: @@ -543,7 +544,9 @@ def set_multi_tables(self, level: str = 's', func_threshold:float = 1.00, 'outlier_detect_by_group': 'Individual', 'outlier_handle_by_group': None, 'processing_order': ['outlier', 'transform', 'normalize', 'batch']}, - peptide_num_threshold = {'taxa': 3, 'func': 3, 'taxa_func': 3}) + peptide_num_threshold = {'taxa': 3, 'func': 3, 'taxa_func': 3}, + sum_protein = False, sum_protein_params = {'method': 'razor', 'by_sample': False, 'rank_method': 'unique_counts', 'greedy_method': 'heap'}, + keep_unknow_func = False) """ # for any_df_mode, the df is considered as other_df if self.any_df_mode: @@ -571,33 +574,6 @@ def set_multi_tables(self, level: str = 's', func_threshold:float = 1.00, - - - print("Starting to set Function table...") - # filter prop = 100% and func are not (NULL, -, NaN) - df_func = df[(df[f'{self.func_name}_prop'] >= func_threshold) & - (df[self.func_name].notnull()) & - (df[self.func_name] != 'not_found') & - (df[self.func_name] != '-') & - (df[self.func_name] != 'NaN') & - (df[self.func_name] != 'unknown') #! 
uncomment this line if needed show the peptide annotated to unknown function - ].copy() - - df_func = df_func.groupby(self.func_name).sum(numeric_only=True)[self.sample_list] - if processing_after_sum: - print("\n-----Starting to perform data pre-processing for Function table...-----") - df_func = self.data_preprocess(df=df_func,df_name = 'func', **data_preprocess_params) - - # add column 'peptide_num' to df_func - df_func['peptide_num'] = df.groupby(self.func_name).count()[self.peptide_col_name] - # move the column 'peptide_num' to the first column - cols = list(df_func.columns) - cols = [cols[-1]] + cols[:-1] - df_func = df_func[cols] - # filter the df_func by peptide_num_threshold - df_func = df_func[df_func['peptide_num'] >= peptide_num_threshold['func']] - print(f"Function number with prop >= [{func_threshold}], peptide_num >= [{peptide_num_threshold['func']}]: {df_func.shape[0]}") - print("Starting to set Taxa table...") # select taxa level and create dfc (df clean) def strip_taxa(x, level): @@ -656,11 +632,33 @@ def strip_taxa(x, level): dfc[self.func_name].notnull() & (dfc[self.func_name] != 'not_found') & (dfc[self.func_name] != '-') & - (dfc[self.func_name] != 'unknown') & - (dfc[self.func_name] != 'NaN') + (dfc[self.func_name] != 'NaN') ) + + if not keep_unknow_func: + filter_conditions = filter_conditions & (dfc[self.func_name] != 'unknown') dfc = dfc[filter_conditions] + + + print("Starting to set Function table...") + df_func = dfc.copy() + df_func = df_func.groupby(self.func_name).sum(numeric_only=True)[self.sample_list] + if processing_after_sum: + print("\n-----Starting to perform data pre-processing for Function table...-----") + df_func = self.data_preprocess(df=df_func,df_name = 'func', **data_preprocess_params) + + # add column 'peptide_num' to df_func + df_func['peptide_num'] = dfc.groupby(self.func_name).count()[self.peptide_col_name] + # move the column 'peptide_num' to the first column + cols = list(df_func.columns) + cols = [cols[-1]] + 
cols[:-1] + df_func = df_func[cols] + # filter the df_func by peptide_num_threshold + df_func = df_func[df_func['peptide_num'] >= peptide_num_threshold['func']] + print(f"Function number with prop >= [{func_threshold}], peptide_num >= [{peptide_num_threshold['func']}]: {df_func.shape[0]}") + + # create clean peptide table if processing_after_sum: print("\n-----Starting to perform data pre-processing for dfc...-----") @@ -801,16 +799,21 @@ def get_df(self, table_name:str = 'taxa'): if __name__ == '__main__': + import os + current_path = os.path.dirname(os.path.abspath(__file__)) df_path = '../data/example_data/Example_OTF.tsv' meta_path = '../data/example_data/Example_Meta.tsv' + df_path = os.path.join(current_path, df_path) + meta_path = os.path.join(current_path, meta_path) + sw = TaxaFuncAnalyzer(df_path, meta_path) - sw.set_func('KEGG_Pathway_name') + sw.set_func('dbcan_EC') sw.set_multi_tables(level='m', data_preprocess_params = {'normalize_method': None, 'transform_method': "log10", 'batch_meta': None, 'outlier_detect_method': None, 'outlier_handle_method': None, 'outlier_detect_by_group': None, 'outlier_handle_by_group': None, 'processing_order': None}, - peptide_num_threshold = {'taxa': 3, 'func': 1, 'taxa_func': 1},) + peptide_num_threshold = {'taxa': 3, 'func': 3, 'taxa_func': 3},) sw.check_attributes() \ No newline at end of file diff --git a/metax/utils/version.py b/metax/utils/version.py index 09dc3dc..3896684 100644 --- a/metax/utils/version.py +++ b/metax/utils/version.py @@ -1,2 +1,2 @@ -__version__ = '1.109.11' +__version__ = '1.109.12' API_version = '2' \ No newline at end of file