Skip to content

Commit

Permalink
- Fix: Fixed the bug of extracting the peptides of taxa, funcs or taxa…
Browse files Browse the repository at this point in the history
…-funcs when splitting the function items.
  • Loading branch information
byemaxx committed Aug 22, 2024
1 parent 4e3b361 commit 5dfc08b
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 30 deletions.
7 changes: 6 additions & 1 deletion Docs/ChangeLog.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
# Version: 1.111.6
## Date: 2024-08-21
### Changes:
- Fix: Fixed a bug in extracting the peptides of taxa, funcs or taxa-funcs when the function items are split.


# Version: 1.111.5
## Date: 2024-08-21
### Changes:
- Change: Optimized the x-axis and y-axis labels of the heatmap plot to make the labels clearer.


# Version: 1.111.4
## Date: 2024-08-21
### Changes:
Expand Down
27 changes: 9 additions & 18 deletions metax/gui/main_gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -3479,28 +3479,19 @@ def plot_basic_list(self, plot_type='heatmap'):
df = self.tfa.peptide_df.copy()

else:
peptides_list = []

if table_name == 'Taxa':
df = self.tfa.clean_df.loc[self.tfa.clean_df['Taxon'].isin(self.basic_heatmap_list)]
df.index = df[self.tfa.peptide_col_name]
for i in self.basic_heatmap_list:
peptides_list.extend(self.tfa.peptides_linked_dict['taxa'][i])

elif table_name == 'Functions':
df = self.tfa.clean_df.loc[self.tfa.clean_df[self.tfa.func_name].isin(self.basic_heatmap_list)]
df.index = df[self.tfa.peptide_col_name]
for i in self.basic_heatmap_list:
peptides_list.extend(self.tfa.peptides_linked_dict['func'][i])

elif table_name == 'Taxa-Functions':
df_list = []
for i in self.basic_heatmap_list:
taxon, func = i.split(' <')
func = func[:-1]
dft = self.tfa.clean_df.loc[(self.tfa.clean_df['Taxon'] == taxon) & (self.tfa.clean_df[self.tfa.func_name] == func)]
df_list.append(dft)

if df_list:
df_all = pd.concat(df_list)
df_all.index = df_all[self.tfa.peptide_col_name]
df = df_all
else:
raise ValueError('No valid taxa-function belongs to the selected taxa-function!')
peptides_list.extend(self.tfa.peptides_linked_dict['taxa_func'][i])

elif table_name == 'Proteins':
QMessageBox.warning(self.MainWindow, 'Warning',
Expand All @@ -3511,9 +3502,9 @@ def plot_basic_list(self, plot_type='heatmap'):
return

else: # Peptide
df = self.tfa.peptide_df.copy()
df = df.loc[self.basic_heatmap_list]
peptides_list = self.basic_heatmap_list

df = self.tfa.peptide_df.loc[peptides_list]
df = df[sample_list]

else:
Expand Down
75 changes: 72 additions & 3 deletions metax/taxafunc_analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,15 @@ def __init__(
self.func_taxa_df: Optional[pd.DataFrame] = None
self.taxa_func_linked_dict: Optional[Dict[str, List[tuple]]] = None
self.func_taxa_linked_dict: Optional[Dict[str, List[tuple]]] = None
self.peptides_linked_dict = {'taxa': {}, 'func': {}, 'taxa_func': {}}
self.protein_df: Optional[pd.DataFrame] = None
self.any_df_mode = any_df_mode # if True, the consider the TaxaFunc df as other_df
self.custom_df: Optional[pd.DataFrame] = None # other df, any df that user want to add
self.outlier_status = {'peptide': None, 'taxa': None, 'func': None,
'taxa_func': None, 'protein': None, 'custom': None}

self.split_func_status:bool = False
self.split_func_sep:str = ''

# load function
self.BasicStats = BasicStats(self)
Expand Down Expand Up @@ -560,6 +564,7 @@ def split_func(self, taxa_func_df, split_func_params: dict = {'split_by': ',', '
num_splits = len(split_funcs_list)

for new_func in split_funcs_list:
new_func = new_func.strip()
split_row = row[sample_list] / num_splits if share_intensity else row[sample_list]
split_row[func_col] = new_func
split_row[taxon_col] = row[taxon_col]
Expand All @@ -577,6 +582,65 @@ def split_func(self, taxa_func_df, split_func_params: dict = {'split_by': ',', '

return new_data

def create_peptides_dict_in_taxa_func(self, dfc):
    """
    Creates lookup dictionaries from each taxon, function and taxa-function to its peptides.

    When ``self.split_func_status`` is True, the function annotation of every row is
    split on ``self.split_func_sep`` and each stripped item is indexed on its own, so
    the keys are the individual function items rather than the unsplit annotation.
    Taxa-function keys are built as ``f'{taxon} <{function}>'``.

    Parameters:
        dfc (DataFrame): The input DataFrame containing the peptide column
            (``self.peptide_col_name``), the 'Taxon' column and the function
            column (``self.func_name``).
    Returns:
        self.peptides_linked_dict (dict): A dictionary with the keys 'taxa', 'func'
            and 'taxa_func', each mapping a name to the list of its peptides.
    """
    print("Creating peptides_linked_dict in taxa, func, and taxa_func...")
    # Column selection already yields a new frame, and nothing below mutates it,
    # so the full input does not need to be copied first.
    df = dfc[[self.peptide_col_name, 'Taxon', self.func_name]]

    split_funcs = self.split_func_status
    sep = self.split_func_sep

    peptides_in_taxa = {}
    peptides_in_func = {}
    peptides_in_taxa_func = {}

    # Plain tuples in column order (peptide, taxon, function); avoids building a
    # Series per row the way iterrows() does.
    rows = df.itertuples(index=False, name=None)
    for peptide, taxa, func in tqdm(rows, total=len(df), desc="Creating peptides_dict"):
        peptides_in_taxa.setdefault(taxa, []).append(peptide)

        if split_funcs:
            # dict.fromkeys drops repeated items within one annotation while keeping
            # their order, so a peptide is not listed twice under the same function.
            func_items = dict.fromkeys(item.strip() for item in func.split(sep))
        else:
            func_items = (func,)

        for item in func_items:
            peptides_in_func.setdefault(item, []).append(peptide)
            peptides_in_taxa_func.setdefault(f'{taxa} <{item}>', []).append(peptide)

    self.peptides_linked_dict = {'taxa': peptides_in_taxa, 'func': peptides_in_func, 'taxa_func': peptides_in_taxa_func}
    return self.peptides_linked_dict


def set_multi_tables(self, level: str = 's', func_threshold:float = 1.00,
processing_after_sum: bool = False,
Expand Down Expand Up @@ -615,7 +679,10 @@ def set_multi_tables(self, level: str = 's', func_threshold:float = 1.00,
#! following code is for the normal mode
# reset outlier_status
self.outlier_status = {'peptide': None, 'taxa': None, 'func': None, 'taxa_func': None}

# reset split_func status
self.split_func_status = split_func
self.split_func_sep = split_func_params['split_by']

df = self.original_df.copy()
# perform data pre-processing
if not processing_after_sum:
Expand Down Expand Up @@ -727,7 +794,9 @@ def strip_taxa(x, level):
df_taxa = df_taxa[df_taxa['peptide_num'] >= peptide_num_threshold['taxa']]
print(f"Taxa number with '{level}' level, peptide_num >= [{peptide_num_threshold['taxa']}]: {df_taxa.shape[0]}")
#-----Taxa Table End-----


#------create peptides_dict in taxa, func and taxa_func------
self.create_peptides_dict_in_taxa_func(dfc)

# ----- create taxa_func table -----
df_taxa_func = dfc.copy()
Expand Down Expand Up @@ -879,7 +948,7 @@ def get_df(self, table_name:str = 'taxa'):
'processing_order': None},
peptide_num_threshold = {'taxa': 1, 'func': 1, 'taxa_func': 1},
keep_unknow_func=False, sum_protein=False, sum_protein_params = {'method': 'razor', 'by_sample': False, 'rank_method': 'unique_counts', 'greedy_method': 'heap'},
split_func=True, split_func_params = {'split_by': ';', 'share_intensity': False}
split_func=True, split_func_params = {'split_by': '|', 'share_intensity': False}
)

sw.check_attributes()
15 changes: 9 additions & 6 deletions metax/taxafunc_analyzer/analyzer_utils/get_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ class GetMatrix:
def __init__(self, tfa):
    """Keep a reference to ``tfa``; the other methods read its tables and settings through ``self.tfa``."""
    self.tfa = tfa

def get_intensity_matrix(self, func_name: str = None, taxon_name: str = None,
peptide_seq: str = None, sample_list: list = None, condition:list = None) -> pd.DataFrame:
def get_intensity_matrix(self, func_name: str|None = None, taxon_name: str|None = None,
peptide_seq: str|None = None, sample_list: list|None = None, condition:list|None = None) -> pd.DataFrame:
# input: a taxon with its function, a function with its taxon,
# and the peptides in the function or taxon
# output: a matrix of the intensity of the taxon or function or peptide in each sample
Expand All @@ -21,10 +21,13 @@ def get_intensity_matrix(self, func_name: str = None, taxon_name: str = None,
if taxon_name is None:
dft = dft[dft[self.tfa.func_name] == func_name]
dft.set_index('Taxon', inplace=True)
if taxon_name is not None:
dft = self.tfa.clean_df[(self.tfa.clean_df['Taxon'] == taxon_name) & (
self.tfa.clean_df[self.tfa.func_name] == func_name)]
dft.set_index(self.tfa.peptide_col_name, inplace=True)

if taxon_name is not None: #all peptides in the taxon-function
# get the intensity matrix of the taxon with its function
taxa_func = f'{taxon_name} <{func_name}>'
peptides_list = self.tfa.peptides_linked_dict['taxa_func'][taxa_func]
dft = self.tfa.peptide_df.loc[peptides_list]


elif taxon_name is not None and peptide_seq is None:
dft = self.tfa.func_taxa_df.copy()
Expand Down
2 changes: 1 addition & 1 deletion metax/utils/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__version__ = '1.111.5'
__version__ = '1.111.6'
API_version = '2'
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "MetaXTools"
version = "1.111.5"
version = "1.111.6"
description = "MetaXTools is a novel tool for linking peptide sequences with taxonomic and functional information in Metaproteomics."
readme = "README_PyPi.md"
license = { text = "NorthOmics" }
Expand Down

0 comments on commit 5dfc08b

Please sign in to comment.