From 0ddde2651753d4e12310a88e1d7806f0f4e68451 Mon Sep 17 00:00:00 2001
From: Qing <44231502+byemaxx@users.noreply.github.com>
Date: Mon, 28 Oct 2024 16:06:32 -0400
Subject: [PATCH] - Fix: Fixed the bug of when plot the heatmap of taxa-funcs
with t-statistic and f-statistic, the value still selected as p-value. -
Change: Updated the cookbook.
---
Docs/ChangeLog.md | 7 +-
Docs/MetaX_Cookbook.md | 270 +++++++++++++-------------
metax/taxafunc_ploter/heatmap_plot.py | 13 +-
metax/utils/version.py | 2 +-
pyproject.toml | 2 +-
5 files changed, 152 insertions(+), 142 deletions(-)
diff --git a/Docs/ChangeLog.md b/Docs/ChangeLog.md
index c7897c3..a565514 100644
--- a/Docs/ChangeLog.md
+++ b/Docs/ChangeLog.md
@@ -1,9 +1,14 @@
+# Version: 1.116.1
+## Date: 2024-10-28
+### Changes:
+- Fix: Fixed a bug where the taxa-funcs heatmap still used the p-value as the plotted value when t-statistic or f-statistic was selected.
+- Change: Updated the cookbook.
+
# Version: 1.116.0
## Date: 2024-10-18
### Changes:
- New: 1. Added Advanced Parameters for Peptide Annotator 2. Add [Fill Zero] method for handling missing values in the data preprossing part.
-
# Version: 1.115.5
## Date: 2024-10-16
### Changes:
diff --git a/Docs/MetaX_Cookbook.md b/Docs/MetaX_Cookbook.md
index 1808fe0..9f7b81d 100644
--- a/Docs/MetaX_Cookbook.md
+++ b/Docs/MetaX_Cookbook.md
@@ -13,7 +13,7 @@ MetaX also features statistical modules and plotting tools for ana
# Project Page
-Visit **Github** to get more information:
+Visit **GitHub** to get more information:
[https://github.com/byemaxx/MetaX](https://github.com/byemaxx/MetaX)
@@ -35,144 +35,17 @@ Visit **Github** to get more information:
-# Preparing Your Data
-
-## Module 1. Database Builder
-
-**Note:** The results from **MetaLab v2.3** MaxQuant workflow do not require database building. However, we do not recommend using these results as input to MetaX, as many peptides may be discarded.
-
-- Build the database for the **first time** using the Database Builder.
-
- **Option 1: Build Database Using MGnify Data**
-
- Ensure you download the correct database type corresponding to your data.
-
- ![dbbuilder](./MetaX_Cookbook.assets/dbbuilder.png)
-
- **Option 2: Build Database Using Own Data**
-
- 1. **Annotation Table:** A TSV table (tab-separated), with the first column as protein name joined with Genome by "_", e.g., "Genome1_protein1", and other columns containing annotation information.
-
- ![dbbuilder_own](./MetaX_Cookbook.assets/dbbuilder_own.png)
-
- 2. **Taxa Table:** A TSV table (tab-separated), with the first column as Genome name, e.g., "Genome1", and the second column as taxa.
-
- **Example Annotation Table:**
-
- | Query | Preferred_name | EC | KEGG_ko |
- | ------------------- | -------------- | ----------------- | ------------------- |
- | MGYG000000001_00696 | mfd | - | ko:K03723 |
- | MGYG000000001_02838 | hxlR | - | - |
- | MGYG000000001_01674 | ispG | 1.17.7.1,1.17.7.3 | ko:K03526 |
- | MGYG000000001_02710 | glsA | 3.5.1.2 | ko:K01425 |
- | MGYG000000001_01356 | mutS2 | - | ko:K07456 |
- | MGYG000000001_02630 | - | - | - |
- | MGYG000000001_02418 | ackA | 2.7.2.1 | ko:K00925 |
- | MGYG000000001_00728 | atpA | 3.6.3.14 | ko:K02111 |
- | MGYG000000001_00695 | pth | 3.1.1.29 | ko:K01056 |
- | MGYG000000001_02907 | - | - | ko:K03086 |
- | MGYG000000001_02592 | rplC | - | ko:K02906 |
- | MGYG000000001_00137 | - | - | ko:K03480,ko:K03488 |
-
- **Example Taxa Table:**
-
- | Genome | Lineage |
- | ------------- | ------------------------------------------------------------ |
- | MGYG000000001 | d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Peptostreptococcales;f_Peptostreptococcaceae;g_GCA-900066495;s_GCA-900066495 sp902362365 |
- | MGYG000000002 | d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Lachnospirales;f_Lachnospiraceae;g_Blautia_A;s_Blautia_A faecis |
- | MGYG000000003 | d_Bacteria;p_Bacteroidota;c_Bacteroidia;o_Bacteroidales;f_Rikenellaceae;g_Alistipes;s_Alistipes shahii |
- | MGYG000000004 | d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Oscillospirales;f_Ruminococcaceae;g_Anaerotruncus;s_Anaerotruncus colihominis |
- | MGYG000000005 | d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Peptostreptococcales;f_Peptostreptococcaceae;g_Terrisporobacter;s_Terrisporobacter glycolicus_A |
- | MGYG000000006 | d_Bacteria;p_Firmicutes;c_Bacilli;o_Staphylococcales;f_Staphylococcaceae;g_Staphylococcus;s_Staphylococcus xylosus |
- | MGYG000000007 | d_Bacteria;p_Firmicutes;c_Bacilli;o_Lactobacillales;f_Lactobacillaceae;g_Lactobacillus;s_Lactobacillus intestinalis |
- | MGYG000000008 | d_Bacteria;p_Firmicutes;c_Bacilli;o_Lactobacillales;f_Lactobacillaceae;g_Lactobacillus;s_Lactobacillus johnsonii |
- | MGYG000000009 | d_Bacteria;p_Firmicutes;c_Bacilli;o_Lactobacillales;f_Lactobacillaceae;g_Ligilactobacillus;s_Ligilactobacillus murinus |
-
-## Module 2. Database Updater
-
-The **Database Updater** allows updating the database built by the **Database Builder** or adding more annotations. This step is **optional**.
-
-- Update the built database and extend annotations.
-
- ![db_updater](./MetaX_Cookbook.assets/db_updater.png)
-
- **Option 1: Built-in Mode**
-
- We recommend some extended databases, such as [dbCAN_seq](https://bcb.unl.edu/dbCAN_seq).
-
- **Option 2: TSV Table**
-
- Extend the database by adding a new database to the database table. Ensure the column separator is a tab and the first column is the Protein name, with other columns containing function annotations.
-
- **Example:**
-
- | Protein ID | COG | KEGG | ... |
- | ------------------- | ---------- | ---------- | ---- |
- | MGYG000000001_02630 | Function 1 | Function 1 | ... |
- | MGYG000000001_01475 | Function 2 | Function 1 | ... |
- | MGYG000000001_01539 | Function 3 | Function 1 | ... |
-
-## Module 3. Peptide Annotator
-
-### 1. Results from MAG Workflow
-
-The peptide results use Metagenome-assembled genomes (MAGs) as the reference database for protein searches, e.g., MetaLab-MAG, MetaLab-DIA and other workflows wich using MAG databases like MGnify or customized MAGs Database.
-
-- Annotate the peptide to Operational Taxa-Functions (OTF) Table before analysis using the Peptide Annotator.
-
- ![peptide2taxafunc](./MetaX_Cookbook.assets/peptide2taxafunc.png)
-
- **Required:**
-
- - **Database**: The database created by Database Builder
-
- - **Peptide Table**:
-
- - *Option 1*: From MetaLab-MAG results (final_peptides.tsv)
-
- - *Option 2*: Create it manually, with the first column as the ID (e.g., peptide sequence) and the second column as the proteins ID of MGnify (e.g., MGYG000003683_00301; MGYG000001490_01143) or your database, and other columns as the intensity of each sample.
-
- **Example:**
-
- | Sequence | Proteins | Intensity_V1_01 | Intensity_V1_02 | Intensity_V1_03 | Intensity_V1_04 |
- | ----------------------------------- | ------------------------------------------------------------ | --------------- | --------------- | --------------- | --------------- |
- | (Acetyl)KGGVEPQSETVWR | MGYG000002716_01681;MGYG000000195_00452;MGYG000001616_00519;MGYG000002258_01582;MGYG000001300_00281;MGYG000002926_00231;... | 714650 | 0 | 0 | 0 |
- | (Acetyl)KVIPELNGK | MGYG000003589_01892;MGYG000001560_01812;MGYG000001789_00244;... | 0 | 0 | 0 | 0 |
- | (Acetyl)LAELGAKAVTLSGPDGYIYDPDGITTK | MGYG000001199_02893 | 0 | 0 | 0 | 0 |
- | (Acetyl)LLTGLPDAYGR | MGYG000001757_01206;MGYG000004547_02135;MGYG000001283_00124;MGYG000004758_00803;MGYG000002486_00845;MGYG000000271_01269 | 0 | 307519 | 0 | 0 |
- | (Acetyl)MDFTLDKK | MGYG000000076_01275;MGYG000003694_00879;MGYG000000312_02425;MGYG000000271_02102;MGYG000004271_00233;MGYG000002517_00542;MGYG000000489_01025 | 306231 | 0 | 0 | 1214497 |
-
- - **Output Save Path**: The location to save the result table.
-
- - **LCA Threshold**: Find the LCA with the proportion threshold for each peptide. The default is 1.00 (100%).
-
- ![LCA_prop](./MetaX_Cookbook.assets/LCA_prop.png)
-
-### 2. Results from MaxQuant Workflow
-
-The peptide results from **MetaLab 2.3** MaxQuant workflow.
-
-- Select the **MetaLab** result folder, which contains the **maxquant_search** folder.
-
- ![peptide2taxafunc_tab2_1](MetaX_Cookbook.assets/peptide2taxafunc_tab2_1.png)
-
-- The **Peptide Annotator** will automatically find the **peptides_report.txt**, **BuiltIn.pepTaxa.csv**, and **functions.tsv** in the **maxquant_search** folder. Alternatively, you can select the files manually.
-
- - Select **OTFs Save To** to set the location to save the result table.
-
- ![peptide2taxafunc_tab2_2](MetaX_Cookbook.assets/peptide2taxafunc_tab2_2.png)
-
-
-
# Exploring Data with MetaX
-## Module 4. OTF Analyzer
+See the **[Preparing Your Data](#preparing-your-data)** section to build the database and annotate peptides to OTFs before starting.
+
+## Module 1. OTF Analyzer
-After obtaining the **Operational Taxa-Functions (OTF) Table** using the **Peptide Annotator**, you can perform downstream analysis with the **OTF Analyzer**.
+After obtaining the **Operational Taxa-Functions (OTF) Table** using the **[Peptide Annotator](#module-4-peptide-annotator)**, you can perform downstream analysis with the **OTF Analyzer**.
## 1. Data Preparation
-**OTFs (Operational Taxa-Functions) Table:** Obtained from the Peptide Annotator module.
+**OTFs (Operational Taxa-Functions) Table:** Obtained from the [Peptide Annotator](#module-4-peptide-annotator) module.
**Meta Table:** The first column is sample names, and the other columns represent different groups. If no meta table is provided, meta info will be generated automatically: (1) all samples are in the same group; (2) each sample is a separate group.
@@ -784,6 +657,137 @@ We can select **meta** **groups** or **samples** (default a
+# Preparing Your Data
+
+## Module 2. Database Builder
+
+**Note:** The results from **MetaLab v2.3** MaxQuant workflow do not require database building. However, we do not recommend using these results as input to MetaX, as many peptides may be discarded.
+
+- Build the database for the **first time** using the Database Builder.
+
+ **Option 1: Build Database Using MGnify Data**
+
+ Ensure you download the correct database type corresponding to your data.
+
+ ![dbbuilder](./MetaX_Cookbook.assets/dbbuilder.png)
+
+ **Option 2: Build Database Using Own Data**
+
+ 1. **Annotation Table:** A TSV table (tab-separated), with the first column as protein name joined with Genome by "_", e.g., "Genome1_protein1", and other columns containing annotation information.
+
+ ![dbbuilder_own](./MetaX_Cookbook.assets/dbbuilder_own.png)
+
+ 2. **Taxa Table:** A TSV table (tab-separated), with the first column as Genome name, e.g., "Genome1", and the second column as taxa.
+
+ **Example Annotation Table:**
+
+ | Query | Preferred_name | EC | KEGG_ko |
+ | ------------------- | -------------- | ----------------- | ------------------- |
+ | MGYG000000001_00696 | mfd | - | ko:K03723 |
+ | MGYG000000001_02838 | hxlR | - | - |
+ | MGYG000000001_01674 | ispG | 1.17.7.1,1.17.7.3 | ko:K03526 |
+ | MGYG000000001_02710 | glsA | 3.5.1.2 | ko:K01425 |
+ | MGYG000000001_01356 | mutS2 | - | ko:K07456 |
+ | MGYG000000001_02630 | - | - | - |
+ | MGYG000000001_02418 | ackA | 2.7.2.1 | ko:K00925 |
+ | MGYG000000001_00728 | atpA | 3.6.3.14 | ko:K02111 |
+ | MGYG000000001_00695 | pth | 3.1.1.29 | ko:K01056 |
+ | MGYG000000001_02907 | - | - | ko:K03086 |
+ | MGYG000000001_02592 | rplC | - | ko:K02906 |
+ | MGYG000000001_00137 | - | - | ko:K03480,ko:K03488 |
+
+ **Example Taxa Table:**
+
+ | Genome | Lineage |
+ | ------------- | ------------------------------------------------------------ |
+ | MGYG000000001 | d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Peptostreptococcales;f_Peptostreptococcaceae;g_GCA-900066495;s_GCA-900066495 sp902362365 |
+ | MGYG000000002 | d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Lachnospirales;f_Lachnospiraceae;g_Blautia_A;s_Blautia_A faecis |
+ | MGYG000000003 | d_Bacteria;p_Bacteroidota;c_Bacteroidia;o_Bacteroidales;f_Rikenellaceae;g_Alistipes;s_Alistipes shahii |
+ | MGYG000000004 | d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Oscillospirales;f_Ruminococcaceae;g_Anaerotruncus;s_Anaerotruncus colihominis |
+ | MGYG000000005 | d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Peptostreptococcales;f_Peptostreptococcaceae;g_Terrisporobacter;s_Terrisporobacter glycolicus_A |
+ | MGYG000000006 | d_Bacteria;p_Firmicutes;c_Bacilli;o_Staphylococcales;f_Staphylococcaceae;g_Staphylococcus;s_Staphylococcus xylosus |
+ | MGYG000000007 | d_Bacteria;p_Firmicutes;c_Bacilli;o_Lactobacillales;f_Lactobacillaceae;g_Lactobacillus;s_Lactobacillus intestinalis |
+ | MGYG000000008 | d_Bacteria;p_Firmicutes;c_Bacilli;o_Lactobacillales;f_Lactobacillaceae;g_Lactobacillus;s_Lactobacillus johnsonii |
+ | MGYG000000009 | d_Bacteria;p_Firmicutes;c_Bacilli;o_Lactobacillales;f_Lactobacillaceae;g_Ligilactobacillus;s_Ligilactobacillus murinus |
+
+## Module 3. Database Updater
+
+The **Database Updater** allows updating the database built by the **Database Builder** or adding more annotations. This step is **optional**.
+
+- Update the built database and extend annotations.
+
+ ![db_updater](./MetaX_Cookbook.assets/db_updater.png)
+
+ **Option 1: Built-in Mode**
+
+ We recommend some extended databases, such as [dbCAN_seq](https://bcb.unl.edu/dbCAN_seq).
+
+ **Option 2: TSV Table**
+
+ Extend the database by adding a new database to the database table. Ensure the column separator is a tab and the first column is the Protein name, with other columns containing function annotations.
+
+ **Example:**
+
+ | Protein ID | COG | KEGG | ... |
+ | ------------------- | ---------- | ---------- | ---- |
+ | MGYG000000001_02630 | Function 1 | Function 1 | ... |
+ | MGYG000000001_01475 | Function 2 | Function 1 | ... |
+ | MGYG000000001_01539 | Function 3 | Function 1 | ... |
+
+## Module 4. Peptide Annotator
+
+### 1. Results from MAG Workflow
+
+The peptide results use Metagenome-assembled genomes (MAGs) as the reference database for protein searches, e.g., MetaLab-MAG, MetaLab-DIA, and other workflows that use MAG databases such as MGnify or a customized MAG database.
+
+- Annotate the peptide to the Operational Taxa-Functions (OTF) Table before analysis using the Peptide Annotator.
+
+ ![peptide2taxafunc](./MetaX_Cookbook.assets/peptide2taxafunc.png)
+
+ **Required:**
+
+ - **Database**: The database created by [Database Builder](#module-2-database-builder)
+
+ - **Peptide Table**:
+
+ - *Option 1*: From MetaLab-MAG results (final_peptides.tsv)
+
+ - *Option 2*: Create it manually, with the first column as the ID (e.g., peptide sequence), the second column as the protein IDs from MGnify (e.g., MGYG000003683_00301; MGYG000001490_01143) or your own database, and the remaining columns as the intensity of each sample.
+
+ **Example:**
+
+ | Sequence | Proteins | Intensity_V1_01 | Intensity_V1_02 | Intensity_V1_03 | Intensity_V1_04 |
+ | ----------------------------------- | ------------------------------------------------------------ | --------------- | --------------- | --------------- | --------------- |
+ | (Acetyl)KGGVEPQSETVWR | MGYG000002716_01681;MGYG000000195_00452;MGYG000001616_00519;MGYG000002258_01582;MGYG000001300_00281;MGYG000002926_00231;... | 714650 | 0 | 0 | 0 |
+ | (Acetyl)KVIPELNGK | MGYG000003589_01892;MGYG000001560_01812;MGYG000001789_00244;... | 0 | 0 | 0 | 0 |
+ | (Acetyl)LAELGAKAVTLSGPDGYIYDPDGITTK | MGYG000001199_02893 | 0 | 0 | 0 | 0 |
+ | (Acetyl)LLTGLPDAYGR | MGYG000001757_01206;MGYG000004547_02135;MGYG000001283_00124;MGYG000004758_00803;MGYG000002486_00845;MGYG000000271_01269 | 0 | 307519 | 0 | 0 |
+ | (Acetyl)MDFTLDKK | MGYG000000076_01275;MGYG000003694_00879;MGYG000000312_02425;MGYG000000271_02102;MGYG000004271_00233;MGYG000002517_00542;MGYG000000489_01025 | 306231 | 0 | 0 | 1214497 |
+
+ - **Output Save Path**: The location to save the result table.
+
+ - **LCA Threshold**: Find the LCA with the proportion threshold for each peptide. The default is 1.00 (100%).
+
+ ![LCA_prop](./MetaX_Cookbook.assets/LCA_prop.png)
+
+### 2. Results from MaxQuant Workflow
+
+The peptide results from **MetaLab 2.3** MaxQuant workflow.
+
+- Select the **MetaLab** result folder, which contains the **maxquant_search** folder.
+
+ ![peptide2taxafunc_tab2_1](MetaX_Cookbook.assets/peptide2taxafunc_tab2_1.png)
+
+- The **Peptide Annotator** will automatically find the **peptides_report.txt**, **BuiltIn.pepTaxa.csv**, and **functions.tsv** in the **maxquant_search** folder. Alternatively, you can select the files manually.
+
+ - Select **OTFs Save To** to set the location to save the result table.
+
+ ![peptide2taxafunc_tab2_2](MetaX_Cookbook.assets/peptide2taxafunc_tab2_2.png)
+
+
+
+
+
# Developer Tools
diff --git a/metax/taxafunc_ploter/heatmap_plot.py b/metax/taxafunc_ploter/heatmap_plot.py
index 79d1d14..5190664 100644
--- a/metax/taxafunc_ploter/heatmap_plot.py
+++ b/metax/taxafunc_ploter/heatmap_plot.py
@@ -93,6 +93,7 @@ def plot_top_taxa_func_heatmap_of_test_res(self, df, top_number:int|str= 100,
cmap = type_map.get(value_type, "None")[1] if cmap is None else cmap
+ value_col_name = type_map.get(value_type, "None")[0]
@@ -110,7 +111,7 @@ def plot_top_taxa_func_heatmap_of_test_res(self, df, top_number:int|str= 100,
if rename_taxa:
df_top['Taxon'] = df_top['Taxon'].apply(lambda x: x.split('|')[-1])
# df_top = self.rename_taxa(df_top)
- df_top = df_top.pivot(index=func_name, columns='Taxon', values=p_type)
+ df_top = df_top.pivot(index=func_name, columns='Taxon', values=value_col_name)
print(f"Top [{top_number}] significant: Taxa ({df_top.shape[1]}), Functions ({df_top.shape[0]})")
df_plot = df_top.fillna(1) if plot_type in ['pvalue', 'padj'] else df_top.fillna(0)
@@ -154,11 +155,11 @@ def plot_top_taxa_func_heatmap_of_test_res(self, df, top_number:int|str= 100,
fig.ax_heatmap.set_xlabel("Taxa")
fig.ax_heatmap.set_ylabel("Functions")
- scale_title = f"scaled by {scale}" if scale in ['row', 'column', 'all'] else ''
+ scale_title = f", scaled by {scale}" if scale in ['row', 'column', 'all'] else ''
if title == "":
- title = f"Significant Differences in Taxa-Function (Top {top_number} sorted by {plot_type}, filtered by {p_type}, {scale_title})"
+ title = f"Significant Differences in Taxa-Function (Top {top_number} sorted by {plot_type}, filtered by {p_type}{scale_title})"
else:
- title = f"{title} (Top {top_number} sorted by {plot_type}, filtered by {p_type}, {scale_title})"
+ title = f"{title} (Top {top_number} sorted by {plot_type}, filtered by {p_type}{scale_title})"
plt.suptitle(title)
@@ -321,9 +322,9 @@ def plot_basic_heatmap_of_test_res(self, df, top_number:int = 100, value_type:st
va = self.get_y_labels_va()
)
- scale_title = f"scaled by {scale}" if scale in ['row', 'column', 'all'] else ''
+ scale_title = f", scaled by {scale}" if scale in ['row', 'column', 'all'] else ''
plt.suptitle(
- f"The intensity of Significant differences (top {len(mat)} sorted by {sort_by.split('(')[0]}, filtered by {p_type}, {scale_title})"
+ f"The intensity of Significant differences (top {len(mat)} sorted by {sort_by.split('(')[0]}, filtered by {p_type}{scale_title})"
)
cbar = fig.ax_heatmap.collections[0].colorbar
cbar.set_label("Intensity", rotation=90, labelpad=1)
diff --git a/metax/utils/version.py b/metax/utils/version.py
index f4be0ce..d936c4f 100644
--- a/metax/utils/version.py
+++ b/metax/utils/version.py
@@ -1,2 +1,2 @@
-__version__ = '1.116.0'
+__version__ = '1.116.1'
API_version = '3'
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 3fa8034..209f360 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "MetaXTools"
-version = "1.116.0"
+version = "1.116.1"
description = "MetaXTools is a novel tool for linking peptide sequences with taxonomic and functional information in Metaproteomics."
readme = "README_PyPi.md"
license = { text = "NorthOmics" }