From 0ddde2651753d4e12310a88e1d7806f0f4e68451 Mon Sep 17 00:00:00 2001 From: Qing <44231502+byemaxx@users.noreply.github.com> Date: Mon, 28 Oct 2024 16:06:32 -0400 Subject: [PATCH] - Fix: Fixed the bug where the taxa-funcs heatmap still used the p-value as the plotted value when t-statistic or f-statistic was selected. - Change: Updated the cookbook. --- Docs/ChangeLog.md | 7 +- Docs/MetaX_Cookbook.md | 270 +++++++++++++------------- metax/taxafunc_ploter/heatmap_plot.py | 13 +- metax/utils/version.py | 2 +- pyproject.toml | 2 +- 5 files changed, 152 insertions(+), 142 deletions(-) diff --git a/Docs/ChangeLog.md b/Docs/ChangeLog.md index c7897c3..a565514 100644 --- a/Docs/ChangeLog.md +++ b/Docs/ChangeLog.md @@ -1,9 +1,14 @@ +# Version: 1.116.1 +## Date: 2024-10-28 +### Changes: +- Fix: Fixed the bug where the taxa-funcs heatmap still used the p-value as the plotted value when t-statistic or f-statistic was selected. +- Change: Updated the cookbook. + # Version: 1.116.0 ## Date: 2024-10-18 ### Changes: - New: 1. Added Advanced Parameters for Peptide Annotator 2. Add [Fill Zero] method for handling missing values in the data preprossing part. - # Version: 1.115.5 ## Date: 2024-10-16 ### Changes: diff --git a/Docs/MetaX_Cookbook.md b/Docs/MetaX_Cookbook.md index 1808fe0..9f7b81d 100644 --- a/Docs/MetaX_Cookbook.md +++ b/Docs/MetaX_Cookbook.md @@ -13,7 +13,7 @@ MetaX also features statistical modules and plotting tools for ana # Project Page -Visit **Github** to get more information: +Visit **GitHub** to get more information: [https://github.com/byemaxx/MetaX](https://github.com/byemaxx/MetaX) @@ -35,144 +35,17 @@ Visit **Github** to get more information:
-# Preparing Your Data - -## Module 1. Database Builder - -**Note:** The results from **MetaLab v2.3** MaxQuant workflow do not require database building. However, we do not recommend using these results as input to MetaX, as many peptides may be discarded. - -- Build the database for the **first time** using the Database Builder. - - **Option 1: Build Database Using MGnify Data** - - Ensure you download the correct database type corresponding to your data. - - ![dbbuilder](./MetaX_Cookbook.assets/dbbuilder.png) - - **Option 2: Build Database Using Own Data** - - 1. **Annotation Table:** A TSV table (tab-separated), with the first column as protein name joined with Genome by "_", e.g., "Genome1_protein1", and other columns containing annotation information. - - ![dbbuilder_own](./MetaX_Cookbook.assets/dbbuilder_own.png) - - 2. **Taxa Table:** A TSV table (tab-separated), with the first column as Genome name, e.g., "Genome1", and the second column as taxa. - - **Example Annotation Table:** - - | Query | Preferred_name | EC | KEGG_ko | - | ------------------- | -------------- | ----------------- | ------------------- | - | MGYG000000001_00696 | mfd | - | ko:K03723 | - | MGYG000000001_02838 | hxlR | - | - | - | MGYG000000001_01674 | ispG | 1.17.7.1,1.17.7.3 | ko:K03526 | - | MGYG000000001_02710 | glsA | 3.5.1.2 | ko:K01425 | - | MGYG000000001_01356 | mutS2 | - | ko:K07456 | - | MGYG000000001_02630 | - | - | - | - | MGYG000000001_02418 | ackA | 2.7.2.1 | ko:K00925 | - | MGYG000000001_00728 | atpA | 3.6.3.14 | ko:K02111 | - | MGYG000000001_00695 | pth | 3.1.1.29 | ko:K01056 | - | MGYG000000001_02907 | - | - | ko:K03086 | - | MGYG000000001_02592 | rplC | - | ko:K02906 | - | MGYG000000001_00137 | - | - | ko:K03480,ko:K03488 | - - **Example Taxa Table:** - - | Genome | Lineage | - | ------------- | ------------------------------------------------------------ | - | MGYG000000001 | 
d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Peptostreptococcales;f_Peptostreptococcaceae;g_GCA-900066495;s_GCA-900066495 sp902362365 | - | MGYG000000002 | d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Lachnospirales;f_Lachnospiraceae;g_Blautia_A;s_Blautia_A faecis | - | MGYG000000003 | d_Bacteria;p_Bacteroidota;c_Bacteroidia;o_Bacteroidales;f_Rikenellaceae;g_Alistipes;s_Alistipes shahii | - | MGYG000000004 | d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Oscillospirales;f_Ruminococcaceae;g_Anaerotruncus;s_Anaerotruncus colihominis | - | MGYG000000005 | d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Peptostreptococcales;f_Peptostreptococcaceae;g_Terrisporobacter;s_Terrisporobacter glycolicus_A | - | MGYG000000006 | d_Bacteria;p_Firmicutes;c_Bacilli;o_Staphylococcales;f_Staphylococcaceae;g_Staphylococcus;s_Staphylococcus xylosus | - | MGYG000000007 | d_Bacteria;p_Firmicutes;c_Bacilli;o_Lactobacillales;f_Lactobacillaceae;g_Lactobacillus;s_Lactobacillus intestinalis | - | MGYG000000008 | d_Bacteria;p_Firmicutes;c_Bacilli;o_Lactobacillales;f_Lactobacillaceae;g_Lactobacillus;s_Lactobacillus johnsonii | - | MGYG000000009 | d_Bacteria;p_Firmicutes;c_Bacilli;o_Lactobacillales;f_Lactobacillaceae;g_Ligilactobacillus;s_Ligilactobacillus murinus | - -## Module 2. Database Updater - -The **Database Updater** allows updating the database built by the **Database Builder** or adding more annotations. This step is **optional**. - -- Update the built database and extend annotations. - - ![db_updater](./MetaX_Cookbook.assets/db_updater.png) - - **Option 1: Built-in Mode** - - We recommend some extended databases, such as [dbCAN_seq](https://bcb.unl.edu/dbCAN_seq). - - **Option 2: TSV Table** - - Extend the database by adding a new database to the database table. Ensure the column separator is a tab and the first column is the Protein name, with other columns containing function annotations. - - **Example:** - - | Protein ID | COG | KEGG | ... 
| - | ------------------- | ---------- | ---------- | ---- | - | MGYG000000001_02630 | Function 1 | Function 1 | ... | - | MGYG000000001_01475 | Function 2 | Function 1 | ... | - | MGYG000000001_01539 | Function 3 | Function 1 | ... | - -## Module 3. Peptide Annotator - -### 1. Results from MAG Workflow - -The peptide results use Metagenome-assembled genomes (MAGs) as the reference database for protein searches, e.g., MetaLab-MAG, MetaLab-DIA and other workflows wich using MAG databases like MGnify or customized MAGs Database. - -- Annotate the peptide to Operational Taxa-Functions (OTF) Table before analysis using the Peptide Annotator. - - ![peptide2taxafunc](./MetaX_Cookbook.assets/peptide2taxafunc.png) - - **Required:** - - - **Database**: The database created by Database Builder - - - **Peptide Table**: - - - *Option 1*: From MetaLab-MAG results (final_peptides.tsv) - - - *Option 2*: Create it manually, with the first column as the ID (e.g., peptide sequence) and the second column as the proteins ID of MGnify (e.g., MGYG000003683_00301; MGYG000001490_01143) or your database, and other columns as the intensity of each sample. - - **Example:** - - | Sequence | Proteins | Intensity_V1_01 | Intensity_V1_02 | Intensity_V1_03 | Intensity_V1_04 | - | ----------------------------------- | ------------------------------------------------------------ | --------------- | --------------- | --------------- | --------------- | - | (Acetyl)KGGVEPQSETVWR | MGYG000002716_01681;MGYG000000195_00452;MGYG000001616_00519;MGYG000002258_01582;MGYG000001300_00281;MGYG000002926_00231;... | 714650 | 0 | 0 | 0 | - | (Acetyl)KVIPELNGK | MGYG000003589_01892;MGYG000001560_01812;MGYG000001789_00244;... 
| 0 | 0 | 0 | 0 | - | (Acetyl)LAELGAKAVTLSGPDGYIYDPDGITTK | MGYG000001199_02893 | 0 | 0 | 0 | 0 | - | (Acetyl)LLTGLPDAYGR | MGYG000001757_01206;MGYG000004547_02135;MGYG000001283_00124;MGYG000004758_00803;MGYG000002486_00845;MGYG000000271_01269 | 0 | 307519 | 0 | 0 | - | (Acetyl)MDFTLDKK | MGYG000000076_01275;MGYG000003694_00879;MGYG000000312_02425;MGYG000000271_02102;MGYG000004271_00233;MGYG000002517_00542;MGYG000000489_01025 | 306231 | 0 | 0 | 1214497 | - - - **Output Save Path**: The location to save the result table. - - - **LCA Threshold**: Find the LCA with the proportion threshold for each peptide. The default is 1.00 (100%). - - ![LCA_prop](./MetaX_Cookbook.assets/LCA_prop.png) - -### 2. Results from MaxQuant Workflow - -The peptide results from **MetaLab 2.3** MaxQuant workflow. - -- Select the **MetaLab** result folder, which contains the **maxquant_search** folder. - - ![peptide2taxafunc_tab2_1](MetaX_Cookbook.assets/peptide2taxafunc_tab2_1.png) - -- The **Peptide Annotator** will automatically find the **peptides_report.txt**, **BuiltIn.pepTaxa.csv**, and **functions.tsv** in the **maxquant_search** folder. Alternatively, you can select the files manually. - - - Select **OTFs Save To** to set the location to save the result table. - - ![peptide2taxafunc_tab2_2](MetaX_Cookbook.assets/peptide2taxafunc_tab2_2.png) - -
- # Exploring Data with MetaX -## Module 4. OTF Analyzer +See the **[Preparing Your Data](#preparing-your-data)** section to build the database and annotate peptides to OTFs before starting. + +## Module 1. OTF Analyzer -After obtaining the **Operational Taxa-Functions (OTF) Table** using the **Peptide Annotator**, you can perform downstream analysis with the **OTF Analyzer**. +After obtaining the **Operational Taxa-Functions (OTF) Table** using the **[Peptide Annotator](#module-4-peptide-annotator)**, you can perform downstream analysis with the **OTF Analyzer**. ## 1. Data Preparation -**OTFs (Operational Taxa-Functions) Table:** Obtained from the Peptide Annotator module. +**OTFs (Operational Taxa-Functions) Table:** Obtained from the [Peptide Annotator](#module-4-peptide-annotator) module. **Meta Table:** The first column is sample names, and the other columns represent different groups. If no meta table is provided, meta info will be generated automatically: (1) all samples are in the same group; (2) each sample is a separate group. @@ -784,6 +657,137 @@ We can select **meta** **groups** or **samples** (default a +# Preparing Your Data + +## Module 2. Database Builder + +**Note:** The results from **MetaLab v2.3** MaxQuant workflow do not require database building. However, we do not recommend using these results as input to MetaX, as many peptides may be discarded. + +- Build the database for the **first time** using the Database Builder. + + **Option 1: Build Database Using MGnify Data** + + Ensure you download the correct database type corresponding to your data. + + ![dbbuilder](./MetaX_Cookbook.assets/dbbuilder.png) + + **Option 2: Build Database Using Own Data** + + 1. **Annotation Table:** A TSV table (tab-separated), with the first column as protein name joined with Genome by "_", e.g., "Genome1_protein1", and other columns containing annotation information. + + ![dbbuilder_own](./MetaX_Cookbook.assets/dbbuilder_own.png) + + 2.
**Taxa Table:** A TSV table (tab-separated), with the first column as Genome name, e.g., "Genome1", and the second column as taxa. + + **Example Annotation Table:** + + | Query | Preferred_name | EC | KEGG_ko | + | ------------------- | -------------- | ----------------- | ------------------- | + | MGYG000000001_00696 | mfd | - | ko:K03723 | + | MGYG000000001_02838 | hxlR | - | - | + | MGYG000000001_01674 | ispG | 1.17.7.1,1.17.7.3 | ko:K03526 | + | MGYG000000001_02710 | glsA | 3.5.1.2 | ko:K01425 | + | MGYG000000001_01356 | mutS2 | - | ko:K07456 | + | MGYG000000001_02630 | - | - | - | + | MGYG000000001_02418 | ackA | 2.7.2.1 | ko:K00925 | + | MGYG000000001_00728 | atpA | 3.6.3.14 | ko:K02111 | + | MGYG000000001_00695 | pth | 3.1.1.29 | ko:K01056 | + | MGYG000000001_02907 | - | - | ko:K03086 | + | MGYG000000001_02592 | rplC | - | ko:K02906 | + | MGYG000000001_00137 | - | - | ko:K03480,ko:K03488 | + + **Example Taxa Table:** + + | Genome | Lineage | + | ------------- | ------------------------------------------------------------ | + | MGYG000000001 | d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Peptostreptococcales;f_Peptostreptococcaceae;g_GCA-900066495;s_GCA-900066495 sp902362365 | + | MGYG000000002 | d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Lachnospirales;f_Lachnospiraceae;g_Blautia_A;s_Blautia_A faecis | + | MGYG000000003 | d_Bacteria;p_Bacteroidota;c_Bacteroidia;o_Bacteroidales;f_Rikenellaceae;g_Alistipes;s_Alistipes shahii | + | MGYG000000004 | d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Oscillospirales;f_Ruminococcaceae;g_Anaerotruncus;s_Anaerotruncus colihominis | + | MGYG000000005 | d_Bacteria;p_Firmicutes_A;c_Clostridia;o_Peptostreptococcales;f_Peptostreptococcaceae;g_Terrisporobacter;s_Terrisporobacter glycolicus_A | + | MGYG000000006 | d_Bacteria;p_Firmicutes;c_Bacilli;o_Staphylococcales;f_Staphylococcaceae;g_Staphylococcus;s_Staphylococcus xylosus | + | MGYG000000007 | 
d_Bacteria;p_Firmicutes;c_Bacilli;o_Lactobacillales;f_Lactobacillaceae;g_Lactobacillus;s_Lactobacillus intestinalis | + | MGYG000000008 | d_Bacteria;p_Firmicutes;c_Bacilli;o_Lactobacillales;f_Lactobacillaceae;g_Lactobacillus;s_Lactobacillus johnsonii | + | MGYG000000009 | d_Bacteria;p_Firmicutes;c_Bacilli;o_Lactobacillales;f_Lactobacillaceae;g_Ligilactobacillus;s_Ligilactobacillus murinus | + +## Module 3. Database Updater + +The **Database Updater** allows updating the database built by the **Database Builder** or adding more annotations. This step is **optional**. + +- Update the built database and extend annotations. + + ![db_updater](./MetaX_Cookbook.assets/db_updater.png) + + **Option 1: Built-in Mode** + + We recommend some extended databases, such as [dbCAN_seq](https://bcb.unl.edu/dbCAN_seq). + + **Option 2: TSV Table** + + Extend the database by adding a new database to the database table. Ensure the column separator is a tab and the first column is the Protein name, with other columns containing function annotations. + + **Example:** + + | Protein ID | COG | KEGG | ... | + | ------------------- | ---------- | ---------- | ---- | + | MGYG000000001_02630 | Function 1 | Function 1 | ... | + | MGYG000000001_01475 | Function 2 | Function 1 | ... | + | MGYG000000001_01539 | Function 3 | Function 1 | ... | + +## Module 4. Peptide Annotator + +### 1. Results from MAG Workflow + +The peptide results use Metagenome-assembled genomes (MAGs) as the reference database for protein searches, e.g., MetaLab-MAG, MetaLab-DIA and other workflows which use MAG databases like MGnify or a customized MAG database. + +- Annotate the peptide to the Operational Taxa-Functions (OTF) Table before analysis using the Peptide Annotator.
+ + ![peptide2taxafunc](./MetaX_Cookbook.assets/peptide2taxafunc.png) + + **Required:** + + - **Database**: The database created by [Database Builder](#module-2-database-builder) + + - **Peptide Table**: + + - *Option 1*: From MetaLab-MAG results (final_peptides.tsv) + + - *Option 2*: Create it manually, with the first column as the ID (e.g., peptide sequence) and the second column as the protein IDs of MGnify (e.g., MGYG000003683_00301; MGYG000001490_01143) or your database, and other columns as the intensity of each sample. + + **Example:** + + | Sequence | Proteins | Intensity_V1_01 | Intensity_V1_02 | Intensity_V1_03 | Intensity_V1_04 | + | ----------------------------------- | ------------------------------------------------------------ | --------------- | --------------- | --------------- | --------------- | + | (Acetyl)KGGVEPQSETVWR | MGYG000002716_01681;MGYG000000195_00452;MGYG000001616_00519;MGYG000002258_01582;MGYG000001300_00281;MGYG000002926_00231;... | 714650 | 0 | 0 | 0 | + | (Acetyl)KVIPELNGK | MGYG000003589_01892;MGYG000001560_01812;MGYG000001789_00244;... | 0 | 0 | 0 | 0 | + | (Acetyl)LAELGAKAVTLSGPDGYIYDPDGITTK | MGYG000001199_02893 | 0 | 0 | 0 | 0 | + | (Acetyl)LLTGLPDAYGR | MGYG000001757_01206;MGYG000004547_02135;MGYG000001283_00124;MGYG000004758_00803;MGYG000002486_00845;MGYG000000271_01269 | 0 | 307519 | 0 | 0 | + | (Acetyl)MDFTLDKK | MGYG000000076_01275;MGYG000003694_00879;MGYG000000312_02425;MGYG000000271_02102;MGYG000004271_00233;MGYG000002517_00542;MGYG000000489_01025 | 306231 | 0 | 0 | 1214497 | + + - **Output Save Path**: The location to save the result table. + + - **LCA Threshold**: Find the LCA with the proportion threshold for each peptide. The default is 1.00 (100%). + + ![LCA_prop](./MetaX_Cookbook.assets/LCA_prop.png) + +### 2. Results from MaxQuant Workflow + +The peptide results from **MetaLab 2.3** MaxQuant workflow. + +- Select the **MetaLab** result folder, which contains the **maxquant_search** folder.
+ + ![peptide2taxafunc_tab2_1](MetaX_Cookbook.assets/peptide2taxafunc_tab2_1.png) + +- The **Peptide Annotator** will automatically find the **peptides_report.txt**, **BuiltIn.pepTaxa.csv**, and **functions.tsv** in the **maxquant_search** folder. Alternatively, you can select the files manually. + + - Select **OTFs Save To** to set the location to save the result table. + + ![peptide2taxafunc_tab2_2](MetaX_Cookbook.assets/peptide2taxafunc_tab2_2.png) + +
+ + + # Developer Tools diff --git a/metax/taxafunc_ploter/heatmap_plot.py b/metax/taxafunc_ploter/heatmap_plot.py index 79d1d14..5190664 100644 --- a/metax/taxafunc_ploter/heatmap_plot.py +++ b/metax/taxafunc_ploter/heatmap_plot.py @@ -93,6 +93,7 @@ def plot_top_taxa_func_heatmap_of_test_res(self, df, top_number:int|str= 100, cmap = type_map.get(value_type, "None")[1] if cmap is None else cmap + value_col_name = type_map.get(value_type, "None")[0] @@ -110,7 +111,7 @@ def plot_top_taxa_func_heatmap_of_test_res(self, df, top_number:int|str= 100, if rename_taxa: df_top['Taxon'] = df_top['Taxon'].apply(lambda x: x.split('|')[-1]) # df_top = self.rename_taxa(df_top) - df_top = df_top.pivot(index=func_name, columns='Taxon', values=p_type) + df_top = df_top.pivot(index=func_name, columns='Taxon', values=value_col_name) print(f"Top [{top_number}] significant: Taxa ({df_top.shape[1]}), Functions ({df_top.shape[0]})") df_plot = df_top.fillna(1) if plot_type in ['pvalue', 'padj'] else df_top.fillna(0) @@ -154,11 +155,11 @@ def plot_top_taxa_func_heatmap_of_test_res(self, df, top_number:int|str= 100, fig.ax_heatmap.set_xlabel("Taxa") fig.ax_heatmap.set_ylabel("Functions") - scale_title = f"scaled by {scale}" if scale in ['row', 'column', 'all'] else '' + scale_title = f", scaled by {scale}" if scale in ['row', 'column', 'all'] else '' if title == "": - title = f"Significant Differences in Taxa-Function (Top {top_number} sorted by {plot_type}, filtered by {p_type}, {scale_title})" + title = f"Significant Differences in Taxa-Function (Top {top_number} sorted by {plot_type}, filtered by {p_type}{scale_title})" else: - title = f"{title} (Top {top_number} sorted by {plot_type}, filtered by {p_type}, {scale_title})" + title = f"{title} (Top {top_number} sorted by {plot_type}, filtered by {p_type}{scale_title})" plt.suptitle(title) @@ -321,9 +322,9 @@ def plot_basic_heatmap_of_test_res(self, df, top_number:int = 100, value_type:st va = self.get_y_labels_va() ) - scale_title = 
f"scaled by {scale}" if scale in ['row', 'column', 'all'] else '' + scale_title = f", scaled by {scale}" if scale in ['row', 'column', 'all'] else '' plt.suptitle( - f"The intensity of Significant differences (top {len(mat)} sorted by {sort_by.split('(')[0]}, filtered by {p_type}, {scale_title})" + f"The intensity of Significant differences (top {len(mat)} sorted by {sort_by.split('(')[0]}, filtered by {p_type}{scale_title})" ) cbar = fig.ax_heatmap.collections[0].colorbar cbar.set_label("Intensity", rotation=90, labelpad=1) diff --git a/metax/utils/version.py b/metax/utils/version.py index f4be0ce..d936c4f 100644 --- a/metax/utils/version.py +++ b/metax/utils/version.py @@ -1,2 +1,2 @@ -__version__ = '1.116.0' +__version__ = '1.116.1' API_version = '3' \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 3fa8034..209f360 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "MetaXTools" -version = "1.116.0" +version = "1.116.1" description = "MetaXTools is a novel tool for linking peptide sequences with taxonomic and functional information in Metaproteomics." readme = "README_PyPi.md" license = { text = "NorthOmics" }