Metabologram fix (galaxyproject#6210)

* update(metabologram): add user output option 'show_values' to write the values in the plots or not * metabologram: deleted metabolites selector as is not pertinent * input explanation: only abundances and mean enrichment accepted; improved Help text * metabologram: lint error solved, also more improvement of Help * update macros dimet: citation and suffix version; also for differential analysis improved Help * Update tools/dimet/macros.xml --------- Co-authored-by: Björn Grüning <[email protected]>
pavanvidem · Aug 24, 2024 · b34f41d · b34f41d
1 parent e56f844
commit b34f41d
Show file tree

Hide file tree

Showing 5 changed files with 376 additions and 882 deletions.
diff --git a/tools/dimet/dimet_differential_analysis.xml b/tools/dimet/dimet_differential_analysis.xml
@@ -266,11 +266,11 @@ compartments names are, the longer the output files' names! Please pick short an
 
 You can precise how you want your analysis to be executed, with the parameters:
 
-- **conditions**: the conditions present in your data, to perform the pairwise comparison.
+- **datatypes** : the measures type(s) that you want to run (see above in Input data files section)
 
-- **comparisons** : the pairs of [condition, timepoint] groups to compare
+- **conditions**: the two 'Conditions' boxes must be filled in a coherent order, keeping in mind that the last specified condition is the reference or control.
 
-- **datatypes** : the measures type(s) that you want to run (see above in Input data files section)
+- **timepoint** : the time point at which the two conditions will be compared.
 
 - **statistical_test** : choose, by type of measure, the specific statistical test to be applied.
 

diff --git a/tools/dimet/dimet_metabologram.xml b/tools/dimet/dimet_metabologram.xml
@@ -32,7 +32,7 @@
             colors_divergent_palette: ['royalblue', 'white', 'red'],
             edge_color: ['black','black'],
             line_width:['1','1.2'],
-            display_label_and_value: true,
+            display_label_and_value: ${output_options.write_values},
             font_size: ${output_options.font_size},
             fig_width: ${output_options.fig_width},
             figure_format:${output_options.figure_format},
@@ -76,6 +76,7 @@
         <expand macro="input_parameters_metabologram"/>
         <expand macro="deg_list"/>
         <expand macro="compartments_metabologram"/>
+
         <param name="correction_method" type="select" value="bonferroni" display="radio" label="Select multiple test correction to apply" help="Please enter at max 1 method">
                 <option value="bonferroni">bonferroni</option>
                 <option value="holm-sidak">holm-sidak</option>
@@ -93,6 +94,8 @@
                 <option value="pdf">Pdf</option>
                 <option value="svg">Svg</option>
             </param>
+            <param name="write_values" type="boolean" value="false" label="Write the values alongside the metabolites and genes names (e.g. L-Alanine: -0.44)"
+                   help="Default value is false."/>
             <param name="fig_width" type="integer" min="5" max="20" value="7" label="width of figures"
                    help="Default value is 7."/>
             <param name="fig_height" type="integer" min="5" max="20" value="7" label="heigt of each figure"
@@ -115,7 +118,6 @@
             <param name="path_kegg_transcripts" ftype="tabular" value="pathways_kegg_transcripts.csv"/>
             <param name="abundance_file" ftype="tabular" value="rawAbundances.csv"/>
             <param name="metadata_path" ftype="tabular" value="example2_metadata.csv"/>
-            <param name="metabolites_list" value="Fumaric_acid,Glycine,L-Proline"/>
             <param name="statistical_test_type" value="parametric"/>
             <param name="stat_test" value="Tt"/>
             <repeat name="deg_list">
@@ -124,21 +126,22 @@
                 <param name="valuecol" ftype="integer" value="3"/>
                 <param name="timepoint" value='T0'/>
                 <repeat name="factor_list">
-                    <param name="condition" value="Control"/>
+                    <param name="condition" value="L-Cycloserine"/>
                 </repeat>
                 <repeat name="factor_list">
-                    <param name="condition" value="L-Cycloserine"/>
+                    <param name="condition" value="Control"/>
                 </repeat>
             </repeat>
             <section name="output_options">
+                <param name="write_values" value="false"/>
                 <param name="figure_format" value="svg"/>
                 <param name="figure_width" value="7"/>
                 <param name="figure_height" value="7"/>
                 <param name="font_size" value="12"/>
             </section>
             <output_collection name="report" type="list" count="3">
-                <element file="AMINOACIDS-Control-T0-L-Cycloserine-T0--DEG_comparison1-abundances-cell.svg" name="AMINOACIDS-Control-T0-L-Cycloserine-T0--DEG_comparison1-abundances-cell" ftype="svg" compare="sim_size" delta="100"/>
-                <element file="CENTRAL_CARBON_METABOLISM-Control-T0-L-Cycloserine-T0--DEG_comparison1-abundances-cell.svg" name="CENTRAL_CARBON_METABOLISM-Control-T0-L-Cycloserine-T0--DEG_comparison1-abundances-cell" ftype="svg" compare="sim_size" delta="100"/>
+                <element file="AMINOACIDS-L-Cycloserine-T0-Control-T0--DEG_comparison1-abundances-cell.svg" name="AMINOACIDS-L-Cycloserine-T0-Control-T0--DEG_comparison1-abundances-cell" ftype="svg" compare="sim_size" delta="100"/>
+                <element file="CENTRAL_CARBON_METABOLISM-L-Cycloserine-T0-Control-T0--DEG_comparison1-abundances-cell.svg" name="CENTRAL_CARBON_METABOLISM-L-Cycloserine-T0-Control-T0--DEG_comparison1-abundances-cell" ftype="svg" compare="sim_size" delta="100"/>
                 <element file="legend-abundances-cell.svg" name="legend-abundances-cell" ftype="svg" compare="sim_size" delta="100"/>
             </output_collection>
         </test>
@@ -171,7 +174,8 @@ This tool requires the following tab-delimited .csv files:
 
 2. **The transcriptomics data**:
 
-  2.1 The table with the results of the differential expression analysis (performed with an external tool)
+  2.1 The table with the results of the differential expression analysis (performed with an external tool). Please provide the file with the results of the differential expression analysis of your transcriptomics data. It is recommended to use only the statistically significant **Differentially Expressed Genes (DEG)**. We provide examples on Zenodo (see section **Available data for testing**).
+
 
 
 3. **The pathways files**:
@@ -296,16 +300,17 @@ compartments names are, the longer the output files' names! Please pick short an
 
 **Running the analysis**
 
-You can precise how you want your analysis to be executed, with the parameters:
-
+You can specify how you want your analysis to be executed with the parameters, which are of two types:
 
-a. Parameters proper to the metabolomics analysis (that runs automatically before the integration):
+**a. Parameters specific to the metabolome differential analysis (which runs automatically before the integration)**:
 
-- **comparisons** : the pairs of [condition, timepoint] groups to compare
+- **Conditions and timepoint**:
 
-- **datatypes** : the measures type that you want to run, that must be only one (see above in **Input data files** section)
+For each comparison to run on the metabolomics data, you can define the Conditions to compare, at a chosen time point, through the box 'Deregulated gene set': follow the instructions and choose the correct order of Conditions (the last must be the reference or control).
+To add a second comparison, click the 'Insert Deregulated gene set' button.
+You can insert and define as many boxes ('Deregulated gene set') as comparisons you want to perform.
 
-- **statistical_test** : choose the specific statistical test to be applied.
+- **statistical_test**: choose the specific statistical test to be applied.
 
  Kruskal-Wallis, Mann-Whitney, Wilcoxon’s signed rank test, Wilcoxon’s rank sum test
  t-test, and permutation test are currently offered (we use the trusted functions from scipy library https://docs.scipy.org/doc/scipy/reference/stats.html).
@@ -318,19 +323,24 @@ considering a minimal acceptable "separation", and therefore, to be suitable for
 whereas if 'distance/span' < 0 there is no separation.
 To use with caution in case of important dispersion of your intra-group values. Default is -0.3 (not stringent)
 
-- **correction_method** : one of the methods for multiple testing correction available in statsmodels library (bonferroni, fdr_bh, sidak, among others, see https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html).
+- **correction_method**: one of the methods for multiple testing correction available in statsmodels library (bonferroni, fdr_bh, sidak, among others, see https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html).
 
-- **compartment** : one of the compartments present in your data.
+- **compartment**: one of the compartments present in your data.
 
-b. Parameters proper to the integration (that runs automatically after the metabolites analysis):
+**b. Parameters specific to the integration with transcriptomics (which runs automatically after the metabolome differential analysis)**:
 
+- **Deregulated genes set file**: Corresponds to the transcriptomics data, as explained in **Input data files** section.
 
-- **transcripts** : the file(s) with the results of the differential expression analysis. They must be as many as the number of comparisons (metabolomics analysis) and keep a coherent order with them.
+For each 'Deregulated gene set' box, a single **Differentially Expressed Genes (DEG)** file must be added, and it must match the metabolomics comparison set in the same box. Additional files can be added with the 'Insert Deregulated gene set' button;
+see also the subsection 'Conditions and timepoint' above.
 
 - **pathways** : files for the pathways, as explained in **Input data files** section
 
 
-There exist hints on use that will guide you, next to the parameters.
+Overall, the metabologram module provides usage hints next to the parameters, in the same menus and boxes, to guide you.
+
+
+**Output**
 
 The output consists of one figure by metabologram,  and a color key bar legend valid for all metabolograms produced
 

diff --git a/tools/dimet/macros.xml b/tools/dimet/macros.xml
@@ -1,6 +1,6 @@
 <macros>
     <token name="@TOOL_VERSION@">0.2.4</token>
-    <token name="@VERSION_SUFFIX@">3</token>
+    <token name="@VERSION_SUFFIX@">4</token>
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@TOOL_VERSION@">dimet</requirement>
@@ -49,14 +49,14 @@
     </xml>
     <xml name="citations">
         <citations>
+            <citation type="doi">10.1093/bioinformatics/btae282</citation>
             <citation type="bibtex">
                 @software{Galvis_Rodriguez_DIMet,
                     author = {Galvis Rodriguez, Johanna  and Guyon, Joris and Dartigues, Benjamin and Specque, Florian and Daubon, Thomas and Karkar, Slim and Nikolski, Macha},
                     license = {MIT},
                     title = {{DIMet}},
                     url = {https://github.com/cbib/DIMet}
                     }
-
             </citation>
         </citations>
 
@@ -95,58 +95,23 @@
     </xml>
     <xml name="input_parameters_metabologram">
         <conditional name="data_input">
-            <param name="data_input_selector" type="select" label="Abundance, Enrichment or Isotopologues quantification files" help="Select between raw abundance and mean enrichment files">
+            <param name="data_input_selector" type="select" label="Abundance or Enrichment quantification files" help="Select between metabolite total abundances and mean enrichment files">
                 <option value="abundance" selected="True">abundance</option>
                 <option value="mean_enrichment">mean_enrichment</option>
             </param>
             <when value="abundance">
                 <expand macro="abundance_file_macros"/>
-                <param name="metabolites_list" type="select" optional="false" multiple="true"
-                       label="Select Metabolite(s) for condition 1 to plot (1 Min.). You have to load a abundance file prior to have access to metabolite list">
-                    <validator type="length" min="1" message="Please enter at max 2 compartments"/>
-                    <options from_dataset="abundance_file">
-                        <column name="metabolite_or_isotopologue" index="0"/>
-                        <column name="value" index="0"/>
-                        <filter type="unique_value" name="metabolite_or_isotopologue" column="0"/>
-                        <filter type="remove_value" value="metabolite_or_isotopologue"/>
-                    </options>
-                    <sanitizer>
-                        <valid initial="default">
-                            <add preset="string.printable"/>
-                            <add value="\t"/>
-                            <remove value="&quot;"/>
-                            <remove value="&apos;"/>
-                        </valid>
-                    </sanitizer>
-                </param>
                 <expand macro="statistical_test"/>
+
             </when>
             <when value="mean_enrichment">
                 <expand macro="enrichment_file_macros"/>
-                <param name="metabolites_list" type="select" optional="false" multiple="true"
-                       label="Select Metabolite(s) for condition 1 to plot (1 Min.). You have to load a abundance file prior to have access to metabolite list">
-                    <validator type="length" min="1" message="Please enter at max 2 compartments"/>
-                    <options from_dataset="me_or_frac_contrib_file">
-                        <column name="metabolite_or_isotopologue" index="0"/>
-                        <column name="value" index="0"/>
-                        <filter type="unique_value" name="metabolite_or_isotopologue" column="0"/>
-                        <filter type="remove_value" value="metabolite_or_isotopologue"/>
-                    </options>
-                    <sanitizer>
-                        <valid initial="default">
-                            <add preset="string.printable"/>
-                            <add value="\t"/>
-                            <remove value="&quot;"/>
-                            <remove value="&apos;"/>
-                        </valid>
-                    </sanitizer>
-                </param>
                 <expand macro="statistical_test"/>
             </when>
         </conditional>
+        <expand macro="metadata_file_macros"/>
         <param name="path_kegg_metabolites" type="data" format="tabular" label="Pathways kegg metabolites file" help="A file with the pathways and respective metabolites ID, that must match with those in your metabolomics data. The names of the columns must be the pathways' names, see the minimal data example downloaded from zenodo as explained above. (see help below for more details)"/>
         <param name="path_kegg_transcripts" type="data" format="tabular" label="Pathways kegg transcripts file" help="A file with the pathways and respective gene symbols, which must match with those present in the transcriptomics data. The names of the columns must be the pathways' names, see the minimal data example downloaded from zenodo as explained above. (see help below for more details)"/>
-        <expand macro="metadata_file_macros"/>
     </xml>
     <xml name="input_parameters_bivar_analysis">
         <conditional name="data_input">