galaxyproject · bgruening · Apr 29, 2024 · Mar 27, 2024 · Apr 17, 2024 · Apr 19, 2024
diff --git a/tools/maaslin2/maaslin2.xml b/tools/maaslin2/maaslin2.xml
@@ -7,6 +7,34 @@
     <expand macro="xrefs"/>
     <expand macro="requirements"/>
     <command detect_errors="exit_code"><![CDATA[
+
+## Resolve the column names of the fixed and random effects from the metadata header,
+## because Galaxy parameters of type="data_column" only return column indices.
+## awk is used so that the file is only parsed when the command line is executed.
+
+#if $fixed_effects
+#set idx = []
+#for $i in $fixed_effects:
+    #silent idx.append(f'${i}')
+#end for
+#set idx_for_awk = ','.join(idx)
+## idx_for_awk holds one awk field reference per selected column; awk prints those fields of line 1 (the header), comma-separated
+fixed_effects=\$(awk -v OFS=',' -F"\t" 'NR == 1 { print $idx_for_awk}' '$input_metadata') &&
+echo 'Assigned fixed effects as:' "\$fixed_effects" &&
+#end if
+
+
+#if $random_effects
+#set idx = []
+#for $i in $random_effects:
+    #silent idx.append(f'${i}')
+#end for
+#set idx_for_awk = ','.join(idx)
+## idx_for_awk holds one awk field reference per selected column; awk prints those fields of line 1 (the header), comma-separated
+random_effects=\$(awk -v OFS=',' -F"\t" 'NR == 1 { print $idx_for_awk}' '$input_metadata') &&
+echo 'Assigned random effects as:' "\$random_effects" &&
+#end if
+
 ln -s '$input_data' 'input_data.tsv'
 &&
 ln -s '$input_metadata' 'input_metadata.tsv'
@@ -31,10 +59,10 @@ Maaslin2.R
     --analysis_method '$additional_options.analysis_method'
 #end if
 #if $random_effects
-    --random_effects '$random_effects'
+    --random_effects "\$random_effects" ## shell variable holding the header names; quoted so a name with spaces stays one argument
 #end if
 #if $fixed_effects
-    --fixed_effects '$fixed_effects'
+    --fixed_effects "\$fixed_effects" ## shell variable holding the header names; quoted so a name with spaces stays one argument
 #end if
 #if $additional_options.correction
     --correction '$additional_options.correction'
@@ -51,19 +79,17 @@ Maaslin2.R
     'outputFolder'
 &&
 cd outputFolder && mkdir -p figures/ && cp *.pdf figures
+
     ]]></command>
     <inputs>
         <param name="input_data" type="data" format="tabular" label="Data (or features) file"/>
         <param name="input_metadata" type="data" format="tabular" label="Metadata file"/>
-        <param argument="--fixed_effects" type="select" multiple="true" optional="true" label="Interactions: Fixed effects" help="The fixed effects for the model, comma-delimited for multiple effects">
-            <option value="diagnosis" selected="true">diagnosis</option>
-            <option value="dysbiosisnonIBD" selected="true">dysbiosisnonIBD</option>
-            <option value="dysbiosisUC" selected="true">dysbiosisUC</option>
-            <option value="dysbiosisCD" selected="true">dysbiosisCD</option>
-            <option value="antibiotics" selected="true">antibiotics</option>
-            <option value="age" selected="true">age</option>
-        </param>        
-        <param argument="--random_effects" type="text" multiple="true" optional="true" label="Random effects" help="The random effects for the model,  comma-delimited for multiple effects"/>        
+        <!-- Both effect selectors are data_column parameters tied to the metadata file: they hand the
+             command block column indices, which it maps back to header names. -->
+        <param argument="--fixed_effects" type="data_column" data_ref="input_metadata" use_header_names="true" multiple="true" optional="true"
+               label="Interactions: Fixed effects" help="Metadata columns to use as fixed effects in the model. Default (nothing selected): all"/>
+        <param argument="--random_effects" type="data_column" data_ref="input_metadata" use_header_names="true" multiple="true" optional="true"
+               label="Random effects" help="Metadata columns to use as random effects in the model. Default (nothing selected): none"/>
         <section name="additional_options" title="Additional Options" expanded="true">
             <param argument="--min_abundance" type="float" value="0.0" optional="true" label="Minimum abundance" help="The minimum abundance for each feature"/>
             <param argument="--min_prevalence" type="float" value="0.1" optional="true" label="Minimum prevalence" help="The minimum percent of samples for which a feature is detected at minimum abundance"/>
@@ -87,7 +113,11 @@ cd outputFolder && mkdir -p figures/ && cp *.pdf figures
                 <option value="NEGBIN">NEGBIN</option>
                 <option value="ZINB">ZINB</option>
             </param>
-            <param argument="--correction" type="text" value="BH" optional="true" label="Correction" help="The correction method for computing  the q-value"/>
+            <param argument="--correction" type="select" optional="true" label="Correction" help="The correction method for computing  the q-value, Default: BH ">
+                <!-- a select declares its default with selected="true" on an option, not with a value attribute -->
+                <option value="BH" selected="true">Benjamini-Hochberg(BH)</option>
+                <option value="BY">Benjamini-Yekutieli(BY)</option>
+            </param>
             <param argument="--standardize" type="boolean" truevalue="--standardize TRUE" falsevalue="--standardize FALSE" checked="true" label="Apply z-score so continuous metadata are on  the same scale"/>
         </section>          
         <section name="output" title="Set Plotting Output" expanded="true">
@@ -115,8 +145,8 @@ cd outputFolder && mkdir -p figures/ && cp *.pdf figures
         <test expect_num_outputs="5">
             <param name="input_data" value="HMP2_taxonomy.tsv"/>
             <param name="input_metadata" value="HMP2_metadata.tsv"/>
-            <param name="random_effects" value="site,subject"/>
-            <param name="fixed_effects" value="diagnosis,dysbiosisnonIBD,dysbiosisUC,dysbiosisCD,antibiotics,age"/>
+            <param name="random_effects" value="2,5"/> <!-- column indices replacing the names site,subject -->
+            <param name="fixed_effects" value="4,9,10,11,6,3"/> <!-- column indices replacing the names diagnosis,dysbiosisnonIBD,dysbiosisUC,dysbiosisCD,antibiotics,age -->
             <section name="additional_options">
                 <param name="min_abundance" value="0.0"/>
                 <param name="min_prevalence" value="0.1"/>
@@ -198,7 +228,7 @@ cd outputFolder && mkdir -p figures/ && cp *.pdf figures
         <test expect_num_outputs="5">
             <param name="input_data" value="HMP2_taxonomy.tsv"/>
             <param name="input_metadata" value="HMP2_metadata.tsv"/>
-            <param name="fixed_effects" value="diagnosis,dysbiosisnonIBD"/>
+            <param name="fixed_effects" value="4,9"/> <!-- column indices replacing the names diagnosis,dysbiosisnonIBD -->
             <section name="additional_options">
                 <param name="min_abundance" value="0.0"/>
                 <param name="min_prevalence" value="0.1"/>
@@ -245,7 +275,7 @@ cd outputFolder && mkdir -p figures/ && cp *.pdf figures
         <test expect_num_outputs="5">
             <param name="input_data" value="HMP2_taxonomy.tsv"/>
             <param name="input_metadata" value="HMP2_metadata.tsv"/>
-            <param name="fixed_effects" value="diagnosis,dysbiosisnonIBD"/>
+            <param name="fixed_effects" value="4,9"/> <!-- column indices replacing the names diagnosis,dysbiosisnonIBD -->
             <section name="additional_options">
                 <param name="min_abundance" value="0.0001"/>
                 <param name="min_prevalence" value="0.1"/>
@@ -304,6 +334,100 @@ cd outputFolder && mkdir -p figures/ && cp *.pdf figures
                 </element>                                                                    
             </output_collection>
         </test>   
+        <test expect_num_outputs="5"> <!-- only a random effect (metadata column 3) is set, no fixed effects; BY correction -->
+            <param name="input_data" value="HMP2_taxonomy.tsv"/>
+            <param name="input_metadata" value="HMP2_metadata.tsv"/>
+            <param name="random_effects" value="3"/>
+            <section name="additional_options">
+                <param name="min_abundance" value="0.0"/>
+                <param name="min_prevalence" value="0.1"/>
+                <param name="max_significance" value="0.25"/>
+                <param name="normalization" value="TSS"/>
+                <param name="transform" value="LOG"/>
+                <param name="analysis_method" value="LM"/>
+                <param name="correction" value="BY"/>
+                <param name="standardize" value="True"/>
+            </section>
+            <section name="output">
+                <param name="plot_heatmap" value="true"/>
+                <param name="heatmap_first_n" value="50"/>
+                <param name="plot_scatter" value="true"/>
+                <param name="residuals_output" value="true"/>
+            </section>
+            <output name="all_results">
+                <assert_contents>
+                    <has_text text="feature"/>
+                    <has_n_lines n="8092"/>
+                    <has_n_columns n="9"/>
+                </assert_contents>
+            </output>
+            <output name="significant_results">
+                <assert_contents>
+                    <has_text text="subject"/>
+                    <has_n_lines n="216" delta="5"/>
+                    <has_n_columns n="9"/>
+                </assert_contents>
+            </output>
+            <output name="residuals">
+                <assert_contents>
+                    <has_size value="671142" delta="1000"/>
+                </assert_contents>
+            </output>
+            <output_collection name="figures_pdfs" type="list">
+                <element name="heatmap.pdf" ftype="pdf">
+                    <assert_contents>
+                        <has_size value="7000" delta="1000"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <test expect_num_outputs="5">
+            <param name="input_data" value="HMP2_taxonomy.tsv"/>
+            <param name="input_metadata" value="HMP2_metadata.tsv"/>
+            <!-- neither fixed_effects nor random_effects is set, so the tool runs without either option; BH correction -->
+            <section name="additional_options">
+                <param name="min_abundance" value="0.0"/>
+                <param name="min_prevalence" value="0.1"/>
+                <param name="max_significance" value="0.25"/>
+                <param name="normalization" value="TSS"/>
+                <param name="transform" value="LOG"/>
+                <param name="analysis_method" value="LM"/>
+                <param name="correction" value="BH"/>
+                <param name="standardize" value="True"/>
+            </section>
+            <section name="output">
+                <param name="plot_heatmap" value="true"/>
+                <param name="heatmap_first_n" value="50"/>
+                <param name="plot_scatter" value="true"/>
+                <param name="residuals_output" value="true"/>
+            </section>
+            <output name="all_results">
+                <assert_contents>
+                    <has_text text="feature"/>
+                    <has_n_lines n="8092"/>
+                    <has_n_columns n="9"/>
+                </assert_contents>
+            </output>
+            <output name="significant_results">
+                <assert_contents>
+                    <has_text text="subject"/>
+                    <has_n_lines n="880"/>
+                    <has_n_columns n="9"/>
+                </assert_contents>
+            </output>
+            <output name="residuals">
+                <assert_contents>
+                    <has_size value="670759" delta="1000"/>
+                </assert_contents>
+            </output>
+            <output_collection name="figures_pdfs" type="list">
+                <element name="heatmap.pdf" ftype="pdf">
+                    <assert_contents>
+                        <has_size value="7900" delta="1000"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
     </tests>
     <help><![CDATA[
 @HELP_HEADER@
@@ -347,6 +471,9 @@ Output
         - It only includes associations with q-values <= to the threshold.
     - Data frame with residuals for each feature (R data file)
         - This file contains a data frame with residuals for each feature.
+
+Correction methods for computing the q-value: https://www.rdocumentation.org/packages/stats/versions/3.6.2/topics/p.adjust
+
 2- Visualization output files
     - Heatmap of the significant associations (PDF file)
         - This file contains a heatmap of the significant associations.

diff --git a/tools/maaslin2/macros.xml b/tools/maaslin2/macros.xml
@@ -21,4 +21,4 @@
             <yield/>
         </requirements>
     </xml>
- </macros>
+ </macros>