galaxyproject · ber58 · Jun 14, 2024 · Jun 14, 2024 · Jun 17, 2024 · Jun 17, 2024
diff --git a/tools/cami-opal/.shed.yml b/tools/cami-opal/.shed.yml
@@ -0,0 +1,12 @@
+name: cami_opal
+owner: iuc
+description: Evaluation package for metagenome binning and taxonomic assignments
+homepage_url: https://github.com/CAMI-challenge/OPAL
+long_description: |
+  OPAL is an evaluation package for metagenome binning and taxonomic assignments.
+  It provides performance metrics, results rankings, and comparative visualizations
+  for assessing multiple programs or parameter effects.
+remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/cami-opal/
+type: unrestricted
+categories:
+- Metagenomics
diff --git a/tools/cami-opal/cami_opal.xml b/tools/cami-opal/cami_opal.xml
@@ -0,0 +1,272 @@
+<tool id="cami_opal" name="CAMI OPAL" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Evaluation package for metagenome benchmark datasets</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="exit_code">
+        <![CDATA[
+            ## Runs opal.py on the gold standard and on every profile listed in the
+            ## "input_files" repeat. OPAL writes into ./output; the HTML report and
+            ## the tar.gz archive are only produced when the user asked for them.
+            #import re
+
+            ## Set environment variable to ignore specific Python warnings
+            export PYTHONWARNINGS="ignore::FutureWarning" &&
+
+            mkdir -p inputs &&
+
+            ## Link each profile into ./inputs. Element identifiers are user
+            ## controlled, so every character other than word characters, "." and
+            ## "-" is replaced with "_" before the name is placed inside a quoted
+            ## shell argument. The same cleaned path is reused on the opal.py line.
+            #set $profile_links = []
+            #for $entry in $input_files:
+                #set $safe_name = re.sub('[^\w\-.]', '_', str($entry.file.element_identifier))
+                #silent $profile_links.append('inputs/' + $safe_name)
+                ln -s '$entry.file' 'inputs/$safe_name' &&
+            #end for
+
+            ## Call the opal.py script with the appropriate arguments
+            opal.py
+            -g '${gold_standard_file}'
+
+            #for $link in $profile_links:
+                '$link'
+            #end for
+
+            #if $normalize:
+                -n
+            #end if
+            ## Compare against the empty string so that an explicit 0 is still passed on
+            #if str($filter) != '':
+                -f '${filter}'
+            #end if
+            #if $plot_abundances:
+                -p
+            #end if
+            #if $labels:
+                -l '${labels}'
+            #end if
+            #if $time:
+                -t '${time}'
+            #end if
+            #if $memory:
+                -m '${memory}'
+            #end if
+            #if $desc:
+                -d '${desc}'
+            #end if
+            #if $ranks:
+                -r '${ranks}'
+            #end if
+            #if $metrics_plot_rel:
+                --metrics_plot_rel '${metrics_plot_rel}'
+            #end if
+            #if $metrics_plot_abs:
+                --metrics_plot_abs '${metrics_plot_abs}'
+            #end if
+            #if $branch_length_function:
+                -b '${branch_length_function}'
+            #end if
+            #if $normalized_unifrac:
+                --normalized_unifrac
+            #end if
+            -o output
+
+            ## HTML report: results.html becomes the dataset itself and the whole
+            ## output directory is copied into the dataset's extra-files folder.
+            ## The folder is only touched here because the "html" output does not
+            ## exist when the report is disabled.
+            #if $html_output:
+                #set $htmloutputfolder = $html.files_path
+                && mkdir -p '$htmloutputfolder'
+                && cp output/results.html '${html}'
+                && cp -r output/* '$htmloutputfolder'
+            #end if
+
+            ## Archive the OPAL output directory itself, so the archive has short
+            ## relative member names (output/...) and is complete even when the
+            ## HTML report is disabled.
+            #if $tar_output:
+                && tar -czf output.tar.gz output
+            #end if
+        ]]>
+    </command>
+
+
+    <inputs>
+        <!-- Mandatory data: one gold standard plus at least one profile to evaluate -->
+        <param name="gold_standard_file" format="tabular" type="data" label="Gold standard file" 
+            help="Input the gold standard file here. Recommended format: CAMI Profiling Bioboxes or BIOM format." />
+        <!-- min="1": the command line needs at least one profile after the gold standard -->
+        <repeat name="input_files" title="Input files" min="1"
+            help="Enter multiple input files. Recommended format: CAMI Profiling Bioboxes or BIOM format.">
+            <param name="file" format="tabular" type="data" label="Input file" />
+        </repeat>
+        <!-- Switches that decide which optional datasets the tool produces -->
+        <param name="tar_output" type="boolean" label="Output in tar.gz format" help="Choose whether to tar.gz all outputs into a single file." checked="false" />
+        <param name="html_output" type="boolean" label="Output in HTML format" help="Select this option to generate an HTML file that contains the analysis results." truevalue="true" checked="true" />
+        <!-- Optional OPAL settings; an empty value means the flag is not passed -->
+        <param name="normalize" type="boolean" optional="true"
+            label="Normalize samples" />
+        <!-- FILTER is a percentage, hence the 0-100 bounds -->
+        <param argument="--filter" type="integer" value="" min="0" max="100" optional="true"
+            label="Filter out the smallest relative abundances summing up to FILTER% within a rank"
+            help="Filter out the predictions with the smallest relative abundances summing up to FILTER% within a rank" />
+        <param name="plot_abundances" type="boolean" optional="true"
+            label="Plot abundances in the gold standard"
+            help="Plot abundances in the gold standard (can take some minutes)" />
+        <param name="labels" type="text" value="" optional="true"
+            label="Comma-separated profiles names"
+            help="Comma-separated profiles names" />
+        <param argument="--time" type="text" value="" optional="true"
+            label="Comma-separated runtimes in hours"
+            help="Comma-separated runtimes in hours" />
+        <param argument="--memory" type="text" value="" optional="true"
+            label="Comma-separated memory usages in gigabytes"
+            help="Comma-separated memory usages in gigabytes" />
+        <param argument="--desc" type="text" value="" optional="true"
+            label="HTML description"
+            help="Enter the HTML page description here" />
+        <param argument="--ranks" type="text" value="" optional="true"
+            label="Highest and lowest taxonomic ranks to consider in performance rankings"
+            help="Valid ranks: superkingdom, phylum, class, order, family, genus, species, strain" />
+        <param argument="--metrics_plot_rel" type="text" value="" optional="true"
+            label="Metrics for spider plot of relative performances"
+            help="Valid metrics: w:weighted Unifrac, l:L1 norm, c:completeness, p:purity, f:false positives, t:true positives" />
+        <param argument="--metrics_plot_abs" type="text" value="" optional="true"
+            label="Metrics for spider plot of absolute performances"
+            help="Valid metrics: c:completeness, p:purity, b:Bray-Curtis" />
+        <param argument="--branch_length_function" type="text" value="" optional="true"
+            label="UniFrac tree branch length function"
+            help="Default: 'lambda x: 1/x', where x=tree depth" />
+        <param name="normalized_unifrac" type="boolean" optional="true"
+            label="Compute normalized version of weighted UniFrac"
+            help="Compute normalized version of weighted UniFrac by dividing by the theoretical max unweighted UniFrac" />
+    </inputs>
+    <outputs>
+        <!-- Optional archive, collected from output.tar.gz; kept only when "tar_output" is checked -->
+        <data format="tar" name="archive_output" label="${tool.name} on ${on_string}: tar.gz of the complete output" from_work_dir="output.tar.gz">
+            <filter>tar_output</filter>
+        </data>
+        <!-- Optional HTML report, written by the command itself; kept only when "html_output" is checked -->
+        <data name="html" format="html" label="${tool.name} on ${on_string}: HTML report">
+            <filter>html_output</filter>
+        </data>
+        <!-- Always produced: the results table collected from output/results.tsv -->
+        <data format="tabular" name="result" label="${tool.name} on ${on_string}: Results" from_work_dir="output/results.tsv"/>
+    </outputs>
+    <tests>
+        <!-- Two profiles with custom labels and the HTML report switched off: only the results table is produced -->
+        <test expect_num_outputs="1">
+            <param name="gold_standard_file" value="goldstandard_low_1.bin"/>
+            <repeat name="input_files">
+                <param name="file" value="evil_darwin_13"/>
+            </repeat>
+            <repeat name="input_files">
+                <param name="file" value="jolly_pasteur_3"/>
+            </repeat>
+            <param name="html_output" value="false"/>
+            <param name="normalize" value="false"/>
+            <param name="labels" value="Profile1, Profile2"/>
+            <output name="result" ftype="tabular">
+                <assert_contents>
+                    <has_text text="Profile1"/>
+                    <has_text text="Profile2"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- HTML report enabled: the page description must show up in the report -->
+        <test expect_num_outputs="2">
+            <param name="gold_standard_file" value="goldstandard_low_1.bin"/>
+            <repeat name="input_files">
+                <param name="file" value="evil_darwin_13"/>
+            </repeat>
+            <repeat name="input_files">
+                <param name="file" value="jolly_pasteur_3"/>
+            </repeat>
+            <param name="desc" value="Test description for OPAL"/>
+            <param name="html_output" value="true"/>
+            <output name="html" ftype="html">
+                <assert_contents>
+                    <has_text text="Test description for OPAL"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- Most options set at once (time and memory are left empty), with both the HTML report and the archive requested -->
+        <test expect_num_outputs="3">
+            <param name="gold_standard_file" value="goldstandard_low_1.bin"/>
+            <repeat name="input_files">
+                <param name="file" value="evil_darwin_13"/>
+            </repeat>
+            <repeat name="input_files">
+                <param name="file" value="jolly_pasteur_3"/>
+            </repeat>
+            <param name="normalize" value="true"/>
+            <param name="filter" value="5"/>
+            <param name="plot_abundances" value="true"/>
+            <param name="labels" value="Sample1,Sample2"/>
+            <param name="desc" value="Detailed test description for OPAL"/>
+            <param name="ranks" value="superkingdom,species"/>
+            <param name="metrics_plot_rel" value="w,l,c,p,f,t"/>
+            <param name="metrics_plot_abs" value="c,p,b"/>
+            <param name="branch_length_function" value="lambda x: 1/x"/>
+            <param name="normalized_unifrac" value="true"/>
+            <param name="html_output" value="true"/>
+            <param name="tar_output" value="true"/>
+            <output name="html" ftype="html">
+                <assert_contents>
+                    <has_text text="Detailed test description for OPAL"/>
+                </assert_contents>
+            </output>
+            <!-- The archive must contain both the results table and the HTML page -->
+            <output name="archive_output">
+                <assert_contents>
+                    <has_archive_member path=".*\/results\.tsv"/>
+                    <has_archive_member path=".*\/results\.html"/>
+                </assert_contents>
+            </output>
+            <output name="result" ftype="tabular">
+                <assert_contents>
+                    <has_text text="Sample1"/>
+                    <has_text text="Sample2"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+    <![CDATA[
+        .. class:: infomark
+
+        **What is OPAL**
+
+        OPAL is an evaluation package for the comparative assessment of metagenome benchmark datasets. It calculates multiple metrics per dataset and provides results rankings and visualizations for assessing multiple programs or parameter effects.
+
+        **What it does**
+
+        OPAL performs the following key tasks:
+
+        - Evaluates profiles using a gold standard file.
+        - Generates multiple metrics for each profile.
+        - Provides comparative visualizations and performance rankings.
+
+        For more information, please visit `OPAL on GitHub <https://github.com/CAMI-challenge/OPAL>`_.
+
+        **Input**
+
+        OPAL requires the following inputs:
+
+        1. **Gold Standard File**
+            - This file is essential for the evaluation and should be in CAMI Profiling Bioboxes format or BIOM format.
+
+        2. **Profiles Files**
+            - Multiple profiles files are required for evaluation. These files should be in CAMI Profiling Bioboxes format or BIOM format.
+
+        **Optional Arguments**
+
+        - **Normalization**: Normalize the samples.
+        - **Filter**: Filter out the predictions with the smallest relative abundances summing up to the specified percentage within a rank.
+        - **Plot Abundances**: Generate plots for abundances in the gold standard (can take some minutes).
+        - **Labels**: Comma-separated names for the profiles.
+        - **Runtime**: Comma-separated runtimes in hours for each profile.
+        - **Memory Usage**: Comma-separated memory usages in gigabytes for each profile.
+        - **Description**: Description for the HTML page output.
+        - **Ranks**: Comma-separated highest and lowest taxonomic ranks to consider in performance rankings. Valid ranks include superkingdom, phylum, class, order, family, genus, species, and strain.
+        - **Metrics for Relative Performance Plot**: Comma-separated metrics for the spider plot of relative performances. Valid metrics include weighted Unifrac (w), L1 norm (l), completeness (c), purity (p), false positives (f), and true positives (t).
+        - **Metrics for Absolute Performance Plot**: Comma-separated metrics for the spider plot of absolute performances. Valid metrics include completeness (c), purity (p), and Bray-Curtis (b).
+        - **UniFrac Tree Branch Length Function**: Function for computing UniFrac tree branch length. Default is "lambda x: 1/x", where x is the tree depth.
+        - **Normalized UniFrac**: Compute normalized version of weighted UniFrac by dividing by the theoretical max unweighted UniFrac.
+
+        **Outputs**
+
+        OPAL generates the following outputs:
+
+        1. **HTML Report**: An HTML file containing visualizations and summary of the evaluation.
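+        2. **Results File**: A TSV file with detailed evaluation metrics for each profile.
+        3. **Archive** (optional): A tar.gz file bundling the output files, produced only when "Output in tar.gz format" is selected.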
+
+        **Additional Information**
+
+        OPAL requires specific Python packages to run. Ensure all dependencies are installed. For more information, refer to the `OPAL GitHub page <https://github.com/CAMI-challenge/OPAL>`_.
+    ]]>
+    </help>
+    <expand macro="citations" />
+</tool>
diff --git a/tools/cami-opal/macros.xml b/tools/cami-opal/macros.xml
@@ -0,0 +1,17 @@
+<macros>
+    <!-- Version tokens: the tool version is built as @TOOL_VERSION@+galaxy@VERSION_SUFFIX@ -->
+    <token name="@TOOL_VERSION@">1.0.12</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">21.05</token>
+    <!-- Package requirement pinned to the wrapped tool version; extra requirements can be yielded in -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">cami-opal</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <!-- OPAL publication (Meyer et al., Genome Biology 2019) -->
+            <citation type="doi">10.1186/s13059-019-1646-y</citation>
+            <!-- CAMI II challenge paper -->
+            <citation type="doi">10.1038/s41592-022-01431-4</citation>
+            <yield/>
+        </citations>
+    </xml>
+</macros>