Skip to content

Commit

Permalink
Merge pull request galaxyproject#6504 from reytakop/sfold
Browse files Browse the repository at this point in the history
Add Sfold
  • Loading branch information
bgruening authored Nov 27, 2024
2 parents ba1e44d + 3a5c3a6 commit 4145612
Show file tree
Hide file tree
Showing 6 changed files with 518 additions and 0 deletions.
11 changes: 11 additions & 0 deletions tools/sfold/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
name: sfold
owner: iuc
description: "Predict the probable RNA secondary structures through structure ensemble sampling"
homepage_url: https://github.com/Ding-RNA-Lab/Sfold
long_description: |
Sfold is a software package for statistical folding and rational design of nucleic acids. It provides tools for predicting
RNA secondary structures, designing antisense oligonucleotides,
and analyzing RNA folding patterns.
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/sfold
categories:
- RNA
13 changes: 13 additions & 0 deletions tools/sfold/macros.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<macros>
<token name="@TOOL_VERSION@">2.2.0</token>
<token name="@VERSION_SUFFIX@">0</token>
<token name="@PROFILE@">23.0</token>
<xml name="requirements">
<requirement type="package" version="2.2">sfold</requirement>
</xml>
<xml name="citations">
<citations>
<citation type="doi">10.1093/nar/gkh449</citation>
</citations>
</xml>
</macros>
335 changes: 335 additions & 0 deletions tools/sfold/sfold.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,335 @@
<tool id="sfold" name="Sfold" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
<description>Prediction of RNA secondary structures through structure ensemble sampling</description>
<macros>
<import>macros.xml</import>
</macros>
<requirements>
<expand macro="requirements"/>
</requirements>
<command detect_errors="exit_code"><![CDATA[
#if not $non_commercial_use
>&2 echo "this tool is only available for non commercial use";
exit 1;
#end if
mkdir -p 'output' &&
sfold '$fasta'
-a $a
#if $f:
-f '$f'
#end if
#if str($l) != '':
-l $l
#end if
#if $m:
-m '$m'
#end if
-w $w
-i $i
]]></command>
<inputs>
<param name="non_commercial_use" label="I certify that I am not using this tool for commercial purposes." type="boolean" truevalue="NON_COMMERCIAL_USE" falsevalue="COMMERCIAL_USE" checked="False">
<validator type="expression" message="This tool is only available for non-commercial use.">value == True</validator>
</param>
<param argument="fasta" type="data" format="fasta" label="FASTA input"/>
<param argument="-a" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Run clustering on the sampled ensemble?"/>
<param argument="-f" type="data" format="ct" optional="true" label="Constrain data" help="Constraint syntax follows what is used in mfold 3.1"/>
<param argument="-l" type="integer" min="0" optional="true" label="Maximum distance between paired bases"/>
<param argument="-m" type="data" format="ct" optional="true" label="MFE structure" help="If provided, Sfold clustering module will determine the cluster to which this structure belongs."/>
<param argument="-w" type="integer" value="20" label="Length of antisense oligos"/>
<param argument="-i" type="select" label="Module">
<option value="0">None</option>
<option value="1">Sirna</option>
<option value="2">Soligo</option>
<option value="3">Both</option>
</param>
<param name="output_selector" type="select" multiple="true" optional="true" display="checkboxes" label="Output options">
<option value="ct" selected="true">Output ct files</option>
<option value="bp" selected="true">Output bp files</option>
<option value="clusters">Output cluster files</option>
</param>
</inputs>
<outputs>
<collection name="sfold_out" type="list" label="${tool.name} on ${on_string}: sfold log files">
<discover_datasets pattern="(?P&lt;name&gt;.+)\.out$" format="txt" directory="output/"/>
</collection>
<collection name="sfold_bp" type="list" label="${tool.name} on ${on_string}: sfold bp files">
<discover_datasets pattern="(?P&lt;name&gt;.+)\.bp$" format="txt" directory="output/"/>
<filter>'bp' in output_selector</filter>
</collection>
<collection name="sfold_ct" type="list" label="${tool.name} on ${on_string}: sfold ct files">
<discover_datasets pattern="(?P&lt;name&gt;.+)\.ct$" format="ct" directory="output/"/>
<filter>'ct' in output_selector</filter>
</collection>
<collection name="sfold_cluster_out" type="list" label="${tool.name} on ${on_string}: sfold cluster log files">
<discover_datasets pattern="(?P&lt;name&gt;.+)\.out$" format="txt" directory="output/clusters"/>
<discover_datasets pattern="(?P&lt;name&gt;.+)\.list$" format="txt" directory="output/clusters"/>
<filter>'clusters' in output_selector</filter>
</collection>
<collection name="sfold_cluster_bp" type="list" label="${tool.name} on ${on_string}: sfold cluster bp files">
<discover_datasets pattern="(?P&lt;name&gt;.+)\.bp$" format="txt" directory="output/clusters"/>
<filter>'clusters' in output_selector and 'bp' in output_selector</filter>
</collection>
<collection name="sfold_cluster_ct" type="list" label="${tool.name} on ${on_string}: sfold cluster ct files">
<discover_datasets pattern="(?P&lt;name&gt;.+)\.ct$" format="ct" directory="output/clusters"/>
<filter>'clusters' in output_selector and 'ct' in output_selector</filter>
</collection>
</outputs>
<tests>
<test expect_num_outputs="6">
<param name="non_commercial_use" value="true"/>
<param name="fasta" value="seq.fasta"/>
<param name="a" value="true"/>
<param name="f" value="cons.ct"/>
<param name="l" value="30"/>
<param name="m" value="mfe.ct"/>
<param name="w" value="20"/>
<param name="i" value="1"/>
<param name="output_selector" value="ct,bp,clusters"/>
<output_collection name="sfold_out" type="list">
<element name="10structure">
<assert_contents>
<has_n_lines n="770"/>
<has_text text="Structure 1 -28.90 0.74863E-01"/>
</assert_contents>
</element>
<element name="10structure_2">
<assert_contents>
<has_n_lines n="10"/>
</assert_contents>
</element>
<element name="bp.dist.from.ecentroid">
<assert_contents>
<has_n_lines n="1001"/>
<has_text text="1 5"/>
</assert_contents>
</element>
<element name="bp">
<assert_contents>
<has_text text="Structure 1"/>
</assert_contents>
</element>
<element name="bprob">
<assert_contents>
<has_n_lines n="4"/>
<has_text text=".16225072123690604808E+01L"/>
</assert_contents>
</element>
<element name="cdf">
<assert_contents>
<has_n_lines n="101"/>
</assert_contents>
</element>
<element name="fe">
<assert_contents>
<has_n_lines n="1000"/>
<has_text text="1 -28.70"/>
</assert_contents>
</element>
<element name="fe_init">
<assert_contents>
<has_n_lines n="1002"/>
<has_text text="Ensemble free energy (based on initial energies):-RT ln(U)= -28.297589224477537 "/>
</assert_contents>
</element>
<element name="loopr">
<assert_contents>
<has_n_lines n="76"/>
</assert_contents>
</element>
<element name="pdf">
<assert_contents>
<has_n_lines n="21"/>
</assert_contents>
</element>
<element name="sample">
<assert_contents>
<has_n_lines n="1"/>
<has_text text="Structure data in BP format removed to"/>
</assert_contents>
</element>
<element name="sample_1000">
<assert_contents>
<has_text text="(i j k): helix formed by base pairs i-j, (i+1)-(j-1),...,(i+k-1)-(j-k+1)"/>
</assert_contents>
</element>
<element name="sclass">
<assert_contents>
<has_text text="Sequence length = 76"/>
</assert_contents>
</element>
<element name="sfold">
<assert_contents>
<has_n_lines n="42"/>
<has_text text="1 G"/>
</assert_contents>
</element>
<element name="smfe">
<assert_contents>
<has_n_lines n="2"/>
<has_text text="Minimum free energy in the sample (SMFE)= -28.90 kcal/mol"/>
</assert_contents>
</element>
<element name="sstrand">
<assert_contents>
<has_n_lines n="76"/>
</assert_contents>
</element>
</output_collection>
<output_collection name="sfold_bp" type="list">
<element name="ecentroid">
<assert_contents>
<has_text text="1 72"/>
</assert_contents>
</element>
</output_collection>
<output_collection name="sfold_ct" type="list">
<element name="ecentroid">
<assert_contents>
<has_n_lines n="78"/>
</assert_contents>
</element>
</output_collection>
<output_collection name="sfold_cluster_out" type="list">
<element name="c01.2dhist">
<assert_contents>
<has_text text="1"/>
</assert_contents>
</element>
<element name="c01.bp.dist.from.ccentroid">
<assert_contents>
<has_text text="1"/>
</assert_contents>
</element>
<element name="c01.bp">
<assert_contents>
<has_text text="Structure 1"/>
</assert_contents>
</element>
<element name="c01.fe">
<assert_contents>
<has_text text=" 1"/>
</assert_contents>
</element>
<element name="c02.2dhist">
<assert_contents>
<has_text text="72"/>
</assert_contents>
</element>
<element name="c02.bp.dist.from.ccentroid">
<assert_contents>
<has_text text="1"/>
</assert_contents>
</element>
<element name="c02.bp">
<assert_contents>
<has_text text="Structure 1"/>
</assert_contents>
</element>
<element name="c02.fe">
<assert_contents>
<has_text text="10"/>
</assert_contents>
</element>
<element name="ch.index">
<assert_contents>
<has_n_lines n="19"/>
</assert_contents>
</element>
</output_collection>
<output_collection name="sfold_cluster_bp" type="list">
<element name="c01.ccentroid">
<assert_contents>
<has_text text="1 72"/>
</assert_contents>
</element>
<element name="c02.ccentroid">
<assert_contents>
<has_text text="Structure 1"/>
</assert_contents>
</element>
</output_collection>
<output_collection name="sfold_cluster_ct" type="list">
<element name="c01.ccentroid">
<assert_contents>
<has_n_lines n="78"/>
<has_text text="Length: 76"/>
</assert_contents>
</element>
<element name="c02.ccentroid">
<assert_contents>
<has_n_lines n="78"/>
<has_text text="Length: 76"/>
</assert_contents>
</element>
</output_collection>
</test>
<test expect_num_outputs="4">
<param name="non_commercial_use" value="true"/>
<param name="fasta" value="seq.fasta"/>
<param name="a" value="true"/>
<param name="f" value="cons.ct"/>
<param name="l" value="30"/>
<param name="m" value="mfe.ct"/>
<param name="w" value="20"/>
<param name="i" value="1"/>
<param name="output_selector" value="bp,clusters"/>
<output_collection name="sfold_cluster_bp" type="list">
<element name="c01.ccentroid">
<assert_contents>
<has_text text="1 72"/>
</assert_contents>
</element>
<element name="c02.ccentroid">
<assert_contents>
<has_text text="Structure 1"/>
</assert_contents>
</element>
</output_collection>
<output_collection name="sfold_bp" type="list">
<element name="ecentroid">
<assert_contents>
<has_text text="1 72"/>
</assert_contents>
</element>
</output_collection>
</test>
</tests>
<help><![CDATA[
Sfold is a software package for statistical folding and rational design of nucleic acids.
It provides tools for predicting RNA secondary structures, designing antisense oligonucleotides, and analyzing RNA folding patterns. Sfold is widely used in academic research for studying
RNA structure and function.
Input Format
-----------
The program requires one input file of RNA sequence in FASTA format.
Sfold options
------------
The program can run based on the clustering on the sampled ensemble or not.
It can also predict the secondary structure based on the constraint and MFE structure files.
The sfold has two different modules:
* SiRNA : Target accessibility prediction and RNA duplex thermodynamics for rational siRNA design
* Solig: Target accessibility prediction and rational design of antisense oligonucleotides and nucleic acid probes
The program can be run based on the above-mentioned modules, both of them or none of them.
Output Files
------------
The Sfold program generates 4 different types of output files. sfold.out and sclass.out in your output log files, save the general messages from standard output/error from which you can know if your Sfold run was successful. Most of the other
files contain a header section that defines the formats and meanings of the data in the output.
------------
Important: The program usage is restricted to non-commercial and academic research.
]]>
</help>
<expand macro="citations" />
</tool>
Loading

0 comments on commit 4145612

Please sign in to comment.