Skip to content

Commit

Permalink
Add new tool : Prodigal (#5855)
Browse files Browse the repository at this point in the history
* Import data test + init xml file

* New datatest + xml modif (out_format, mode)

* All parameters + test passed

* .shed file

* Update tools/prodigal/.shed.yml

Co-authored-by: Björn Grüning <[email protected]>

* Update tools/prodigal/prodigal.xml

Co-authored-by: Björn Grüning <[email protected]>

* Update tools/prodigal/prodigal.xml

Change format

Co-authored-by: Björn Grüning <[email protected]>

* Update tools/prodigal/prodigal.xml

Change format

Co-authored-by: Björn Grüning <[email protected]>

* Update tools/prodigal/prodigal.xml

Delete git citation

Co-authored-by: Björn Grüning <[email protected]>

* Update .shed.yml

* Update .shed.yml

* Update tools/prodigal/prodigal.xml

Change help

Co-authored-by: Björn Grüning <[email protected]>

* change format and reduce input

* reduce input size

* macro tokens version

* small final touches

* Delete tools/prodigal/macros.xml

* good test!

* Update tools/prodigal/prodigal.xml

Co-authored-by: Bérénice Batut <[email protected]>

---------

Co-authored-by: Björn Grüning <[email protected]>
Co-authored-by: Bérénice Batut <[email protected]>
  • Loading branch information
3 people authored Mar 14, 2024
1 parent 3974c0f commit fc5c693
Show file tree
Hide file tree
Showing 21 changed files with 54,122 additions and 0 deletions.
12 changes: 12 additions & 0 deletions tools/prodigal/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
categories:
- Genome annotation
description: A protein-coding gene prediction software tool for bacterial and archaeal genomes
long_description: |
Prodigal provides fast, accurate protein-coding gene predictions in GFF3, Genbank, or Sequin table format and runs smoothly on finished genomes, draft genomes, and metagenomes.
It's an unsupervised machine-learning algorithm. It does not need to be provided with any training data, and instead automatically learns the properties of the genome from the
sequence itself, including genetic code, RBS motif usage, start codon usage, and coding statistics.
name: prodigal
owner: iuc
homepage_url: https://github.com/hyattpd/Prodigal
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/prodigal
type: unrestricted
131 changes: 131 additions & 0 deletions tools/prodigal/prodigal.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
<tool id="prodigal" name="Prodigal Gene Predictor" version="@VERSION@+galaxy0" profile="22.05">
<description>A tool for gene prediction in microbial genomes</description>
<macros>
<token name="@VERSION@">2.6.3</token>
</macros>
<xrefs>
<xref type="bio.tools">prodigal</xref>
</xrefs>
<requirements>
<requirement type="package" version="@VERSION@">prodigal</requirement>
</requirements>
<command detect_errors="exit_code"><![CDATA[
prodigal
-i '$input_fa'
#if $input_train
-t '$input_train'
#end if
-o '$output'
-f '$out_format'
-p '$procedure'
-g '$trans_table'
-a '$output_faa'
-d '$output_fnn'
-s '$output_start'
$closed
$force_nonsd
$masked_seq
]]></command>
<inputs>
<param type="data" name="input_fa" format="fasta" label="Specify input file" help="-i" />
<param type="data" name="input_train" format="fasta" label="Specify training file" optional="true" help="Read and use the specified training file (-t)" />
<param type="select" name="out_format" label="Specify output format" help="-f" >
<option value="gbk" selected="true">GBK (default)</option>
<option value="gff">GFF</option>
<option value="sqn">SQN</option>
<option value="sco">SCO</option>
</param>
<param type="select" name="procedure" label="Specify mode" help="-p" >
<option value="single" selected="true">Single : Single genome, any number of
sequences (default)</option>
<option value="meta">Meta : Anonymous sequences, analyze
using preset training files, ideal for metagenomic data or single short sequences</option>
</param>
<param type="select" name="trans_table" label="Specify a translation table to use" help="-g" >
<option value="11">11 : Standard Bacteria/Archaea (default)</option>
<option value="4">4 : Mycoplasma/Spiroplasma</option>
<option value="#"># : Other genetic codes 1-25</option>
</param>
<param name="closed" type="boolean" label="Closed ends" truevalue="-c" falsevalue="" optional="true" help="Closed ends. Do not allow partial genes
at edges of sequence (-c)" />
<param name="force_nonsd" type="boolean" label="Force Prodigal to scan for
motifs" truevalue="-n" falsevalue="" optional="true" help="Do not use the Shine-Dalgarno RBS finder and instead force Prodigal to scan for motifs (-n)" />
<param name="masked_seq" type="boolean" label="Treat runs of N as masked sequence" truevalue="-m" falsevalue="" optional="true" help="Treat runs of N as masked sequence; don't build genes across them (-m)" />
</inputs>
<outputs>
<data name="output" format="genbank" label="${tool.name} on ${on_string} : coordinates">
<change_format>
<when input="out_format" value="gff" format="gff3" />
<when input="out_format" value="sqn" format="asn1" />
<when input="out_format" value="sco" format="genbank" />
</change_format>
</data>
<data name="output_faa" format="fasta" label="${tool.name} on ${on_string} : protein translations file" />
<data name="output_fnn" format="fasta" label="${tool.name} on ${on_string} : nucleotide sequences file" />
<data name="output_start" format="tabular" label="${tool.name} on ${on_string} : complete starts file" />
</outputs>
<tests>
<test>
<param name="input_fa" value="input.fna"/>
<output name="output" file="output"/>
</test>
<test>
<param name="input_fa" value="input.fna"/>
<output name="output" file="output" ftype="genbank"/>
<output name="output_faa" file="output.faa"/>
<output name="output_fnn" file="output.fnn"/>
<output name="output_start" file="output.start"/>
</test>
<test>
<param name="input_fa" value="input.fna"/>
<param name="out_format" value="gff"/>
<output name="output" file="output.gff" ftype="gff3" />
<output name="output_faa" file="output.faa"/>
<output name="output_fnn" file="output.fnn"/>
<output name="output_start" file="output.start"/>
</test>
<test>
<param name="input_fa" value="input.fna"/>
<param name="trans_table" value="4"/>
<output name="output" file="output_tt" ftype="genbank"/>
<output name="output_faa" file="output_tt.faa"/>
<output name="output_fnn" file="output_tt.fnn"/>
<output name="output_start" file="output_tt.start"/>
</test>
<test>
<param name="input_fa" value="input.fna"/>
<param name="procedure" value="single"/>
<param name="closed" value="true"/>
<param name="force_nonsd" value="true"/>
<param name="masked_seq" value="true"/>
<output name="output" file="output_c_n_m" ftype="genbank"/>
<output name="output_faa" file="output_c_n_m.faa"/>
<output name="output_fnn" file="output_c_n_m.fnn"/>
<output name="output_start" file="output_c_n_m.start"/>
</test>
<test>
<param name="input_fa" value="input.fna"/>
<param name="input_train" value="train_file.trn"/>
<output name="output" file="output_train" ftype="genbank"/>
<output name="output_faa" file="output_train.faa"/>
<output name="output_fnn" file="output_train.fnn"/>
<output name="output_start" file="output_train.start"/>
</test>
</tests>
<help><![CDATA[
Fast, reliable protein-coding gene prediction for prokaryotic genomes.
Features:
* Predicts protein-coding genes: Prodigal provides fast, accurate protein-coding gene predictions in GFF3, Genbank, or Sequin table format.
* Handles draft genomes and metagenomes: Prodigal runs smoothly on finished genomes, draft genomes, and metagenomes.
* Runs quickly: Prodigal analyzes the E. coli K-12 genome in 10 seconds on a modern MacBook Pro.
* Runs unsupervised: Prodigal is an unsupervised machine learning algorithm. It does not need to be provided with any training data, and instead automatically learns the properties of the genome from the sequence itself, including RBS motif usage, start codon usage, and coding statistics.
* Handles gaps and partial genes: The user can specify if Prodigal should build genes across runs of N's as well as how to handle genes at the edges of contigs.
* Identifies translation initiation sites: Prodigal predicts the correct translation initiation site for most genes, and can output information about every potential start site in the genome, including confidence score, RBS motif, and much more.
]]></help>
<citations>
<citation type="doi">10.1186/1471-2105-11-119</citation>
</citations>
</tool>
Loading

0 comments on commit fc5c693

Please sign in to comment.