Skip to content

Commit

Permalink
add new reference genomes (#6221)
Browse files Browse the repository at this point in the history
  • Loading branch information
bgruening authored Aug 11, 2024
1 parent 5b9441b commit 49a2861
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 8 deletions.
18 changes: 18 additions & 0 deletions tools/basil/all_fasta.loc.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#This file lists the locations and dbkeys of all the fasta files
#under the "genome" directory (a directory that contains a directory
#for each build). The script extract_fasta.py will generate the file
#all_fasta.loc. Each entry in this file has the following format
#(fields are separated by TAB characters, not spaces):
#
#<unique_build_id> <dbkey> <display_name> <file_path>
#
#So, all_fasta.loc could look something like this:
#
#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
#
#Your all_fasta.loc file should contain one entry for each individual
#fasta file. A single build (dbkey) may therefore have several entries,
#as with hg19 above.
#
54 changes: 46 additions & 8 deletions tools/basil/basil.xml
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
<tool id="basil" name="basil" version="1.2.0">
<tool id="basil" name="basil" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0">
<description>Breakpoint detection, including large insertions</description>
<macros>
<token name="@TOOL_VERSION@">1.2.0</token>
<token name="@VERSION_SUFFIX@">1</token>
</macros>
<requirements>
<requirement type="package" version="1.2.0">anise_basil</requirement>
<requirement type="package" version="@TOOL_VERSION@">anise_basil</requirement>
</requirements>
<version_command>basil --version 2>&amp;1 | grep 'basil version' | cut -f 3 -d ' '</version_command>
<version_command>basil --version 2&gt;&amp;1 | grep 'basil version' | cut -f 3 -d ' '</version_command>
<command detect_errors="aggressive"><![CDATA[
ln -s '$ref' 'ref.fa' &&
#if $reference_source.reference_source_selector == 'history':
ln -f -s '$reference_source.ref' ref.fa &&
#else:
ln -f -s '$reference_source.ref.fields.path' ref.fa &&
#end if
ln -s '$bam' 'in.bam' &&
ln -s '$vcf' 'out.vcf' &&
basil
Expand All @@ -15,18 +23,48 @@
--oea-min-support-each-side '$min_oea_each_side'
]]></command>
<inputs>
<param name="ref" argument="--input-reference" type="data" format="Fasta" label="Reference Sequence File" help="FASTA file with the reference."/>
<conditional name="reference_source">
<param name="reference_source_selector" type="select" label="Select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. See `Indexes` section of help below. If you would like to perform self-mapping select `history` here, then choose your input file as reference.">
<option value="cached">Use a built-in genome index</option>
<option value="history">Use a genome from history and build index</option>
</param>
<when value="cached">
<param name="ref" type="select" label="Using reference genome" help="Select genome from the list">
<options from_data_table="all_fasta">
<filter type="sort_by" column="2"/>
<validator type="no_options" message="No reference genomes are available"/>
</options>
<validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
</param>
</when>
<when value="history">
<param name="ref" argument="--input-reference" type="data" format="Fasta" label="Reference Sequence File" help="FASTA file with the reference."/>
</when>
</conditional>
<param name="bam" argument="--input-mapping" type="data" format="sam,bam" label="Alignment File" help="SAM/BAM file to use as the input."/>
<param name="min_oea_each_side" argument="--oea-min-support-each-side" type="integer" value="2" label="Minimum supporting reads, each side" help="Smallest number of OEA (one-end-anchor) reads on each side to support an insertion. In range [1..inf]. This is the minimum number of supporting reads (without mapped partners) on each side of an insertion breakpoint required to not be filtered." />
<param name="min_oea_each_side" argument="--oea-min-support-each-side" type="integer" value="2" label="Minimum supporting reads, each side" help="Smallest number of OEA (one-end-anchor) reads on each side to support an insertion. In range [1..inf]. This is the minimum number of supporting reads (without mapped partners) on each side of an insertion breakpoint required to not be filtered."/>
</inputs>
<outputs>
<data name="vcf" format="vcf" />
<data name="vcf" format="vcf"/>
</outputs>
<tests>
<test>
<conditional name="reference_source">
<param name="reference_source_selector" value="history" />
<param name="ref" ftype="fasta" value="ref.fa"/>
</conditional>
<param name="ref" value="ref.fa"/>
<param name="bam" value="simulated.bam"/>
<param name="min_oea_each_side" value="2"/>
<param name="min_oea_each_side" value="2"/>
<output name="vcf" file="basil.vcf"/>
</test>
<test>
<conditional name="reference_source">
<param name="reference_source_selector" value="cached" />
<param name="ref" value="genome"/>
</conditional>
<param name="bam" value="simulated.bam"/>
<param name="min_oea_each_side" value="2"/>
<output name="vcf" file="basil.vcf"/>
</test>
</tests>
Expand Down
1 change: 1 addition & 0 deletions tools/basil/test-data/all_fasta.loc.test
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
genome genome-dbkey genome-display ${__HERE__}/ref.fa
9 changes: 9 additions & 0 deletions tools/basil/tool_data_table_conf.xml.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc-->
<!-- Sample data table configuration: declares the "all_fasta" table that lists reference FASTA files. -->
<tables>
<!-- Locations of all fasta files under genome directory -->
<!-- comment_char="#": loc-file lines starting with "#" are comments. -->
<!-- NOTE(review): allow_duplicate_entries="False" presumably rejects repeated rows; confirm against the Galaxy data table documentation. -->
<table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
<!-- Column order matches the TAB-separated loc-file fields: unique build id (value), dbkey, display name (name), file path (path). -->
<columns>value, dbkey, name, path</columns>
<!-- Loc file that supplies the rows of this table. -->
<file path="tool-data/all_fasta.loc" />
</table>
</tables>

7 changes: 7 additions & 0 deletions tools/basil/tool_data_table_conf.xml.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<!-- Data table configuration used for tool tests: declares the "all_fasta" table backed by a loc file under test-data. -->
<tables>
<!-- Locations of all fasta files under genome directory -->
<!-- comment_char="#": loc-file lines starting with "#" are comments. -->
<table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
<!-- Column order of each TAB-separated loc-file row: value, dbkey, name, path. -->
<columns>value, dbkey, name, path</columns>
<!-- NOTE(review): ${__HERE__} presumably expands to the directory containing this configuration file; confirm against the Galaxy/planemo documentation. -->
<file path="${__HERE__}/test-data/all_fasta.loc.test" />
</table>
</tables>

0 comments on commit 49a2861

Please sign in to comment.