Skip to content

Commit

Permalink
Normalize paths for genome fetch and some of the genome indexer data …
Browse files Browse the repository at this point in the history
…managers, plus additional modernization (#6489)

* Update data_manager_fetch_genome_dbkeys_all_fasta for normalized layout

* Update data_manager_bowtie_index_builder for normalized layout, plus:

- Move colorspace builder to deprecated
- Drop python wrapper
- Update bowtie version
- Add tests

* Update data_manager_bowtie2_index_builder for normalized layout, plus:

- Drop python wrapper
- Update bowtie2 version
- Add test of non-default options

* Update data_manager_bwa_mem_index_builder for normalized layout, plus:

- Drop python wrapper
- Update bwa version
- Add test of non-default options

* Update data_manager_bwa_mem2_index_builder for normalized layout

* Update data_manager_star_index_builder for normalized layout, plus:

- Drop python wrapper
- Add options to automatically calculate --genomeSAindexNbases and --genomeChrBinNbits

* Update data_manager_sam_fasta_index_builder for normalized layout, plus:

- Drop python wrapper
- Update samtools version
- Add test of non-default options
  • Loading branch information
natefoo authored Dec 5, 2024
1 parent 10dfb13 commit 57d0567
Show file tree
Hide file tree
Showing 53 changed files with 562 additions and 475 deletions.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,34 +1,72 @@
<tool id="bowtie2_index_builder_data_manager" name="Bowtie2 index" tool_type="manage_data" version="@WRAPPER_VERSION@+galaxy0" profile="18.09">
<tool id="bowtie2_index_builder_data_manager" name="Bowtie2 index" tool_type="manage_data" version="@WRAPPER_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0">
<description>builder</description>
<macros>
<token name="@WRAPPER_VERSION@">2.5.4</token>
<token name="@VERSION_SUFFIX@">0</token>
</macros>
<requirements>
<requirement type="package" version="@WRAPPER_VERSION@">bowtie2</requirement>
</requirements>
<macros>
<token name="@WRAPPER_VERSION@">2.4.4</token>
</macros>
<command detect_errors="exit_code"><![CDATA[
python '$__tool_directory__/bowtie2_index_builder.py'
'${out_file}'
--fasta_filename '${all_fasta_source.fields.path}'
--fasta_dbkey '${all_fasta_source.fields.dbkey}'
--fasta_description '${all_fasta_source.fields.name}'
--data_table_name bowtie2_indexes ${tophat2}
#set $value = $sequence_id or $all_fasta_source.fields.dbkey
#set $fasta_file_name = str($all_fasta_source.fields.path).split('/')[-1]
mkdir -p '${out_file.extra_files_path}' &&
ln -s '${all_fasta_source.fields.path}' '${out_file.extra_files_path}/${fasta_file_name}' &&
cd '${out_file.extra_files_path}' &&
bowtie2-build --threads \${GALAXY_SLOTS:-1} '${out_file.extra_files_path}/${fasta_file_name}' '${value}' &&
cp '$dmjson' '$out_file'
]]></command>
<configfiles>
<configfile name="dmjson"><![CDATA[#slurp
#set $fasta_file_name = str($all_fasta_source.fields.path).split('/')[-1]
#set $value = $sequence_id or $all_fasta_source.fields.dbkey
#set $name = $sequence_name or $all_fasta_source.fields.name
{
"data_tables":{
"bowtie2_indexes":[
{
"value": "${value}",
"dbkey": "${all_fasta_source.fields.dbkey}",
"name": "${name}",
"path": "${fasta_file_name}"
}
#if $tophat2:
],
"tophat2_indexes":[
{
"value": "${value}",
"dbkey": "${all_fasta_source.fields.dbkey}",
"name": "${name}",
"path": "${fasta_file_name}"
}
#end if
]
}
}
]]></configfile>
</configfiles>
<inputs>
<param name="all_fasta_source" type="select" label="Source FASTA Sequence">
<options from_data_table="all_fasta"/>
</param>
<param name="sequence_name" type="text" value="" label="Name of sequence" />
<param name="sequence_id" type="text" value="" label="ID for sequence" />
<param name="tophat2" type="boolean" truevalue="--data_table_name tophat2_indexes" falsevalue="" checked="True" label="Also make available for TopHat" help="Adds values to tophat2_indexes tool data table" />
<param name="tophat2" type="boolean" checked="True" label="Also make available for TopHat" help="Adds values to tophat2_indexes tool data table" />
</inputs>
<outputs>
<data name="out_file" format="data_manager_json"/>
</outputs>
<tests>
<test>
<param name="all_fasta_source" value="phiX174"/>
<output name="out_file" value="bowtie2_data_manager.json"/>
<output name="out_file" value="bowtie2_data_manager.1.json"/>
</test>
<test>
<param name="all_fasta_source" value="phiX174"/>
<param name="sequence_name" value="Galeocerdo cuvier"/>
<param name="sequence_id" value="tigHai1"/>
<param name="tophat2" value="False"/>
<output name="out_file" file="bowtie2_data_manager.2.json"/>
</test>
</tests>

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
<?xml version="1.0"?>
<data_managers>

<data_manager tool_file="data_manager/bowtie2_index_builder.xml" id="bowtie2_index_builder">
<data_table name="bowtie2_indexes">
<output>
Expand All @@ -10,9 +9,9 @@
<column name="path" output_ref="out_file" >
<move type="directory" relativize_symlinks="True">
<!-- <source>${path}</source>--> <!-- out_file.extra_files_path is used as base by default --> <!-- if no source, eg for type=directory, then refers to base -->
<target base="${GALAXY_DATA_MANAGER_DATA_PATH}">${dbkey}/bowtie2_index/${value}</target>
<target base="${GALAXY_DATA_MANAGER_DATA_PATH}">genomes/${dbkey}/bowtie_index/v2/${value}</target>
</move>
<value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/bowtie2_index/${value}/${path}</value_translation>
<value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/genomes/${dbkey}/bowtie_index/v2/${value}/${path}</value_translation>
<value_translation type="function">abspath</value_translation>
</column>
</output>
Expand All @@ -25,11 +24,10 @@
<column name="name" />
<column name="path" output_ref="out_file" >
<!-- no move, always happens as part of bowtie2 and uses that path -->
<value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/bowtie2_index/${value}/${path}</value_translation>
<value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/genomes/${dbkey}/bowtie_index/v2/${value}/${path}</value_translation>
<value_translation type="function">abspath</value_translation>
</column>
</output>
</data_table>
</data_manager>

</data_managers>
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@
#fasta file. So there will be multiple fasta files for each build,
#such as with hg19 above.
#
phiX174 phiX174 phiX174 ${__HERE__}/phiX174.fasta
phiX174 phiX174 phiX 174 ${__HERE__}/phiX174.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"data_tables":{
"bowtie2_indexes":[
{
"value": "phiX174",
"dbkey": "phiX174",
"name": "phiX 174",
"path": "phiX174.fasta"
}
],
"tophat2_indexes":[
{
"value": "phiX174",
"dbkey": "phiX174",
"name": "phiX 174",
"path": "phiX174.fasta"
}
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"data_tables":{
"bowtie2_indexes":[
{
"value": "tigHai1",
"dbkey": "phiX174",
"name": "Galeocerdo cuvier",
"path": "phiX174.fasta"
}
]
}
}

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,17 +1,39 @@
<tool id="bowtie_index_builder_data_manager" name="Bowtie index" tool_type="manage_data" version="1.2.1" profile="23.0">
<tool id="bowtie_index_builder_data_manager" name="Bowtie index" tool_type="manage_data" version="@WRAPPER_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0">
<description>builder</description>
<macros>
<token name="@WRAPPER_VERSION@">1.3.1</token>
<token name="@VERSION_SUFFIX@">0</token>
</macros>
<requirements>
<requirement type="package" version="1.2.0">bowtie</requirement>
<requirement type="package" version="3.8.3">python</requirement>
<requirement type="package" version="@WRAPPER_VERSION@">bowtie</requirement>
</requirements>
<command detect_errors="exit_code"><![CDATA[
python '$__tool_directory__/bowtie_index_builder.py'
'${out_file}'
--fasta_filename '${all_fasta_source.fields.path}'
--fasta_dbkey '${all_fasta_source.fields.dbkey}'
--fasta_description '${all_fasta_source.fields.name}'
--data_table_name bowtie_indexes
]]></command>
#set $fasta_file_name = str($all_fasta_source.fields.path).split('/')[-1]
mkdir -p '${out_file.extra_files_path}' &&
ln -s '${all_fasta_source.fields.path}' '${out_file.extra_files_path}/${fasta_file_name}' &&
bowtie-build '${out_file.extra_files_path}/${fasta_file_name}' '${out_file.extra_files_path}/${fasta_file_name}' &&
cp '$dmjson' '$out_file'
]]>
</command>
<configfiles>
<configfile name="dmjson"><![CDATA[#slurp
#set $fasta_file_name = str($all_fasta_source.fields.path).split('/')[-1]
#set $value = $sequence_id or $all_fasta_source.fields.dbkey
#set $name = $sequence_name or $all_fasta_source.fields.name
{
"data_tables":{
"bowtie_indexes":[
{
"value": "${value}",
"dbkey": "${all_fasta_source.fields.dbkey}",
"name": "${name}",
"path": "${fasta_file_name}"
}
]
}
}
]]></configfile>
</configfiles>
<inputs>
<param name="all_fasta_source" type="select" label="Source FASTA Sequence">
<options from_data_table="all_fasta"/>
Expand All @@ -22,6 +44,18 @@ python '$__tool_directory__/bowtie_index_builder.py'
<outputs>
<data name="out_file" format="data_manager_json"/>
</outputs>
<tests>
<test>
<param name="all_fasta_source" value="phiX174"/>
<output name="out_file" file="bowtie_data_manager.1.json"/>
</test>
<test>
<param name="all_fasta_source" value="phiX174"/>
<param name="sequence_name" value="Galeocerdo cuvier"/>
<param name="sequence_id" value="tigHai1"/>
<output name="out_file" file="bowtie_data_manager.2.json"/>
</test>
</tests>
<help><![CDATA[
.. class:: infomark
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,9 @@
<column name="path" output_ref="out_file" >
<move type="directory" relativize_symlinks="True">
<!-- <source>${path}</source>--> <!-- out_file.extra_files_path is used as base by default --> <!-- if no source, eg for type=directory, then refers to base -->
<target base="${GALAXY_DATA_MANAGER_DATA_PATH}">${dbkey}/bowtie_index</target>
<target base="${GALAXY_DATA_MANAGER_DATA_PATH}">genomes/${dbkey}/bowtie_index/v1/${value}</target>
</move>
<value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/bowtie_index/${path}</value_translation>
<value_translation type="function">abspath</value_translation>
</column>
</output>
</data_table>
</data_manager>
<data_manager tool_file="data_manager/bowtie_color_space_index_builder.xml" id="bowtie_color_space_index_builder">
<data_table name="bowtie_indexes_color">
<output>
<column name="value" />
<column name="dbkey" />
<column name="name" />
<column name="path" output_ref="out_file" >
<move type="directory" relativize_symlinks="True">
<!-- <source>${path}</source>--> <!-- out_file.extra_files_path is used as base by default --> <!-- if no source, eg for type=directory, then refers to base -->
<target base="${GALAXY_DATA_MANAGER_DATA_PATH}">${dbkey}/bowtie_index/color</target> <!-- confirm this as preferred location -->
</move>
<value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/bowtie_index/color/${path}</value_translation>
<value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/genomes/${dbkey}/bowtie_index/v1/${value}/${path}</value_translation>
<value_translation type="function">abspath</value_translation>
</column>
</output>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#This file lists the locations and dbkeys of all the fasta files
#under the "genome" directory (a directory that contains a directory
#for each build). The script extract_fasta.py will generate the file
#all_fasta.loc. This file has the format (white space characters are
#TAB characters):
#
#<unique_build_id> <dbkey> <display_name> <file_path>
#
#So, all_fasta.loc could look something like this:
#
#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
#
#Your all_fasta.loc file should contain an entry for each individual
#fasta file. So there will be multiple fasta files for each build,
#such as with hg19 above.
#
phiX174 phiX174 phiX 174 ${__HERE__}/phiX174.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"data_tables":{
"bowtie_indexes":[
{
"value": "phiX174",
"dbkey": "phiX174",
"name": "phiX 174",
"path": "phiX174.fasta"
}
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"data_tables":{
"bowtie_indexes":[
{
"value": "tigHai1",
"dbkey": "phiX174",
"name": "Galeocerdo cuvier",
"path": "phiX174.fasta"
}
]
}
}
Loading

0 comments on commit 57d0567

Please sign in to comment.