diff --git a/data_managers/data_manager_bwa_mem2_index_builder/.shed.yml b/data_managers/data_manager_bwa_mem2_index_builder/.shed.yml new file mode 100644 index 00000000000..f1c429c613a --- /dev/null +++ b/data_managers/data_manager_bwa_mem2_index_builder/.shed.yml @@ -0,0 +1,14 @@ +categories: +- Data Managers +description: Bwa-mem2 is the next version of the bwa-mem algorithm in bwa. +homepage_url: https://github.com/bwa-mem2/bwa-mem2 +long_description: | + Bwa-mem2 is the next version of the bwa-mem algorithm in bwa. It produces + alignment identical to bwa and is ~1.3-3.1x faster depending on the use-case, + dataset and the running machine. Bwa-mem2 uses a different index format that + is efficient on disk space and runtime memory but requires larger amounts of + memory (roughly 27x the reference) when building. +name: data_manager_bwa_mem2_index_builder +owner: iuc +remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_bwa_mem2_index_builder +type: unrestricted diff --git a/data_managers/data_manager_bwa_mem2_index_builder/data_manager/bwa_mem2_index_builder.xml b/data_managers/data_manager_bwa_mem2_index_builder/data_manager/bwa_mem2_index_builder.xml new file mode 100644 index 00000000000..1d7af2d5099 --- /dev/null +++ b/data_managers/data_manager_bwa_mem2_index_builder/data_manager/bwa_mem2_index_builder.xml @@ -0,0 +1,75 @@ + + + + 2.2.1 + 0 + + + bwa-mem2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 10.1109/IPDPS.2019.00041 + + diff --git a/data_managers/data_manager_bwa_mem2_index_builder/data_manager_conf.xml b/data_managers/data_manager_bwa_mem2_index_builder/data_manager_conf.xml new file mode 100644 index 00000000000..a757b4094d1 --- /dev/null +++ b/data_managers/data_manager_bwa_mem2_index_builder/data_manager_conf.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + + ${dbkey}/bwa_mem2_index/${value} + + ${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/bwa_mem2_index/${value}/${path} + abspath + + + 
+ + diff --git a/data_managers/data_manager_bwa_mem2_index_builder/test-data/all_fasta.loc b/data_managers/data_manager_bwa_mem2_index_builder/test-data/all_fasta.loc new file mode 100644 index 00000000000..c8b25f3f801 --- /dev/null +++ b/data_managers/data_manager_bwa_mem2_index_builder/test-data/all_fasta.loc @@ -0,0 +1,19 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +# +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. 
+# +phiX174 phiX174 phiX174 ${__HERE__}/phiX174.fasta diff --git a/data_managers/data_manager_bwa_mem2_index_builder/test-data/bwa_mem2_data_manager.1.json b/data_managers/data_manager_bwa_mem2_index_builder/test-data/bwa_mem2_data_manager.1.json new file mode 100644 index 00000000000..344c77f9b7e --- /dev/null +++ b/data_managers/data_manager_bwa_mem2_index_builder/test-data/bwa_mem2_data_manager.1.json @@ -0,0 +1,12 @@ +{ + "data_tables":{ + "bwa_mem2_indexes":[ + { + "value": "phiX174", + "dbkey": "phiX174", + "name": "phiX174", + "path": "phiX174.fasta" + } + ] + } +} diff --git a/data_managers/data_manager_bwa_mem2_index_builder/test-data/bwa_mem2_data_manager.2.json b/data_managers/data_manager_bwa_mem2_index_builder/test-data/bwa_mem2_data_manager.2.json new file mode 100644 index 00000000000..259853414c6 --- /dev/null +++ b/data_managers/data_manager_bwa_mem2_index_builder/test-data/bwa_mem2_data_manager.2.json @@ -0,0 +1,12 @@ +{ + "data_tables":{ + "bwa_mem2_indexes":[ + { + "value": "fooBar1", + "dbkey": "phiX174", + "name": "Galeocerdo cuvier", + "path": "phiX174.fasta" + } + ] + } +} diff --git a/data_managers/data_manager_bwa_mem2_index_builder/test-data/bwa_mem2_index.loc b/data_managers/data_manager_bwa_mem2_index_builder/test-data/bwa_mem2_index.loc new file mode 100644 index 00000000000..e69de29bb2d diff --git a/data_managers/data_manager_bwa_mem2_index_builder/test-data/phiX174.fasta b/data_managers/data_manager_bwa_mem2_index_builder/test-data/phiX174.fasta new file mode 100644 index 00000000000..53df885dc48 --- /dev/null +++ b/data_managers/data_manager_bwa_mem2_index_builder/test-data/phiX174.fasta @@ -0,0 +1,79 @@ +>phiX174 +GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT +GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA +ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG +TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA 
+GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC +TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT +TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT +CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT +TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG +TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC +GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA +CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAG +TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT +AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC +CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA +TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC +TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA +CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA +GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT +GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA +ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC +TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT +TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC +ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCGTGATGTTATTTCTTCATTTGGAGGTAAAAC +CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT +GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC +CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC +TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG +TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT +TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA +AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT 
+TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT +ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC +GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC +TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT +TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA +TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG +TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC +CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG +AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC +CGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT +TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG +CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA +AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT +GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG +GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA +TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT +CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG +TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA +GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC +CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA +TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA +AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC +TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT +CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA +TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG +TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT +CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT 
+TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC +ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG +TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA +ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG +GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC +CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT +GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTACTATTCAGCGTTTGATGAATGCAATGCGACAG +GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT +ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG +CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC +CGTCTTCATTTCCATGCGGTGCATTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC +GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT +CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG +CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA +TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT +TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG +TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC +AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC +TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA + diff --git a/data_managers/data_manager_bwa_mem2_index_builder/tool-data/all_fasta.loc.sample b/data_managers/data_manager_bwa_mem2_index_builder/tool-data/all_fasta.loc.sample new file mode 100644 index 00000000000..1a5a28d5e3f --- /dev/null +++ b/data_managers/data_manager_bwa_mem2_index_builder/tool-data/all_fasta.loc.sample @@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. 
This file has the format (white space characters are +#TAB characters): +# +# +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +# diff --git a/data_managers/data_manager_bwa_mem2_index_builder/tool-data/bwa_mem2_index.loc.sample b/data_managers/data_manager_bwa_mem2_index_builder/tool-data/bwa_mem2_index.loc.sample new file mode 100644 index 00000000000..d4c10ceadcd --- /dev/null +++ b/data_managers/data_manager_bwa_mem2_index_builder/tool-data/bwa_mem2_index.loc.sample @@ -0,0 +1,38 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of BWA-MEM2 indexed sequence data files. You will need +#to create these data files and then create a bwa_mem2_index.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The bwa_mem2_index.loc +#file has this format (longer white space characters are TAB characters): +# +# +# +#So, for example, if you had a phiX index stored in +#/depot/data2/galaxy/phiX/base/, +#then the bwa_mem2_index.loc entry would look like this: +# +#phiX174 phiX phiX Pretty /depot/data2/galaxy/phiX/base/phiX.fa +# +#and your /depot/data2/galaxy/phiX/base/ directory +#would contain phiX.fa.* files: +# +#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 phiX.fa.amb +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 phiX.fa.ann +#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 phiX.fa.bwt +#...etc... +# +#Your bwa_mem2_index.loc file should include an entry per line for each +#index set you have stored. 
The "file" in the path does not actually +#exist, but it is the prefix for the actual index files. For example: +# +#phiX174 phiX phiX174 /depot/data2/galaxy/phiX/base/phiX.fa +#hg18canon hg18 hg18 Canonical /depot/data2/galaxy/hg18/base/hg18canon.fa +#hg18full hg18 hg18 Full /depot/data2/galaxy/hg18/base/hg18full.fa +#/orig/path/hg19.fa hg19 hg19 /depot/data2/galaxy/hg19/base/hg19.fa +#...etc... +# +#Note that for backwards compatibility with workflows, the unique ID of +#an entry must be the path that was in the original loc file, because that +#is the value stored in the workflow for that parameter. That is why the +#hg19 entry above looks odd. New genomes can be better-looking. +# diff --git a/data_managers/data_manager_bwa_mem2_index_builder/tool_data_table_conf.xml.sample b/data_managers/data_manager_bwa_mem2_index_builder/tool_data_table_conf.xml.sample new file mode 100644 index 00000000000..48d0b020104 --- /dev/null +++ b/data_managers/data_manager_bwa_mem2_index_builder/tool_data_table_conf.xml.sample @@ -0,0 +1,8 @@ + + + + + value, dbkey, name, path + +
+
diff --git a/data_managers/data_manager_bwa_mem2_index_builder/tool_data_table_conf.xml.test b/data_managers/data_manager_bwa_mem2_index_builder/tool_data_table_conf.xml.test new file mode 100644 index 00000000000..75c253c2999 --- /dev/null +++ b/data_managers/data_manager_bwa_mem2_index_builder/tool_data_table_conf.xml.test @@ -0,0 +1,12 @@ + + + + value, dbkey, name, path + +
+ + + value, dbkey, name, path + +
+