-
Notifications
You must be signed in to change notification settings - Fork 440
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #5465 from AnnaSyme/taxpasta
add initial taxpasta wrapper
- Loading branch information
Showing
13 changed files
with
2,619 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
name: taxpasta | ||
owner: iuc | ||
description: standardise taxonomic profiles | ||
homepage_url: https://taxpasta.readthedocs.io/en/latest/ | ||
long_description: | | ||
Standardise and merge taxonomic profiles into one tabular report. | ||
remote_repository_url: https://github.com/taxprofiler/taxpasta | ||
categories: | ||
- Sequence Analysis |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,238 @@ | ||
<tool id="taxpasta" name="taxpasta" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | ||
<description>standardise taxonomic profiles</description> | ||
<macros> | ||
<token name="@TOOL_VERSION@">0.6.1</token> | ||
<token name="@VERSION_SUFFIX@">0</token> | ||
<token name="@PROFILE@">22.01</token> | ||
</macros> | ||
<xrefs> | ||
<xref type="bio.tools">taxpasta</xref> | ||
</xrefs> | ||
<requirements> | ||
<requirement type="package" version="@TOOL_VERSION@">taxpasta</requirement> | ||
</requirements> | ||
<command detect_errors="exit_code"><![CDATA[ | ||
taxpasta | ||
$action.action | ||
--profiler $profiler | ||
--taxonomy '$taxonomy.fields.path' | ||
#if $action.action == 'merge' | ||
--output-format '$action.format.output_format' | ||
#if $action.format.output_format == 'TSV' | ||
--output '$tabular_output' | ||
$action.format.wide | ||
#else if $action.format.output_format == 'BIOM' | ||
--output '$biom_output' | ||
#end if | ||
#else | ||
--output-format 'TSV' | ||
--output '$tabular_output' | ||
#end if | ||
$add_name | ||
$add_rank | ||
$add_lineage | ||
$add_id_lineage | ||
$add_rank_lineage | ||
#for $file in $infile | ||
'$file' | ||
#end for | ||
]]></command> | ||
<inputs> | ||
<conditional name="action"> | ||
<param name="action" type="select" label="Taxpasta action"> | ||
<option value="standardise">Standardise input(s)</option> | ||
<option value="merge">Standardise and combine multiple taxonomic profiles from the same profiler</option> | ||
</param> | ||
<when value="standardise"/> | ||
<when value="merge"> | ||
<conditional name="format"> | ||
<param argument="--output-format" type="select" label="Desired output format"> | ||
<option value="TSV">Tabular</option> | ||
<option value="BIOM">BIOM</option> | ||
</param> | ||
<when value="TSV"> | ||
<param argument="--wide" type="boolean" truevalue="--wide" falsevalue="--long" checked="true" label="Output merged abundance data in either wide or (tidy) long format"/> | ||
</when> | ||
<when value="BIOM"/> | ||
</conditional> | ||
</when> | ||
</conditional> | ||
<param argument="--profiler" type="select" label="What profiling tool created this report?" help="Select one profiler only"> | ||
<option value="bracken">Bracken</option> | ||
<option value="Centrifuge">Centrifuge</option> | ||
<option value="diamond">DIAMOND</option> | ||
<option value="ganon">ganon</option> | ||
<option value="kaiju">Kaiju</option> | ||
<option value="kraken2">Kraken2</option> | ||
<option value="krakenuniq">KrakenUniq</option> | ||
<option value="megan6">MEGAN6/MALT</option> | ||
<option value="metaphlan">MetaPhlAn</option> | ||
<option value="motus">mOTUs</option> | ||
</param> | ||
<param name="infile" type="data" format="tabular" multiple="true" label="Taxonomic report(s) to standardise" help="The reports should be from the same profiling tool" /> | ||
<param argument="--taxonomy" type="select" label="NCBI taxonomy" help="To have actual human-readable taxon names in the standardised output"> | ||
<options from_data_table="ncbi_taxonomy"> | ||
<validator message="No NCBI database is available" type="no_options"/> | ||
</options> | ||
</param> | ||
<param argument="--add-name" type="boolean" truevalue="--add-name" falsevalue="" checked="true" label="Add the taxon name to the output"/> | ||
<param argument="--add-rank" type="boolean" truevalue="--add-rank" falsevalue="" checked="false" label="Add the taxon rank to the output"/> | ||
<param argument="--add-lineage" type="boolean" truevalue="--add-lineage" falsevalue="" checked="false" label="Add the taxon's entire lineage to the output" | ||
help="The taxon names are separated by semi-colons"/> | ||
<param argument="--add-id-lineage" type="boolean" truevalue="--add-id-lineage" falsevalue="" checked="false" label="Add the taxon's entire lineage to the output" | ||
help="The taxon identifiers are separated by semi-colons"/> | ||
<param argument="--add-rank-lineage" type="boolean" truevalue="--add-rank-lineage" falsevalue="" checked="false" label="Add the taxon's entire rank lineage to the output" | ||
help="These are taxon ranks separated by semi-colons"/> | ||
</inputs> | ||
<outputs> | ||
<data name="tabular_output" format="tabular" label="${tool.name} on ${on_string}: Tabular"> | ||
<filter>(action['action'] == 'merge' and action['format']['output_format'] == 'TSV') or action['action'] == 'standardise'</filter> | ||
</data> | ||
<data name="biom_output" format="biom1" label="${tool.name} on ${on_string}: BIOM"> | ||
<filter>(action['action'] == 'merge' and action['format']['output_format'] == 'BIOM')</filter> | ||
</data> | ||
</outputs> | ||
<tests> | ||
<!-- test data from taxpasta https://github.com/taxprofiler/taxpasta/tree/dev/tests/data--> | ||
<test expect_num_outputs="1"> | ||
<conditional name="action"> | ||
<param name="action" value="standardise"/> | ||
</conditional> | ||
<param name="profiler" value="metaphlan"/> | ||
<param name="infile" value="ERR7569997.txt"/> | ||
<param name="taxonomy" value="test-db-tox"/> | ||
<param name="add_name" value="true"/> | ||
<param name="add_rank" value="false" /> | ||
<param name="add_lineage" value="false"/> | ||
<param name="add_id_lineage" value="false"/> | ||
<param name="add_rank_lineage" value="false"/> | ||
<output name="tabular_output" ftype="tabular"> | ||
<assert_contents> | ||
<has_text text="Proteobacteria"/> | ||
<has_n_columns n="3"/> | ||
<has_n_lines n="241"/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
<!-- test multiple inputs and the merge command--> | ||
<test expect_num_outputs="1"> | ||
<conditional name="action"> | ||
<param name="action" value="merge"/> | ||
<conditional name="format"> | ||
<param name="output_format" value="TSV"/> | ||
<param name="wide" value="true"/> | ||
</conditional> | ||
</conditional> | ||
<param name="profiler" value="metaphlan"/> | ||
<param name="infile" value="ERR7569997.txt,ERR7569998.txt"/> | ||
<param name="taxonomy" value="test-db-tox"/> | ||
<param name="add_name" value="true"/> | ||
<param name="add_rank" value="false" /> | ||
<param name="add_lineage" value="false"/> | ||
<param name="add_id_lineage" value="false"/> | ||
<param name="add_rank_lineage" value="false"/> | ||
<output name="tabular_output" ftype="tabular"> | ||
<assert_contents> | ||
<has_text text="Gammaproteobacteria"/> | ||
<has_n_columns n="4"/> | ||
<has_n_lines n="252"/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
<!-- testing diamond input--> | ||
<test expect_num_outputs="1"> | ||
<conditional name="action"> | ||
<param name="action" value="standardise"/> | ||
</conditional> | ||
<param name="action" value="standardise"/> | ||
<param name="profiler" value="diamond"/> | ||
<param name="infile" value="diamond_valid_1.tsv"/> | ||
<param name="taxonomy" value="test-db-tox"/> | ||
<param name="add_name" value="true"/> | ||
<param name="add_rank" value="false" /> | ||
<param name="add_lineage" value="false"/> | ||
<param name="add_id_lineage" value="false"/> | ||
<param name="add_rank_lineage" value="false"/> | ||
<output name="tabular_output" ftype="tabular"> | ||
<assert_contents> | ||
<has_text text="1310613"/> | ||
<has_n_columns n="3"/> | ||
<has_n_lines n="4"/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
<!-- testing kraken input and all boolean--> | ||
<test expect_num_outputs="1"> | ||
<conditional name="action"> | ||
<param name="action" value="standardise"/> | ||
</conditional> | ||
<param name="profiler" value="kraken2"/> | ||
<param name="infile" value="2612_pe-ERR5766176-db1.kraken2.report.txt"/> | ||
<param name="taxonomy" value="test-db-tox"/> | ||
<param name="add_name" value="true"/> | ||
<param name="add_rank" value="true" /> | ||
<param name="add_lineage" value="true"/> | ||
<param name="add_id_lineage" value="true"/> | ||
<param name="add_rank_lineage" value="true"/> | ||
<output name="tabular_output" ftype="tabular"> | ||
<assert_contents> | ||
<has_text text="root"/> | ||
<has_text text="rank_lineage"/> | ||
<has_n_columns n="7"/> | ||
<has_n_lines n="45"/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
<!-- testing BIOM output--> | ||
<test expect_num_outputs="1"> | ||
<conditional name="action"> | ||
<param name="action" value="merge"/> | ||
<conditional name="format"> | ||
<param name="output_format" value="BIOM"/> | ||
</conditional> | ||
</conditional> | ||
<param name="profiler" value="kraken2"/> | ||
<param name="infile" value="2612_pe-ERR5766176-db1.kraken2.report.txt,2611_se-ERR5766174-db1.kraken2.report.txt"/> | ||
<param name="taxonomy" value="test-db-tox"/> | ||
<param name="add_name" value="true"/> | ||
<param name="add_rank" value="false" /> | ||
<param name="add_lineage" value="false"/> | ||
<param name="add_id_lineage" value="false"/> | ||
<param name="add_rank_lineage" value="false"/> | ||
<output name="biom_output" ftype="biom1"> | ||
<assert_contents> | ||
<has_text text="314146"/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
</tests> | ||
<help><![CDATA[ | ||
.. class:: infomark | ||
**What it does** | ||
The main purpose of taxpasta is to standardise taxonomic profiles created by a range of bioinformatics tools. | ||
We call those tools taxonomic profilers. They each come with their own particular tabular output format. | ||
Across the profilers, relative abundances can be reported in read counts, fractions, or percentages, as well as | ||
any number of additional columns with extra information. We therefore decided to take the lessons learnt to heart | ||
and provide our own solution to deal with this pasticcio. With taxpasta you can ingest all of those formats and, at a | ||
minimum, output taxonomy identifiers and their integer counts. Taxpasta can not only standardise profiles but also merge | ||
them across samples for the same profiler into a single table. | ||
**Input(s)** | ||
* One or many outputs from a particular profiling tool, such as kraken2 or diamond. | ||
**Output** | ||
* A reformatted report in tabular format, either for the single input, or for multiple inputs, as long as they are from the same profiling tool. | ||
For more information see: https://taxpasta.readthedocs.io/en/latest/ | ||
]]></help> | ||
<citations> | ||
<citation type="doi">10.21105/joss.05627</citation> | ||
</citations> | ||
</tool> |
44 changes: 44 additions & 0 deletions
44
tools/taxpasta/test-data/2611_se-ERR5766174-db1.kraken2.report.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
99.98 787758 787758 U 0 unclassified | ||
0.02 119 0 R 1 root | ||
0.02 119 0 R1 131567 cellular organisms | ||
0.02 119 0 D 2759 Eukaryota | ||
0.02 119 0 D1 33154 Opisthokonta | ||
0.01 96 0 K 4751 Fungi | ||
0.01 96 0 K1 451864 Dikarya | ||
0.01 96 0 P 4890 Ascomycota | ||
0.01 96 0 P1 716545 saccharomyceta | ||
0.01 96 0 P2 147537 Saccharomycotina | ||
0.01 96 0 C 4891 Saccharomycetes | ||
0.01 96 0 O 4892 Saccharomycetales | ||
0.01 96 0 F 4893 Saccharomycetaceae | ||
0.01 96 0 G 4930 Saccharomyces | ||
0.01 96 0 S 4932 Saccharomyces cerevisiae | ||
0.01 96 96 S1 559292 Saccharomyces cerevisiae S288C | ||
0.00 23 0 K 33208 Metazoa | ||
0.00 23 0 K1 6072 Eumetazoa | ||
0.00 23 0 K2 33213 Bilateria | ||
0.00 23 0 K3 33511 Deuterostomia | ||
0.00 23 0 P 7711 Chordata | ||
0.00 23 0 P1 89593 Craniata | ||
0.00 23 0 P2 7742 Vertebrata | ||
0.00 23 0 P3 7776 Gnathostomata | ||
0.00 23 0 P4 117570 Teleostomi | ||
0.00 23 0 P5 117571 Euteleostomi | ||
0.00 23 0 P6 8287 Sarcopterygii | ||
0.00 23 0 P7 1338369 Dipnotetrapodomorpha | ||
0.00 23 0 P8 32523 Tetrapoda | ||
0.00 23 0 P9 32524 Amniota | ||
0.00 23 0 C 40674 Mammalia | ||
0.00 23 0 C1 32525 Theria | ||
0.00 23 0 C2 9347 Eutheria | ||
0.00 23 0 C3 1437010 Boreoeutheria | ||
0.00 23 0 C4 314146 Euarchontoglires | ||
0.00 23 0 O 9443 Primates | ||
0.00 23 0 O1 376913 Haplorrhini | ||
0.00 23 0 O2 314293 Simiiformes | ||
0.00 23 0 O3 9526 Catarrhini | ||
0.00 23 0 O4 314295 Hominoidea | ||
0.00 23 0 F 9604 Hominidae | ||
0.00 23 0 F1 207598 Homininae | ||
0.00 23 0 G 9605 Homo | ||
0.00 23 23 S 9606 Homo sapiens |
44 changes: 44 additions & 0 deletions
44
tools/taxpasta/test-data/2612_pe-ERR5766176-db1.kraken2.report.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
99.97 627680 627680 U 0 unclassified | ||
0.03 168 0 R 1 root | ||
0.03 168 0 R1 131567 cellular organisms | ||
0.03 168 0 D 2759 Eukaryota | ||
0.03 168 0 D1 33154 Opisthokonta | ||
0.02 152 0 K 33208 Metazoa | ||
0.02 152 0 K1 6072 Eumetazoa | ||
0.02 152 0 K2 33213 Bilateria | ||
0.02 152 0 K3 33511 Deuterostomia | ||
0.02 152 0 P 7711 Chordata | ||
0.02 152 0 P1 89593 Craniata | ||
0.02 152 0 P2 7742 Vertebrata | ||
0.02 152 0 P3 7776 Gnathostomata | ||
0.02 152 0 P4 117570 Teleostomi | ||
0.02 152 0 P5 117571 Euteleostomi | ||
0.02 152 0 P6 8287 Sarcopterygii | ||
0.02 152 0 P7 1338369 Dipnotetrapodomorpha | ||
0.02 152 0 P8 32523 Tetrapoda | ||
0.02 152 0 P9 32524 Amniota | ||
0.02 152 0 C 40674 Mammalia | ||
0.02 152 0 C1 32525 Theria | ||
0.02 152 0 C2 9347 Eutheria | ||
0.02 152 0 C3 1437010 Boreoeutheria | ||
0.02 152 0 C4 314146 Euarchontoglires | ||
0.02 152 0 O 9443 Primates | ||
0.02 152 0 O1 376913 Haplorrhini | ||
0.02 152 0 O2 314293 Simiiformes | ||
0.02 152 0 O3 9526 Catarrhini | ||
0.02 152 0 O4 314295 Hominoidea | ||
0.02 152 0 F 9604 Hominidae | ||
0.02 152 0 F1 207598 Homininae | ||
0.02 152 0 G 9605 Homo | ||
0.02 152 152 S 9606 Homo sapiens | ||
0.00 16 0 K 4751 Fungi | ||
0.00 16 0 K1 451864 Dikarya | ||
0.00 16 0 P 4890 Ascomycota | ||
0.00 16 0 P1 716545 saccharomyceta | ||
0.00 16 0 P2 147537 Saccharomycotina | ||
0.00 16 0 C 4891 Saccharomycetes | ||
0.00 16 0 O 4892 Saccharomycetales | ||
0.00 16 0 F 4893 Saccharomycetaceae | ||
0.00 16 0 G 4930 Saccharomyces | ||
0.00 16 0 S 4932 Saccharomyces cerevisiae | ||
0.00 16 16 S1 559292 Saccharomyces cerevisiae S288C |
Oops, something went wrong.