Skip to content

Commit

Permalink
Updated FASTK tool wrapper (galaxyproject#5965)
Browse files Browse the repository at this point in the history
* Updated FASTK wrapper

* Minor Changes

* Fixed .shed.yml

* Updated Datatype

* Updated shed.yml

* Updated tool id'

* Changes as per review

* Update tools/fastk/fastk.xml

Co-authored-by: M Bernt <[email protected]>

---------

Co-authored-by: Björn Grüning <[email protected]>
Co-authored-by: M Bernt <[email protected]>
  • Loading branch information
3 people authored May 3, 2024
1 parent 921fa90 commit 57fa4dc
Show file tree
Hide file tree
Showing 6 changed files with 775 additions and 0 deletions.
16 changes: 16 additions & 0 deletions tools/fastk/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Tool Shed metadata for the FastK wrapper repository.
name: fastk
owner: iuc
categories:
  - Assembly
description: "FastK: A K-mer counter (for HQ assembly data sets)"
long_description: FastK is a k‑mer counter that is optimized for processing high-quality DNA assembly data sets such as those produced with an Illumina instrument or a PacBio run in HiFi mode.
homepage_url: https://github.com/thegenemyers/FASTK
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/fastk
type: unrestricted
# Suite definition; its keys must be nested here so they do not clash with
# the top-level name/description/type above.
suite:
  name: "suite_fastk"
  description: "A suite of tools for FASTK in Galaxy"
  type: repository_suite_definition
# Naming/description templates for the per-tool repositories
# (placeholders are filled from each tool's id and name).
auto_tool_repositories:
  name_template: "{{ tool_id }}"
  description_template: "Wrapper for the FASTK tool suite: {{ tool_name }}"
114 changes: 114 additions & 0 deletions tools/fastk/fastk.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
<tool id="fastk_fastk" name="FastK" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.2">
<description>A k-mer counter for high-quality assembly datasets</description>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="requirements" />
<command detect_errors="exit_code"><![CDATA[
    ## Create the output directory (outfiles/) and the temp directory handed to FastK via -P.
    mkdir -p outfiles/tmpfiles &&
    ## Link the dataset under a file name with an explicit extension.
    ## Any fastq / fastq.gz subtype is normalised to a plain .fastq / .fastq.gz name;
    ## every other format keeps the Galaxy datatype extension.
    ## NOTE(review): presumably FastK selects its parser from the extension - confirm.
    #if $infile.is_of_type("fastq"):
        #set INPUTFILE="input.fastq"
    #elif $infile.is_of_type("fastq.gz"):
        #set INPUTFILE="input.fastq.gz"
    #else
        #set INPUTFILE="input."+$infile.ext
    #end if
    ln -s '$infile' $INPUTFILE &&
    FastK $INPUTFILE
        -k$kmer_size
        #if $sorted_table.sorted_table_option == 'yes_with_default':
            ## Bare -t: let FastK use its own default cutoff.
            -t
        #elif $sorted_table.sorted_table_option == 'yes_with_custom':
            ## The cutoff is defined inside the sorted_table conditional, so it is
            ## addressed by its full path like the other nested parameters.
            -t${sorted_table.sorted_table_cutoff}
        #end if
        -T\${GALAXY_SLOTS:-8} -Noutfiles/output -Poutfiles/tmpfiles
    ## A k-mer table only exists when a sorted table was requested; list it with Tabex.
    #if $sorted_table.sorted_table_option == 'yes_with_default':
        #set TABEX_THRESHOLD=$sorted_table.tabex_threshold_for_default
    #elif $sorted_table.sorted_table_option == 'yes_with_custom':
        #set TABEX_THRESHOLD=$sorted_table.tabex_threshold_for_custom
    #end if
    #if $sorted_table.sorted_table_option != 'no':
        && Tabex outfiles/output.ktab -t${TABEX_THRESHOLD} LIST > '$tabex_hist'
    #end if
    ## Bundle everything under outfiles/ into a single tar archive.
    && tar -c -f fastk.tar ./outfiles/
]]></command>
<inputs>
    <!-- Sequence dataset whose k-mers are counted. -->
    <param name="infile" type="data" format="fasta,fasta.gz,fastq,fastq.gz,cram,unsorted.bam,sam" label="Input file"/>
    <!-- k-mer length, passed to FastK as -k. -->
    <param argument="-k" name="kmer_size" type="integer" value="40" min="5" max="50"
           label="Enter desired k-mer size"
           help="Default: 40" />
    <!-- Controls whether a sorted k-mer table is produced and then listed with Tabex. -->
    <conditional name="sorted_table">
        <param name="sorted_table_option" type="select"
               label="Sort table"
               help="Do you want a sorted table of all canonical k-mers and their counts? The sorted table is sorted lexicographically on the k-mer where a &lt; c &lt; g &lt; t.">
            <option value="no">No</option>
            <option value="yes_with_default">Yes, Default sorted </option>
            <option value="yes_with_custom">Yes, Custom sorted </option>
        </param>
        <!-- No table requested: nothing further to configure. -->
        <when value="no"/>
        <!-- Table with FastK's default cutoff: only the Tabex threshold is asked for. -->
        <when value="yes_with_default">
            <param argument="-t" name="tabex_threshold_for_default" type="integer" value="5" min="1"
                   label="Tabex count threshold"
                   help="Trim all k-mers with counts less than threshold"/>
        </when>
        <!-- Table with a user-chosen cutoff, plus the Tabex threshold. -->
        <when value="yes_with_custom">
            <param name="sorted_table_cutoff" type="integer" value="10" min="2"
                   label="Enter sorted table cutoff value"/>
            <param argument="-t" name="tabex_threshold_for_custom" type="integer" value="5" min="1"
                   label="Tabex count threshold"
                   help="Trim all k-mers with counts less than threshold"/>
        </when>
    </conditional>
</inputs>
<outputs>
    <!-- Tar archive of the whole outfiles/ directory written by the command. -->
    <data name="fastk_out" format="tar" from_work_dir="fastk.tar"
          label="${tool.name} on ${on_string}: FastK files"/>
    <!-- Histogram file collected from the working directory. -->
    <data name="fastk_hist_out" format="fastk_hist" from_work_dir="outfiles/output.hist"
          label="${tool.name} on ${on_string}: FastK hist" />
    <!-- Tabex listing; only created when a sorted table was requested. -->
    <data name="tabex_hist" format="txt"
          label="${tool.name} on ${on_string}: Tabex output">
        <filter> sorted_table['sorted_table_option'] != 'no' </filter>
    </data>
</outputs>
<tests>
    <!-- TEST 1: defaults only (no sorted table), so two outputs and a histogram in the archive. -->
    <test expect_num_outputs="2">
        <param name="infile" value="input01.fasta.gz"/>
        <output name="fastk_out" ftype="tar">
            <assert_contents>
                <has_archive_member path="./outfiles/output.hist"/>
            </assert_contents>
        </output>
    </test>
    <!-- TEST 2: sorted table with the default cutoff; the Tabex listing is the third output. -->
    <test expect_num_outputs="3">
        <param name="infile" value="input01.fasta.gz"/>
        <conditional name="sorted_table">
            <param name="sorted_table_option" value="yes_with_default"/>
        </conditional>
        <output name="fastk_out" ftype="tar">
            <assert_contents>
                <has_archive_member path="./outfiles/output.hist"/>
                <has_archive_member path="./outfiles/output.ktab"/>
            </assert_contents>
        </output>
        <output name="tabex_hist" value="test02.tabex.txt"/>
    </test>
    <!-- TEST 3: sorted table with a custom cutoff of 5; the Tabex listing is the third output. -->
    <test expect_num_outputs="3">
        <param name="infile" value="input01.fasta.gz"/>
        <conditional name="sorted_table">
            <param name="sorted_table_option" value="yes_with_custom"/>
            <param name="sorted_table_cutoff" value="5"/>
        </conditional>
        <output name="fastk_out" ftype="tar">
            <assert_contents>
                <has_archive_member path="./outfiles/output.hist"/>
                <has_archive_member path="./outfiles/output.ktab"/>
            </assert_contents>
        </output>
        <output name="tabex_hist" value="test03.tabex.txt"/>
    </test>
</tests>
<help><![CDATA[
FastK is a k‑mer counter that is optimized for processing high quality DNA assembly data sets such as those produced with an Illumina instrument or a PacBio run in HiFi mode.
The input data can be in CRAM, BAM, SAM, fasta, or fastq files.
FastK produces the following outputs:

1. FastK hist: a file in binary format containing histogram information detailing the frequency of occurrence of each k‑mer within the dataset.
2. A Tabex txt file comprising a table of k‑mer/count pairs, sorted lexicographically on the k‑mer sequence, following the order a < c < g < t. This output is only produced when a sorted table is requested.
3. A tar file containing the FastK output files, including the hidden .ktab files, which can be used by downstream FASTK tools.
]]></help>
<expand macro="citations"/>
</tool>
23 changes: 23 additions & 0 deletions tools/fastk/macros.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<macros>
    <!-- Upstream FastK version; also pins the package requirement below. -->
    <token name="@TOOL_VERSION@">1.0.0</token>
    <!-- Wrapper revision, appended to the tool version after "+galaxy". -->
    <token name="@VERSION_SUFFIX@">0</token>
    <!-- Software dependencies of the wrapper. -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@TOOL_VERSION@">fastk</requirement>
        </requirements>
    </xml>
    <!-- Citation for the upstream FastK repository. -->
    <xml name="citations">
        <citations>
            <citation type="bibtex">
                @misc{github,
                author = {Gene Myers},
                year = {2020},
                title = {FastK},
                publisher = {GitHub},
                journal = {GitHub repository},
                url = {https://github.com/thegenemyers/FASTK},
                }
            </citation>
        </citations>
    </xml>
</macros>
Binary file added tools/fastk/test-data/input01.fasta.gz
Binary file not shown.
Loading

0 comments on commit 57fa4dc

Please sign in to comment.