Skip to content

Commit

Permalink
Seqtk telo (#5669)
Browse files Browse the repository at this point in the history
* merge

* add new output

* seqtk 1.4 has a telomere finder. This adds it to the rest and bumps the version.
passes lint but the CI lint seems to be failing...

* better test data

* single quote wrap parameters

* passes lint after adding Bjoern's citation to macros

* fix macro order

* seqtk 1.4 has a telomere finder. This adds it to the rest and bumps the version.
passes lint but the CI lint seems to be failing...

* better test data

* single quote wrap parameters

* fix rst lint

* fix help

* fix redundant but beautiful code. Byebye.

---------

Co-authored-by: Bjoern Gruening <[email protected]>
  • Loading branch information
fubar2 and bgruening authored Dec 9, 2023
1 parent 0331dc2 commit 2f75805
Show file tree
Hide file tree
Showing 21 changed files with 254 additions and 45 deletions.
2 changes: 1 addition & 1 deletion tools/seqtk/.shed.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ categories:
description: Toolkit for processing sequences in FASTA/Q formats
homepage_url: https://github.com/lh3/seqtk
long_description: |
Sequence Analysis toolkit provies several small utilities for working with FastA and FastQ data.
Sequence Analysis toolkit provides several small utilities for working with FastA and FastQ data.
name: seqtk
owner: iuc
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/seqtk
Expand Down
14 changes: 13 additions & 1 deletion tools/seqtk/macros.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
<xref type="bio.tools">seqtk</xref>
</xrefs>
</xml>
<token name="@TOOL_VERSION@">1.3</token>
<token name="@TOOL_VERSION@">1.4</token>
<token name="@VERSION_SUFFIX@">0</token>
<token name="@AWK_VERSION@">4.2.1</token>
<xml name="stdio">
<stdio>
Expand Down Expand Up @@ -41,6 +42,17 @@ This Galaxy tool relies on the seqtk toolkit from `lh3/seqtk
]]></token>
<!-- Shared citation macro: expanded by every seqtk tool wrapper via
     <expand macro="citation"/>. seqtk has no publication, so the GitHub
     repository is cited as a BibTeX @misc entry. -->
<xml name="citation">
    <citations>
        <citation type="bibtex">
            @misc{seqtk,
                author = {Heng Li},
                year = {2023},
                title = {seqtk},
                publisher = {GitHub},
                journal = {GitHub repository},
                howpublished = {\url{https://github.com/lh3/seqtk}},
            }
        </citation>
    </citations>
</xml>

</macros>
6 changes: 3 additions & 3 deletions tools/seqtk/seqtk_comp.xml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
<?xml version="1.0"?>
<tool id="seqtk_comp" name="seqtk_comp" version="@TOOL_VERSION@.1">
<tool id="seqtk_comp" name="seqtk_comp" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
<description>get the nucleotide composition of FASTA/Q</description>
<expand macro="bio_tools"/>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="bio_tools"/>
<expand macro="requirements">
<requirement type="package" version="@AWK_VERSION@">gawk</requirement>
</expand>
Expand All @@ -22,7 +22,7 @@ seqtk comp
<param name="in_bed" type="data" format="bed" optional="true" label="BED file"/>
</inputs>
<outputs>
<data name="default" format="tabular" label="Nucleotide composition of $in_file.name"/>
<data name="default" format="tabular" label="${tool.name} on ${on_string}: Nucleotide composition"/>
</outputs>
<tests>
<test>
Expand Down
11 changes: 7 additions & 4 deletions tools/seqtk/seqtk_cutN.xml
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
<?xml version="1.0"?>
<tool id="seqtk_cutN" name="seqtk_cutN" version="@[email protected]">
<tool id="seqtk_cutN" name="seqtk_cutN" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
<description>cut sequence at long N</description>
<expand macro="bio_tools"/>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="bio_tools"/>
<expand macro="requirements"/>
<expand macro="stdio"/>
<command><![CDATA[
Expand All @@ -22,7 +21,11 @@ $g
<param argument="-g" type="boolean" truevalue="-g" falsevalue="" checked="false" label="Print gaps only, no sequence" />
</inputs>
<outputs>
<data name="default" format_source="in_file" label="$in_file.name split on N runs longer than $n"/>
<data name="default" format_source="in_file" label="${tool.name} on ${on_string}: split on N runs longer than $n">
<change_format>
<when input="g" value="-g" format="bed" />
</change_format>
</data>
</outputs>
<tests>
<test>
Expand Down
6 changes: 3 additions & 3 deletions tools/seqtk/seqtk_dropse.xml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
<?xml version="1.0"?>
<tool id="seqtk_dropse" name="seqtk_dropse" version="@TOOL_VERSION@.1">
<tool id="seqtk_dropse" name="seqtk_dropse" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
<description>drop unpaired from interleaved Paired End FASTA/Q</description>
<expand macro="bio_tools"/>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="bio_tools"/>
<expand macro="requirements"/>
<expand macro="stdio"/>
<command><![CDATA[
Expand All @@ -16,7 +16,7 @@ seqtk dropse
<expand macro="in_faq"/>
</inputs>
<outputs>
<data name="default" format_source="in_file" label="Only paired-end reads from $in_file.name"/>
<data name="default" format_source="in_file" label="${tool.name} on ${on_string}: only paired-end reads"/>
</outputs>
<tests>
<test>
Expand Down
6 changes: 3 additions & 3 deletions tools/seqtk/seqtk_fqchk.xml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
<?xml version="1.0"?>
<tool id="seqtk_fqchk" name="seqtk_fqchk" version="@TOOL_VERSION@.1">
<tool id="seqtk_fqchk" name="seqtk_fqchk" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
<description>fastq QC (base/quality summary)</description>
<expand macro="bio_tools"/>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="bio_tools"/>
<expand macro="requirements">
<requirement type="package" version="@AWK_VERSION@">gawk</requirement>
</expand>
Expand All @@ -20,7 +20,7 @@ seqtk fqchk
<param argument="-q" type="integer" value="20" label="Quality values" help="Use 0 to get the distribution of all quality values"/>
</inputs>
<outputs>
<data name="default" format="tabular" label="Quality information for $in_file.name"/>
<data name="default" format="tabular" label="${tool.name} on ${on_string}: Quality information"/>
</outputs>
<tests>
<test>
Expand Down
6 changes: 3 additions & 3 deletions tools/seqtk/seqtk_hety.xml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
<?xml version="1.0"?>
<tool id="seqtk_hety" name="seqtk_hety" version="@TOOL_VERSION@.1">
<tool id="seqtk_hety" name="seqtk_hety" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
<description>regional heterozygosity</description>
<expand macro="bio_tools"/>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="bio_tools"/>
<expand macro="requirements">
<requirement type="package" version="@AWK_VERSION@">gawk</requirement>
</expand>
Expand All @@ -24,7 +24,7 @@ $m
<param argument="-m" type="boolean" truevalue="-m" falsevalue="" checked="false" label="Treat lowercases as masked" />
</inputs>
<outputs>
<data name="default" format="tabular" label="Heterozygous regions in $in_file.name"/>
<data name="default" format="tabular" label="${tool.name} on ${on_string}: Heterozygous regions"/>
</outputs>
<tests>
<test>
Expand Down
6 changes: 3 additions & 3 deletions tools/seqtk/seqtk_listhet.xml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
<?xml version="1.0"?>
<tool id="seqtk_listhet" name="seqtk_listhet" version="@TOOL_VERSION@.1">
<tool id="seqtk_listhet" name="seqtk_listhet" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
<description>extract the position of each het</description>
<expand macro="bio_tools"/>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="bio_tools"/>
<expand macro="requirements">
<requirement type="package" version="@AWK_VERSION@">gawk</requirement>
</expand>
Expand All @@ -18,7 +18,7 @@ seqtk listhet
<expand macro="in_faq"/>
</inputs>
<outputs>
<data name="default" format="tabular" label="Positions of heterozygous bases in $in_file.name"/>
<data name="default" format="tabular" label="${tool.name} on ${on_string}: Positions of heterozygous bases"/>
</outputs>
<tests>
<test>
Expand Down
6 changes: 3 additions & 3 deletions tools/seqtk/seqtk_mergefa.xml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
<?xml version="1.0"?>
<tool id="seqtk_mergefa" name="seqtk_mergefa" version="@TOOL_VERSION@.1">
<tool id="seqtk_mergefa" name="seqtk_mergefa" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
<description>merge two FASTA/Q files</description>
<expand macro="bio_tools"/>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="bio_tools"/>
<expand macro="requirements"/>
<expand macro="stdio"/>
<command><![CDATA[
Expand All @@ -28,7 +28,7 @@ $h
<param argument="-h" type="boolean" truevalue="-h" falsevalue="" checked="false" label="Suppress hets in the input" />
</inputs>
<outputs>
<data name="default" format_source="in_fa1" label="Merger of $in_fa1.name and $in_fa2.name"/>
<data name="default" format_source="in_fa1" label="${tool.name} on ${on_string}"/>
</outputs>
<tests>
<test>
Expand Down
6 changes: 3 additions & 3 deletions tools/seqtk/seqtk_mergepe.xml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
<?xml version="1.0"?>
<tool id="seqtk_mergepe" name="seqtk_mergepe" version="@TOOL_VERSION@.1">
<tool id="seqtk_mergepe" name="seqtk_mergepe" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
<description>interleave two unpaired FASTA/Q files for a paired-end file</description>
<expand macro="bio_tools"/>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="bio_tools"/>
<expand macro="requirements"/>
<expand macro="stdio"/>
<command><![CDATA[
Expand All @@ -18,7 +18,7 @@ seqtk mergepe
<param name="in_fq2" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Input FASTA/Q file #2"/>
</inputs>
<outputs>
<data name="default" format_source="in_fq1" label="$in_fq1.name and $in_fq2.name as interleaved paired-end"/>
<data name="default" format_source="in_fq1" label="${tool.name} on ${on_string}: interleaved paired-end"/>
</outputs>
<tests>
<test>
Expand Down
6 changes: 3 additions & 3 deletions tools/seqtk/seqtk_mutfa.xml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
<?xml version="1.0"?>
<tool id="seqtk_mutfa" name="seqtk_mutfa" version="@TOOL_VERSION@.1">
<tool id="seqtk_mutfa" name="seqtk_mutfa" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
<description>point mutate FASTA at specified positions</description>
<expand macro="bio_tools"/>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="bio_tools"/>
<expand macro="requirements"/>
<expand macro="stdio"/>
<command><![CDATA[
Expand All @@ -18,7 +18,7 @@ seqtk mutfa
<param name="in_snp" type="data" format="tabular" label="Input SNP file"/>
</inputs>
<outputs>
<data name="default" format_source="in_file" label="Mutated $in_file.name"/>
<data name="default" format_source="in_file" label="${tool.name} on ${on_string}: Mutated"/>
</outputs>
<tests>
<test>
Expand Down
6 changes: 3 additions & 3 deletions tools/seqtk/seqtk_randbase.xml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
<?xml version="1.0"?>
<tool id="seqtk_randbase" name="seqtk_randbase" version="@TOOL_VERSION@.1">
<tool id="seqtk_randbase" name="seqtk_randbase" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
<description>choose a random base from hets</description>
<expand macro="bio_tools"/>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="bio_tools"/>
<expand macro="requirements"/>
<expand macro="stdio"/>
<command><![CDATA[
Expand All @@ -16,7 +16,7 @@ seqtk randbase
<expand macro="in_faq"/>
</inputs>
<outputs>
<data name="default" format_source="in_file" label="Unambiguous $in_file.name"/>
<data name="default" format_source="in_file" label="${tool.name} on ${on_string}: Unambiguous"/>
</outputs>
<tests>
<test>
Expand Down
6 changes: 3 additions & 3 deletions tools/seqtk/seqtk_sample.xml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
<?xml version="1.0"?>
<tool id="seqtk_sample" name="seqtk_sample" version="@TOOL_VERSION@.2">
<tool id="seqtk_sample" name="seqtk_sample" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
<description>random subsample of fasta or fastq sequences</description>
<expand macro="bio_tools"/>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="bio_tools"/>
<expand macro="requirements"/>
<expand macro="stdio"/>
<command><![CDATA[
Expand All @@ -24,7 +24,7 @@ $subsample_size
</section>
</inputs>
<outputs>
<data name="default" format_source="in_file" label="Subsample of reads from $in_file.name"/>
<data name="default" format_source="in_file" label="${tool.name} on ${on_string}: Subsample of reads"/>
</outputs>
<tests>
<test>
Expand Down
6 changes: 3 additions & 3 deletions tools/seqtk/seqtk_seq.xml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
<?xml version="1.0"?>
<tool id="seqtk_seq" name="seqtk_seq" version="@TOOL_VERSION@.3">
<tool id="seqtk_seq" name="seqtk_seq" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
<description>common transformation of FASTA/Q</description>
<expand macro="bio_tools"/>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="bio_tools"/>
<expand macro="requirements"/>
<expand macro="stdio"/>
<command><![CDATA[
Expand Down Expand Up @@ -54,7 +54,7 @@ $x2
<param name="x2" argument="-2" type="boolean" truevalue="-2" falsevalue="" checked="false" label="Output the 2n reads only" />
</inputs>
<outputs>
<data name="default" format_source="in_file" />
<data name="default" format_source="in_file" label="${tool.name} on ${on_string}" />
</outputs>
<tests>
<!-- This is a sorry excuse for a test for a tool which does way more
Expand Down
6 changes: 3 additions & 3 deletions tools/seqtk/seqtk_subseq.xml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
<?xml version="1.0"?>
<tool id="seqtk_subseq" name="seqtk_subseq" version="@TOOL_VERSION@.1">
<tool id="seqtk_subseq" name="seqtk_subseq" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
<description>extract subsequences from FASTA/Q files</description>
<expand macro="bio_tools"/>
<macros>
<import>macros.xml</import>
<requirement type="package" version="@AWK_VERSION@">gawk</requirement>
</macros>
<expand macro="bio_tools"/>
<expand macro="requirements"/>
<expand macro="stdio"/>
<command><![CDATA[
Expand Down Expand Up @@ -41,7 +41,7 @@ $t
<param argument="-l" type="integer" value="0" label="Sequence line length" />
</inputs>
<outputs>
<data name="default" format_source="in_file" label="Selected sequences from $in_file.name">
<data name="default" format_source="in_file" label="${tool.name} on ${on_string}: Selected sequences">
<change_format>
<when input="t" value="-t" format="tabular"/>
</change_format>
Expand Down
61 changes: 61 additions & 0 deletions tools/seqtk/seqtk_telo.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
<?xml version="1.0"?>
<!-- Galaxy wrapper for "seqtk telo": scans a FASTA/Q input for telomeric
     repeats and writes the result to a single BED dataset.
     Tokens (@TOOL_VERSION@, @VERSION_SUFFIX@, @ATTRIBUTION@) and the macros
     bio_tools, requirements, stdio, in_faq and citation come from macros.xml. -->
<tool id="seqtk_telo" name="seqtk_telo" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
    <description>find telomeres</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <!-- Every user-supplied value is single-quoted; $P expands to either
         "-P" or the empty string, so it is deliberately left unquoted. -->
    <command><![CDATA[
seqtk telo
-m '$m'
-p '$p'
-d '$d'
-s '$s'
$P
'$in_file'
> '$default'
    ]]></command>
    <inputs>
        <!-- in_faq supplies the "in_file" dataset parameter used above. -->
        <expand macro="in_faq"/>
        <param argument="-m" type="text" value="CCCTAA" label="Telomere repeat to search for" help="The default, CCCTAA, is the vertebrate telomere repeat.">
            <!-- An empty motif would be passed to seqtk as -m ''; reject it in the form instead. -->
            <validator type="empty_field" message="A telomere repeat motif is required"/>
        </param>
        <param argument="-p" type="integer" value="1" label="Penalty for a non-repeat" />
        <param argument="-d" type="integer" value="2000" label="Maximum drop" />
        <param argument="-s" type="integer" value="300" label="Minimum score" />
        <param argument="-P" type="boolean" truevalue="-P" falsevalue="" checked="false" label="Print scoring" />
    </inputs>
    <outputs>
        <data name="default" format="bed" label="${tool.name} on ${on_string}: telomeres"/>
    </outputs>
    <tests>
        <!-- Plain FASTA input with all default parameters. -->
        <test>
            <param name="in_file" value="seqtk_telo.fa"/>
            <output name="default" file="seqtk_telo.out" ftype="bed"/>
        </test>
        <!-- Same run on gzip-compressed FASTA input. -->
        <test>
            <param name="in_file" value="seqtk_telo.fa.gz" ftype="fasta.gz"/>
            <output name="default" file="seqtk_telogz.out" ftype="bed"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

Identifies telomeres using a default vertebrate repeat sequence CCCTAA, or a user supplied one

::

    Usage: seqtk telo [options] <in.fq>
    Options:
      -m STR     motif [CCCTAA]
      -p INT     penalty [1]
      -d INT     max drop [2000]
      -s INT     min score [300]
      -P         print scoring

@ATTRIBUTION@
    ]]></help>
    <expand macro="citation" />
</tool>
Loading

0 comments on commit 2f75805

Please sign in to comment.