Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Split meryl #5942

Merged
merged 11 commits into from
Apr 25, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions tools/meryl/meryl.xml → deprecated/tools/meryl/meryl.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
<macros>
<import>macros.xml</import>
</macros>
<expand macro='edam_ontology' />
<xrefs>
<xref type="bio.tools">meryl</xref>
</xrefs>
<expand macro='edam_ontology' />
<expand macro='requirements' />
<version_command>meryl --version</version_command>
<command detect_errors='exit_code'><![CDATA[
Expand All @@ -21,7 +21,7 @@
ln -s '$operation_type.input_reads' ./input.${operation_type.input_reads.ext} &&

meryl
$operation_type.count_operations
$operation_type.count_operation
k=$size
memory=\$GALAXY_MEMORY_GB
threads=\${GALAXY_SLOTS:-1}
Expand Down Expand Up @@ -148,7 +148,7 @@
<option value="trio-mode">Build hap-mer dbs for trios</option>
</param>
<when value="count-kmers">
<param name="count_operations" type="select" label="Count operations" help="Select an operation to be executed">
<param name="count_operation" type="select" label="Count operation" help="Select an operation to be executed">
<option value="count">Count: count the occurrences of canonical k-mers</option>
<option value="count-forward">Count-forward: count the occurreces of forward k-mers</option>
<option value="count-reverse">Count-reverse: count the occurreces of reverse k-mers</option>
Expand Down Expand Up @@ -446,7 +446,6 @@
<param name="command_type" value="groups-kmers" />
<param name="groups_operations" value="union" />
<param name="input_meryldb_02" value="output_02.read-db.meryldb,output_03.read-db.meryldb" ftype="meryldb" />
<param name="input_meryldb_03" value="" ftype="meryldb" />
</conditional>
<output name="read_db" ftype="meryldb">
<assert_contents>
Expand Down Expand Up @@ -588,13 +587,13 @@
</conditional>
<output name="read_db" ftype="meryldb">
<assert_contents>
<has_size value="3362942" delta="300" />
<has_size value="3362942" delta="2000" />
</assert_contents>
</output>
<output name="read_db_hist" file="output_23.read-db.hist" />
<output name="pat_db" ftype="meryldb">
<assert_contents>
<has_size value="120610" delta="300" />
<has_size value="120610" delta="400" />
</assert_contents>
</output>
<output name="pat_db_hist" file="output_23.pat.hist" />
Expand Down
16 changes: 12 additions & 4 deletions tools/meryl/.shed.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
categories:
- Assembly
name: meryl
owner: iuc
description: Meryl a k-mer counter.
long_description: |
Meryl a k-mer counter. It is built into the Celera assembler and is also available as a stand-alone application. Meryl uses a sorting-based approach that sorts k-mers in lexicographical order.
name: meryl
owner: iuc
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/meryl
homepage_url: https://github.com/marbl/meryl
type: unrestricted
categories:
- Assembly
auto_tool_repositories:
name_template: "{{ tool_id }}"
description_template: "Wrapper for Meryl function: {{ tool_name }}."
suite:
name: "suite_meryl"
description: "A suite of tools that brings the Meryl project into Galaxy."
long_description: |
Meryl a k-mer counter. It is built into the Celera assembler and is also available as a stand-alone application. Meryl uses a sorting-based approach that sorts k-mers in lexicographical order.
123 changes: 123 additions & 0 deletions tools/meryl/arithmetic-kmers.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
<tool id="meryl_arithmetic_kmers" name="Meryl" version="@TOOL_VERSION@+@GALAXY_TOOL_VERSION@@SUFFIX_VERSION@" profile="20.01">
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

higher profile version?

<description>apply arithmetic operations to k-mer counts</description>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="edam_ontology"/>
<xrefs>
<xref type="bio.tools">meryl</xref>
</xrefs>
<expand macro="requirements"/>
<version_command>meryl --version</version_command>
<!--
    Unpacks the input meryldb archive into ./temp_db/, applies the selected
    arithmetic operation with operand X to its k-mer counts, and re-packs the
    resulting database as read-db.meryldb in the working directory.
-->
<command detect_errors="exit_code"><![CDATA[
    ## GALAXY_MEMORY_GB is derived from GALAXY_MEMORY_MB (falls back to 8192 MB).
    export GALAXY_MEMORY_GB=\$((\${GALAXY_MEMORY_MB:-8192}/1024)) &&
    mkdir -p ./temp_db/ &&
    ## The dataset path is quoted so that it is always passed to tar as one argument.
    tar -zxf '$input_meryldb_02' -C ./temp_db/ &&
    ## NOTE(review): assumes the archive holds exactly one top-level database directory; confirm.
    mv ./temp_db/* tmp.meryl &&
    meryl
        $arithmetic_operations
        $X
        tmp.meryl
        output read-db.meryl &&
    tar -zcf read-db.meryldb read-db.meryl
]]></command>
<inputs>
    <!-- Operation keyword; its value is inserted verbatim as the first meryl argument. -->
    <param name="arithmetic_operations" type="select" label="Arithmetic operations" help="Select an operation to be executed">
        <option value="increase">Increase: add x to the count of each k-mer</option>
        <option value="decrease">Decrease: subtract x from the count of each k-mer</option>
        <option value="multiply">Multiply: multiply the count of each k-mer by x</option>
        <option value="divide">Divide: divide the count of each k-mer by x</option>
        <option value="divide-round">Divide-round: divide the count of each k-mer by x and round the results</option>
        <option value="modulo">Modulo: set the count of each k-mer to the remainder of the count divided by x</option>
    </param>
    <param name="input_meryldb_02" type="data" format="meryldb" label="Input meryldb" help="Select a meryldb dataset"/>
    <!--
        Required operand: the command always places $X right after the
        operation keyword, so a blank value would yield a malformed meryl
        call. The field starts empty (value="") so the user must choose a
        value explicitly.
    -->
    <param name="X" type="integer" min="1" max="1000000" value="" label="Operand" help="The value x used by the selected operation"/>
</inputs>
<outputs>
    <!-- Archive written by the final tar step of the command. -->
    <data name="read_db" format="meryldb" from_work_dir="read-db.meryldb"/>
</outputs>
<tests>
    <!-- One test per arithmetic operation, all run on read-db.meryldb. -->
    <!-- increase -->
    <test expect_num_outputs="1">
        <param name="arithmetic_operations" value="increase"/>
        <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb"/>
        <param name="X" value="100000"/>
        <output name="read_db" ftype="meryldb">
            <assert_contents>
                <has_size value="59500" delta="1000"/>
                <expand macro="meryldb_archive_assumptions"/>
            </assert_contents>
        </output>
    </test>
    <!-- decrease -->
    <test expect_num_outputs="1">
        <param name="arithmetic_operations" value="decrease"/>
        <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb"/>
        <param name="X" value="100"/>
        <output name="read_db" ftype="meryldb">
            <assert_contents>
                <has_size value="42313" delta="1000"/>
                <expand macro="meryldb_archive_assumptions"/>
            </assert_contents>
        </output>
    </test>
    <!-- multiply -->
    <test expect_num_outputs="1">
        <param name="arithmetic_operations" value="multiply"/>
        <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb"/>
        <param name="X" value="3"/>
        <output name="read_db" ftype="meryldb">
            <assert_contents>
                <has_size value="60530" delta="1000"/>
                <expand macro="meryldb_archive_assumptions"/>
            </assert_contents>
        </output>
    </test>
    <!-- divide -->
    <test expect_num_outputs="1">
        <param name="arithmetic_operations" value="divide"/>
        <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb"/>
        <param name="X" value="2"/>
        <output name="read_db" ftype="meryldb">
            <assert_contents>
                <has_size value="56200" delta="1000"/>
                <expand macro="meryldb_archive_assumptions"/>
            </assert_contents>
        </output>
    </test>
    <!-- divide-round -->
    <test expect_num_outputs="1">
        <param name="arithmetic_operations" value="divide-round"/>
        <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb"/>
        <param name="X" value="2"/>
        <output name="read_db" ftype="meryldb">
            <assert_contents>
                <has_size value="56100" delta="1000"/>
                <expand macro="meryldb_archive_assumptions"/>
            </assert_contents>
        </output>
    </test>
    <!-- modulo -->
    <test expect_num_outputs="1">
        <param name="arithmetic_operations" value="modulo"/>
        <param name="input_meryldb_02" value="read-db.meryldb" ftype="meryldb"/>
        <param name="X" value="3"/>
        <output name="read_db" ftype="meryldb">
            <assert_contents>
                <has_size value="37501" delta="1000"/>
                <expand macro="meryldb_archive_assumptions"/>
            </assert_contents>
        </output>
    </test>
</tests>
<help>

.. class:: infomark

**Purpose**

Meryl is a k-mer counter. This tool applies arithmetic operations to k-mer counts:

- Increase: add x to the count of each k-mer
- Decrease: subtract x from the count of each k-mer
- Multiply: multiply the count of each k-mer by x
- Divide: divide the count of each k-mer by x
- Divide-round: divide the count of each k-mer by x and round the results
- Modulo: set the count of each k-mer to the remainder of the count divided by x
</help>
<expand macro="citations"/>
</tool>
122 changes: 122 additions & 0 deletions tools/meryl/count-kmers.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
<tool id="meryl_count_kmers" name="Meryl" version="@TOOL_VERSION@+@GALAXY_TOOL_VERSION@@SUFFIX_VERSION@" profile="20.01">
<description>count k-mers</description>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="edam_ontology"/>
<xrefs>
<xref type="bio.tools">meryl</xref>
</xrefs>
<expand macro="requirements"/>
<version_command>meryl --version</version_command>
<!--
    Symlinks the input sequences under a name that carries the dataset
    extension, runs the selected meryl count operation with either the
    user-provided or the estimated k-mer size, echoes that size to stdout,
    and packs the resulting database as read-db.meryldb.
-->
<command detect_errors="exit_code"><![CDATA[
    #import math
    ## Estimated size: k = int( log(G * (1 - p) / p) / log(4) ),
    ## where G is the genome size and p the tolerable collision rate.
    #if $options_kmer_size.kmer_size == 'estimate'
        #set $est_genome_bp = int($options_kmer_size.genome_size)
        #set $est_collision = float($options_kmer_size.collision_rate)
        #set $kmer_length = int(math.log($est_genome_bp * (1 - $est_collision) / $est_collision) / math.log(4))
    #elif $options_kmer_size.kmer_size == 'provide'
        #set $kmer_length = $options_kmer_size.input_kmer_size
    #end if
    export GALAXY_MEMORY_GB=\$((\${GALAXY_MEMORY_MB:-8192}/1024)) &&
    ln -s '$input_reads' ./input.${input_reads.ext} &&
    meryl
        $count_operation
        k=$kmer_length
        memory=\$GALAXY_MEMORY_GB
        threads=\${GALAXY_SLOTS:-1}
        ./input.${input_reads.ext}
        output read-db.meryl &&
    echo 'K-mer size: ${kmer_length}' &&
    tar -zcf read-db.meryldb read-db.meryl
]]></command>
<inputs>
    <!-- Operation keyword; its value is inserted verbatim as the first meryl argument. -->
    <param name="count_operation" type="select" label="Count operations" help="Select an operation to be executed">
        <option value="count">Count: count the occurrences of canonical k-mers</option>
        <option value="count-forward">Count-forward: count the occurrences of forward k-mers</option>
        <option value="count-reverse">Count-reverse: count the occurrences of reverse k-mers</option>
    </param>
    <param name="input_reads" type="data" format="fastq,fastq.gz,fasta,fasta.gz" label="Input sequences" help="Select your reads in FASTA/FASTQ format."/>
    <!-- The k-mer size is either given directly or estimated in the command from genome size and collision rate. -->
    <conditional name="options_kmer_size">
        <param name="kmer_size" type="select" label="K-mer size selector">
            <option value="provide">Set a k-mer size</option>
            <option value="estimate">Estimate the best k-mer size</option>
        </param>
        <when value="provide">
            <param name="input_kmer_size" type="integer" min="1" max="50" value="" label="K-mer size" help="For a human genome, the best k-mer size is k=21 for both haploid (3.1G) and diploid (6.2G)."/>
        </when>
        <when value="estimate">
            <param name="genome_size" type="integer" min="1000" max="70000000000" value="1000" label="Genome size" help="Haploid genome size or diploid genome size, depending on what we evaluate. In bp. Only required if the k-mer size is not provided."/>
            <param name="collision_rate" type="float" min="0.0001" max="0.01" value="0.001" label="Tolerable collision rate" help="Tolerable collision rate. By default is 0.001."/>
        </when>
    </conditional>
</inputs>
<outputs>
    <!-- Archive written by the final tar step of the command. -->
    <data name="read_db" format="meryldb" from_work_dir="read-db.meryldb"/>
</outputs>
<tests>
    <!-- All tests run the "count" operation with a provided k-mer size of 7; only the input file differs. -->
    <!-- Uncompressed FASTA input -->
    <test expect_num_outputs="1">
        <param name="count_operation" value="count"/>
        <param name="input_reads" value="child.fasta"/>
        <conditional name="options_kmer_size">
            <param name="kmer_size" value="provide"/>
            <param name="input_kmer_size" value="7"/>
        </conditional>
        <output name="read_db" ftype="meryldb">
            <assert_contents>
                <has_size value="22152" delta="1000"/>
                <expand macro="meryldb_archive_assumptions"/>
            </assert_contents>
        </output>
        <assert_stdout>
            <has_line line="K-mer size: 7"/>
        </assert_stdout>
    </test>
    <!-- Gzipped FASTA input -->
    <test expect_num_outputs="1">
        <param name="count_operation" value="count"/>
        <param name="input_reads" value="child.fasta.gz" ftype="fasta.gz"/>
        <conditional name="options_kmer_size">
            <param name="kmer_size" value="provide"/>
            <param name="input_kmer_size" value="7"/>
        </conditional>
        <output name="read_db" ftype="meryldb">
            <assert_contents>
                <has_size value="22200" delta="1000"/>
                <expand macro="meryldb_archive_assumptions"/>
            </assert_contents>
        </output>
        <assert_stdout>
            <has_line line="K-mer size: 7"/>
        </assert_stdout>
    </test>
    <!-- Gzipped FASTQ input -->
    <test expect_num_outputs="1">
        <param name="count_operation" value="count"/>
        <param name="input_reads" value="child.fastq.gz" ftype="fastqsanger.gz"/>
        <conditional name="options_kmer_size">
            <param name="kmer_size" value="provide"/>
            <param name="input_kmer_size" value="7"/>
        </conditional>
        <output name="read_db" ftype="meryldb">
            <assert_contents>
                <has_size value="22200" delta="1000"/>
                <expand macro="meryldb_archive_assumptions"/>
            </assert_contents>
        </output>
        <assert_stdout>
            <has_line line="K-mer size: 7"/>
        </assert_stdout>
    </test>
</tests>
<help>

.. class:: infomark

**Purpose**

Meryl is a k-mer counter. This tool can be used to count k-mers:

- Count: count the occurrences of canonical k-mers
- Count-forward: count the occurrences of forward k-mers
- Count-reverse: count the occurrences of reverse k-mers
</help>
<expand macro="citations"/>
</tool>
Loading