Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dada2: add tool to check for primers #6615

Merged
merged 4 commits into from
Dec 7, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 162 additions & 0 deletions tools/dada2/dada2_primercheck.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
<tool id="dada2_primerCheck" name="dada2: primer check" version="@DADA2_VERSION@+galaxy@WRAPPER_VERSION@" profile="19.09">
<description></description>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="bio_tools"/>
<expand macro="requirements"/>
<expand macro="stdio"/>
<expand macro="version_command"/>
<command detect_errors="exit_code"><![CDATA[
Rscript '$dada2_script'
]]></command>
<configfiles>
<configfile name="dada2_script"><![CDATA[
#import re
## Biostrings and ShortRead provide the sequence handling and FASTQ reading used below
library(Biostrings, quietly = TRUE)
library(ShortRead, quietly = TRUE)

## Primer sequences exactly as entered in the tool form
FWD <- "$forward_primer"
REV <- "$reverse_primer"

allOrients <- function(primer) {
    # Build all orientations of the input primer: as given, complement,
    # reverse and reverse complement; each is returned as a plain string.
    dna <- DNAString(primer) # Biostrings works with DNAString objects rather than character vectors
    orients <- c(Forward = dna, Complement = Biostrings::complement(dna), Reverse = Biostrings::reverse(dna), RevComp = Biostrings::reverseComplement(dna))
    # vapply insists on exactly one string per orientation and fails loudly
    # otherwise, instead of silently changing the shape of the result
    vapply(orients, toString, character(1))
}
FWD.orients <- allOrients(FWD)
REV.orients <- allOrients(REV)

primerHits <- function(primer, fn) {
    ## Number of reads in the FASTQ file fn that contain the primer at least once
    reads <- sread(readFastq(fn))
    # fixed = FALSE: IUPAC ambiguity codes act as wildcards rather than literal letters
    hits_per_read <- vcountPattern(primer, reads, fixed = FALSE)
    sum(hits_per_read > 0)
}

## Build the result table row by row. The Cheetah loop below is expanded when
## the script is generated, so R only sees a flat sequence of rbind calls.
df <- NULL
#for $i, $read in enumerate($paired_cond.reads):
#set elid = re.sub('[^\w\-\.]', '_', str($read.element_identifier))
## pick the forward (and, if present, reverse) read file for this sample
#if $paired_cond.paired_select == "single"
#set fwd_reads = $read
#elif $paired_cond.paired_select == "separate"
#set fwd_reads = $read
#set rev_reads = $paired_cond.sdaer[i]
#else
#set fwd_reads = $read.forward
#set rev_reads = $read.reverse
#end if
## counts of both primers (all orientations) in the forward reads
df <- rbind(df, c('$elid', 'FWD', 'FWD', sapply(FWD.orients, primerHits, fn = '$fwd_reads')))
df <- rbind(df, c('$elid', 'REV', 'FWD', sapply(REV.orients, primerHits, fn = '$fwd_reads')))
#if $paired_cond.paired_select != "single"
## separate mode: label the reverse rows with the identifier of the reverse dataset
#if $paired_cond.paired_select == "separate"
#set elid = re.sub('[^\w\-\.]', '_', str($paired_cond.sdaer[i].element_identifier))
#end if
## counts of both primers (all orientations) in the reverse reads
df <- rbind(df, c('$elid', 'FWD', 'REV', sapply(FWD.orients, primerHits, fn = '$rev_reads')))
df <- rbind(df, c('$elid', 'REV', 'REV', sapply(REV.orients, primerHits, fn = '$rev_reads')))
#end if
#end for
colnames(df) <- c('Sample', 'Primer', 'ReadDir', 'Sequence', 'Complement', 'Reverse', 'RevComp')
## TRUE/FALSE spelled out: the T/F shorthands are ordinary variables and can be reassigned
write.table(df, "$out", quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE)
]]></configfile>
</configfiles>
<inputs>
<expand macro="fastq_input" multiple="True" collection_type="list:paired" argument_fwd="fl" argument_rev="fl"/>
<!-- Primer sequences are passed to Biostrings' DNAString in the generated script;
     the regex validators reject anything that is not a nucleotide code already in
     the form instead of letting the R script fail at run time. -->
<param name="forward_primer" type="text" label="Forward primer sequence" help="DNA sequence; IUPAC ambiguity codes are allowed">
<validator type="empty_field" message="You need to specify a forward primer sequence"/>
<validator type="regex" message="The forward primer may only contain IUPAC nucleotide codes (ACGTMRWSYKVHDBN)">^[ACGTMRWSYKVHDBNacgtmrwsykvhdbn]+$</validator>
</param>
<param name="reverse_primer" type="text" label="Reverse primer sequence" help="DNA sequence; IUPAC ambiguity codes are allowed">
<validator type="empty_field" message="You need to specify a reverse primer sequence"/>
<validator type="regex" message="The reverse primer may only contain IUPAC nucleotide codes (ACGTMRWSYKVHDBN)">^[ACGTMRWSYKVHDBNacgtmrwsykvhdbn]+$</validator>
</param>
</inputs>
<outputs>
<data name="out" format="tabular"/>
</outputs>
<tests>
<!-- paired data in paired collection -->
<test expect_num_outputs="1">
<conditional name="paired_cond">
<param name="paired_select" value="paired"/>
<param name="reads">
<collection type="list:paired">
<element name="F3D0_S188_L001">
<collection type="paired">
<element name="forward" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/>
<element name="reverse" value="F3D0_S188_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/>
</collection>
</element>
<element name="F3D141_S207_L001">
<collection type="paired">
<element name="forward" value="F3D141_S207_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/>
<element name="reverse" value="F3D141_S207_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/>
</collection>
</element>
</collection>
</param>
</conditional>

<param name="forward_primer" value="ACCTGCGGARGGATCA"/>
<param name="reverse_primer" value="GAGATCCRTTGYTRAAAGTT"/>
<output name="out">
<assert_contents>
<!-- 1 header line + 2 samples x 4 rows (FWD and REV primer, each counted in the forward and in the reverse reads) -->
<has_n_lines n="9"/>
<!-- Sample, Primer, ReadDir + one count column per primer orientation -->
<has_n_columns n="7"/>
</assert_contents>
</output>
</test>
<!-- paired data in separate collection -->
<test expect_num_outputs="1">
<conditional name="paired_cond">
<param name="paired_select" value="separate"/>
<param name="reads" value="F3D0_S188_L001_R1_001.fastq.gz,F3D141_S207_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/>
<param name="sdaer" value="F3D0_S188_L001_R2_001.fastq.gz,F3D141_S207_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/>
</conditional>

<param name="forward_primer" value="ACCTGCGGARGGATCA"/>
<param name="reverse_primer" value="GAGATCCRTTGYTRAAAGTT"/>
<output name="out">
<assert_contents>
<!-- 1 header line + 2 forward/reverse file pairs x 4 rows (FWD and REV primer, each counted in the forward and in the reverse file) -->
<has_n_lines n="9"/>
<!-- Sample, Primer, ReadDir + one count column per primer orientation -->
<has_n_columns n="7"/>
</assert_contents>
</output>
</test>
<!-- single end data -->
<test expect_num_outputs="1">
<conditional name="paired_cond">
<param name="paired_select" value="single"/>
<param name="reads" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/>
</conditional>
<param name="forward_primer" value="ACCTGCGGARGGATCA"/>
<param name="reverse_primer" value="GAGATCCRTTGYTRAAAGTT"/>
<output name="out">
<assert_contents>
<!-- 1 header line + 1 sample x 2 rows (FWD and REV primer, counted in the single read file only) -->
<has_n_lines n="3"/>
<!-- Sample, Primer, ReadDir + one count column per primer orientation -->
<has_n_columns n="7"/>
</assert_contents>
</output>
</test>
</tests>

<help><![CDATA[
Description
...........

Simple check for primer sequences in sequencing data. For each read file, the
tool counts the number of reads that contain the primer sequence, its
complement, its reverse, or its reverse complement.

See also: https://benjjneb.github.io/dada2/ITS_workflow.html#identify-primers

Usage
.....

**Input**: one or more FASTQ datasets plus the forward and reverse primer sequences.

**Output**: a table listing, for each read file, the number of reads in which each primer was found in each orientation.


@HELP_OVERVIEW@
]]></help>
<expand macro="citations"/>
</tool>
Loading