galaxyproject · bgruening · Dec 7, 2024 · Dec 6, 2024 · Dec 7, 2024 · Dec 7, 2024
@@ -0,0 +1,162 @@
+<tool id="dada2_primerCheck" name="dada2: primer check" version="@DADA2_VERSION@+galaxy@WRAPPER_VERSION@" profile="19.09">
+    <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="bio_tools"/>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <expand macro="version_command"/>
+    <command detect_errors="exit_code"><![CDATA[
+Rscript '$dada2_script'
+    ]]></command>
+    <configfiles>
+        <configfile name="dada2_script"><![CDATA[
+#import re
+library(Biostrings, quietly=T)
+library(ShortRead, quietly=T)
+
+FWD <- "$forward_primer"
+REV <- "$reverse_primer"
+
+allOrients <- function(primer) {
+    # Create all orientations of the input sequence
+    dna <- DNAString(primer)  # The Biostrings works w/ DNAString objects rather than character vectors
+    orients <- c(Forward = dna, Complement = Biostrings::complement(dna), Reverse = Biostrings::reverse(dna), RevComp = Biostrings::reverseComplement(dna))
+    return(sapply(orients, toString))  # Convert back to character vector
+}
+FWD.orients <- allOrients(FWD)
+REV.orients <- allOrients(REV)
+
+primerHits <- function(primer, fn) {
+    ## Counts number of reads in which the primer is found
+    nhits <- vcountPattern(primer, sread(readFastq(fn)), fixed = FALSE)
+    return(sum(nhits > 0))
+}
+
+df <- NULL;
+#for $i, $read in enumerate($paired_cond.reads):
+    #set elid = re.sub('[^\w\-\.]', '_', str($read.element_identifier))
+    #if $paired_cond.paired_select == "single"
+        #set fwd_reads = $read
+    #elif $paired_cond.paired_select == "separate"
+        #set fwd_reads = $read
+        #set rev_reads = $paired_cond.sdaer[i]
+    #else
+        #set fwd_reads = $read.forward
+        #set rev_reads = $read.reverse
+    #end if
+    df <- rbind(df, c('$elid', 'FWD', 'FWD', sapply(FWD.orients, primerHits, fn = '$fwd_reads')))
+    df <- rbind(df, c('$elid', 'REV', 'FWD', sapply(REV.orients, primerHits, fn = '$fwd_reads')))
+    #if $paired_cond.paired_select != "single"
+        #if $paired_cond.paired_select == "separate"
+            #set elid = re.sub('[^\w\-\.]', '_', str($paired_cond.sdaer[i].element_identifier))
+        #end if
+        df <- rbind(df, c('$elid', 'FWD', 'REV', sapply(FWD.orients, primerHits, fn = '$rev_reads')))
+        df <- rbind(df, c('$elid', 'REV', 'REV', sapply(REV.orients, primerHits, fn = '$rev_reads')))
+    #end if
+#end for
+colnames(df) <- c('Sample', 'Primer', 'ReadDir', 'Sequence', 'Complement', 'Reverse', 'RevComp')
+write.table(df, "$out", quote=F, sep="\t", row.names = F, col.names = T)
+    ]]></configfile>
+    </configfiles>
+    <inputs>
+        <expand macro="fastq_input" multiple="True" collection_type="list:paired" argument_fwd="fl" argument_rev="fl"/>
+        <param name="forward_primer" type="text" label="Forward primer sequence">
+            <validator type="empty_field" message="You need to specify a forward primer sequence"/>
+        </param>
+        <param name="reverse_primer" type="text" label="Reverse primer sequence">
+            <validator type="empty_field" message="You need to specify a reverse primer sequence"/>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="out" format="tabular"/>
+    </outputs>
+    <tests>
+        <!-- paired data in paired collection -->
+        <test expect_num_outputs="1">
+            <conditional name="paired_cond">
+                <param name="paired_select" value="paired"/>
+                <param name="reads">
+                    <collection type="list:paired">
+                        <element name="F3D0_S188_L001">
+                            <collection type="paired">
+                                <element name="forward" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/>
+                                <element name="reverse" value="F3D0_S188_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/>
+                            </collection>
+                        </element>
+                        <element name="F3D141_S207_L001">
+                            <collection type="paired">
+                                <element name="forward" value="F3D141_S207_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/>
+                                <element name="reverse" value="F3D141_S207_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/>
+                            </collection>
+                        </element>
+                    </collection>
+                </param>
+            </conditional>
+
+            <param name="forward_primer" value="ACCTGCGGARGGATCA"/>
+            <param name="reverse_primer" value="GAGATCCRTTGYTRAAAGTT"/>
+            <output name="out">
+                <assert_contents>
+                    <has_n_lines n="9"/>
+                    <has_n_columns n="7"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- paired data in separate collection -->
+        <test expect_num_outputs="1">
+            <conditional name="paired_cond">
+                <param name="paired_select" value="separate"/>
+                <param name="reads" value="F3D0_S188_L001_R1_001.fastq.gz,F3D141_S207_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/>
+                <param name="sdaer" value="F3D0_S188_L001_R2_001.fastq.gz,F3D141_S207_L001_R2_001.fastq.gz" ftype="fastqsanger.gz"/>
+            </conditional>
+
+            <param name="forward_primer" value="ACCTGCGGARGGATCA"/>
+            <param name="reverse_primer" value="GAGATCCRTTGYTRAAAGTT"/>
+            <output name="out">
+                <assert_contents>
+                    <has_n_lines n="9"/>
+                    <has_n_columns n="7"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- single end data -->
+        <test expect_num_outputs="1">
+            <conditional name="paired_cond">
+                <param name="paired_select" value="single"/>
+                <param name="reads" value="F3D0_S188_L001_R1_001.fastq.gz" ftype="fastqsanger.gz"/>
+            </conditional>
+            <param name="forward_primer" value="ACCTGCGGARGGATCA"/>
+            <param name="reverse_primer" value="GAGATCCRTTGYTRAAAGTT"/>
+            <output name="out">
+                <assert_contents>
+                    <has_n_lines n="3"/>
+                    <has_n_columns n="7"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+Description
+...........
+
+Simple check for primer sequences in sequencing data. The tool counts the number
+of occurrences of the primer sequence, its complement, the reverse and the
+reverse complement.
+
+See also: https://benjjneb.github.io/dada2/ITS_workflow.html#identify-primers
+
+Usage
+.....
+
+**Input** is a FASTQ datasets and forward and reverse primer
+
+**Output** a table listing the counts of the different occurrences in the read files.
+
+
+@HELP_OVERVIEW@
+    ]]></help>
+    <expand macro="citations"/>
+</tool>