diff --git a/tools/cutadapt/cutadapt.xml b/tools/cutadapt/cutadapt.xml
index 273f1c5def3..efa06f8f938 100644
--- a/tools/cutadapt/cutadapt.xml
+++ b/tools/cutadapt/cutadapt.xml
@@ -7,17 +7,18 @@
cutadapt --version
-
0:
--minimum-length=$filter_options.minimum_length
#end if
-#if str($filter_options.maximum_length) and str($library.type) != "single" and str($library.maximum_length2) != '':
- --maximum-length=$filter_options.maximum_length:$library.maximum_length2
-#else if str($filter_options.maximum_length):
+#if $paired and str($filter_options.maximum_length2):
+ --maximum-length=$filter_options.maximum_length:$filter_options.maximum_length2
+#elif str($filter_options.maximum_length):
--maximum-length=$filter_options.maximum_length
#end if
#if str($filter_options.max_n):
--max-n=$filter_options.max_n
#end if
-#if str( $library.type ) != "single":
- #if $filter_options.pair_filter:
- --pair-filter=$filter_options.pair_filter
- #end if
-#end if
#if str($filter_options.max_expected_errors):
- --max-expected-errors=$filter_options.max_expected_errors
+ --max-ee=$filter_options.max_expected_errors
+#end if
+#if str($filter_options.max_average_error_rate):
+ --max-aer=$filter_options.max_average_error_rate
+#end if
+$filter_options.discard_casava
+#if $paired and str($filter_options.pair_filter) != 'any':
+ --pair-filter=$filter_options.pair_filter
#end if
-$filter_options.discard_cassava
#if $input_1.ext.startswith("fastqillumina") or $input_1.ext.startswith("fastqsolexa")
--quality-base=64
#end if
-#if str($read_mod_options.quality_cutoff) != '0':
- --quality-cutoff=$read_mod_options.quality_cutoff
-#end if
-#if str($read_mod_options.nextseq_trim) != '0':
- --nextseq-trim=$read_mod_options.nextseq_trim
-#end if
-$read_mod_options.trim_n
-$read_mod_options.poly_a
#if $read_mod_options.strip_suffix != ''
- --strip-suffix $read_mod_options.strip_suffix
-#end if
-#if str($read_mod_options.shorten_options.shorten_values) == 'True':
- #if str($read_mod_options.shorten_options.shorten_end) == '3prime'
- --length=$read_mod_options.shorten_options.length
- #else
- --length=-$read_mod_options.shorten_options.length
- #end if
+ --strip-suffix='$read_mod_options.strip_suffix'
#end if
#if str($read_mod_options.length_tag) != '':
--length-tag='$read_mod_options.length_tag'
@@ -172,13 +198,49 @@ $read_mod_options.poly_a
#end if
$read_mod_options.zero_cap
+## Outputs handling
+#if 'json_stats' in $output_selector:
+ --json=stats.json
+#end if
+#if 'info_file' in $output_selector:
+ --info-file='$info_file'
+#end if
+#if 'rest_file' in $output_selector:
+ -r='${rest_output}'
+#end if
+#if 'wildcard_file' in $output_selector:
+ --wildcard-file='${wild_output}'
+#end if
+#if 'too_short_file' in $output_selector:
+ --too-short-output='${too_short_output}'
+ #if $paired:
+ --too-short-paired-output='${too_short_paired_output}'
+ #end if
+#end if
+#if 'too_long_file' in $output_selector:
+ --too-long-output='${too_long_output}'
+ #if $paired:
+ --too-long-paired-output='${too_long_paired_output}'
+ #end if
+#end if
+#if 'untrimmed_file' in $output_selector:
+ --untrimmed-output='${untrimmed_output}'
+ #if $paired:
+ --untrimmed-paired-output='${untrimmed_paired_output}'
+ #end if
+#end if
+#if not $paired and 'multiple_output' in $output_selector:
+ -o 'split/{name}.${input_1.ext}'
+#else:
+ -o '$out1'
+ #if $paired:
+ -p '$out2'
+ #end if
+#end if
-'${read1}'
+'$read1'
#if $paired:
- '${read2}'
- #if $library.r2.quality_cutoff2:
- -Q=$library.r2.quality_cutoff2
- #end if
+ '$read2'
#end if
#if 'report' in $output_selector:
@@ -186,7 +248,6 @@ $read_mod_options.zero_cap
#end if
]]>
-
@@ -194,27 +255,24 @@ $read_mod_options.zero_cap
-
-
+
-
-
+
+
-
-
+
+
-
-
-
+
@@ -227,32 +285,19 @@ $read_mod_options.zero_cap
-
-
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+ [0-9]+(,[0-9])?
-
-
-
-
-
-
-
-
-
+
@@ -261,7 +306,6 @@ $read_mod_options.zero_cap
-
@@ -277,6 +321,28 @@ $read_mod_options.zero_cap
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -304,9 +370,8 @@ $read_mod_options.zero_cap
-
-
+
@@ -318,40 +383,36 @@ $read_mod_options.zero_cap
-
- library['type'] != 'paired_collection' and 'multiple_output' not in output_selector
+ library['type'] != 'paired_collection'
+ not output_selector or 'multiple_output' not in output_selector
-
- library['type'] == 'paired' and 'multiple_output' not in output_selector
+ library['type'] == 'paired'
+ not output_selector or 'multiple_output' not in output_selector
-
- library['type'] == 'paired_collection' and 'multiple_output' not in output_selector
+ library['type'] == 'paired_collection'
+ not output_selector or 'multiple_output' not in output_selector
-
output_selector and 'report' in output_selector
-
+
output_selector and 'info_file' in output_selector
-
output_selector and 'rest_file' in output_selector
-
output_selector and 'wildcard_file' in output_selector
-
output_selector and 'untrimmed_file' in output_selector
@@ -361,7 +422,6 @@ $read_mod_options.zero_cap
output_selector and 'untrimmed_file' in output_selector
-
output_selector and 'too_short_file' in output_selector
@@ -388,7 +448,6 @@ $read_mod_options.zero_cap
output_selector and 'multiple_output' in output_selector
-
@@ -444,10 +503,9 @@ $read_mod_options.zero_cap
-
-
+
@@ -513,6 +571,39 @@ $read_mod_options.zero_cap
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -526,8 +617,8 @@ $read_mod_options.zero_cap
-
-
+
+
@@ -542,7 +633,9 @@ $read_mod_options.zero_cap
-
+
+
+
@@ -566,8 +659,6 @@ $read_mod_options.zero_cap
-
-
@@ -599,7 +690,6 @@ $read_mod_options.zero_cap
-
@@ -620,7 +710,6 @@ $read_mod_options.zero_cap
-
@@ -682,14 +771,15 @@ $read_mod_options.zero_cap
-
+
-
+
+
@@ -706,7 +796,9 @@ $read_mod_options.zero_cap
-
+
+
+
@@ -721,7 +813,9 @@ $read_mod_options.zero_cap
-
+
+
+
@@ -833,10 +927,10 @@ $read_mod_options.zero_cap
-
+
@@ -846,7 +940,7 @@ $read_mod_options.zero_cap
-
+
@@ -866,11 +960,11 @@ $read_mod_options.zero_cap
-
+
@@ -899,17 +993,17 @@ $read_mod_options.zero_cap
-
+
-
+
@@ -933,18 +1027,18 @@ $read_mod_options.zero_cap
-
+
-
+
@@ -960,13 +1054,16 @@ $read_mod_options.zero_cap
-
+
+
+
+
@@ -980,13 +1077,16 @@ $read_mod_options.zero_cap
-
+
+
+
+
@@ -1107,7 +1207,6 @@ $read_mod_options.zero_cap
-
@@ -1123,6 +1222,9 @@ $read_mod_options.zero_cap
+
+
+
@@ -1131,7 +1233,7 @@ $read_mod_options.zero_cap
-
+
@@ -1144,7 +1246,9 @@ $read_mod_options.zero_cap
-
+
+
+
@@ -1198,13 +1302,14 @@ $read_mod_options.zero_cap
-
+
+
+
-
@@ -1212,7 +1317,6 @@ $read_mod_options.zero_cap
-
@@ -1225,31 +1329,45 @@ $read_mod_options.zero_cap
-
+
+
+
+
+
+
+
-
+
+
+
+
+
+
+
-
-
+
+
+
+
-
+
-
@@ -1289,7 +1406,6 @@ $read_mod_options.zero_cap
-
@@ -1346,25 +1462,32 @@ $read_mod_options.zero_cap
**What it does**
--------------------
-
**Cutadapt** finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence from your high-throughput sequencing reads.
Cleaning your data in this way is often required: Reads from small-RNA sequencing contain the 3’ sequencing adapter because the read is longer than the molecule that is sequenced, such as in microRNA, or CRISPR data, or Poly-A tails that are useful for pulling out RNA from your sample but often you don’t want them to be in your reads.
-Cutadapt_ helps with these trimming tasks by finding the adapter or primer sequences in an error-tolerant way. It can also modify and filter reads in various ways. Cutadapt searches for the adapter in all reads and removes it when it finds it. Unless you use a filtering option, all reads that were present in the input file will also be present in the output file, some of them trimmed, some of them not. Even reads that were trimmed entirely (because the adapter was found in the very beginning) are output. All of this can be changed with options in the tool form above.
+Cutadapt helps with these trimming tasks by finding the adapter or primer sequences in an error-tolerant way. It can also modify and filter reads in various ways. Cutadapt searches for the adapter in all reads and removes it when it finds it. Unless you use a filtering option, all reads that were present in the input file will also be present in the output file, some of them trimmed, some of them not. Even reads that were trimmed entirely (because the adapter was found in the very beginning) are output. All of this can be changed with options in the tool form above.
-The tool is based on the **Open Source** Cutadapt_ tool. See the complete `Cutadapt documentation`_ for additional details. If you use Cutadapt, please cite *Marcel, 2011* under **Citations** below.
+See the complete `Cutadapt documentation`_ for additional details.
--------------------
+If you use Cutadapt, please cite *Martin, 2011* under **Citations** below.
-**Inputs**
+-----
--------------------
+Input Sequences
+***************
-Input files for Cutadapt need to be:
+Accepted input formats for the tool are:
-- FASTQ.GZ, FASTQ.BZ2, FASTQ or FASTA
+- FASTQ.GZ
+- FASTQ.BZ2
+- FASTQ
+- FASTA
+
+-----
+
+Specifying Adapters
+*******************
To trim an adapter, input the ADAPTER sequence in plain text or in a FASTA file e.g. AACCGGTT (with the characters: **$**, **^**, **...**, if anchored or linked).
@@ -1416,12 +1539,44 @@ AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT
The adapter sequences can be found in the document `Illumina TruSeq Adapters De-Mystified`_.
+
-----------
-**Outputs**
+**Paired Adapters**
-----------
+Normally, the tool looks for adapters on R1 and R2 reads independently. That is, the best matching R1 adapter of each type (3' End, 5' End, Anywhere) is removed from R1 and the best matching R2 adapter of each type is removed from R2.
+
+To change this, you can use the **Pairwise adapter search** (--pair-adapters) option, which causes each R1 adapter to be paired up with its corresponding R2 adapter. The first R1 adapter of a given type that you specify will be paired up with the first R2 adapter of that type, and so on. The adapters are then always removed in pairs from a read pair.
+
+For example, if you specify the following two 3'-end adapters for the R1 reads:
+
+- ``AAAAA``
+- ``GGGG``
+
+and these two 3'-end adapters for the R2 reads:
+
+- ``CCCCC``
+- ``TTTT``
+
+then, with this option enabled, the tool will trim a pair of reads only if:
+
+- either ``AAAAA`` is found in R1 and ``CCCCC`` is found in R2,
+- or ``GGGG`` is found in R1 and ``TTTT`` is found in R2.
+
+Two limitations exist in this mode:
+
+1. You need to provide equal numbers of R1 and R2 adapters of each type to allow pair formation, or the tool run will fail.
+2. The algorithm identifies the best-matching R1 adapter first and then checks whether it can find its corresponding R2 adapter. If not, the read pair remains unchanged, even though it is, in theory, possible that a different R1 adapter that does not fit as well would have had a corresponding R2 adapter present, i.e., some legitimate adapter pairs might remain unhandled.
+
+This mode is useful, for example, for `demultiplexing Illumina unique dual indices (UDIs)`_.
+
+-----
+
+Outputs
+*******
+
- Trimmed reads
Optionally, under **Output Options** you can choose to output
@@ -1429,10 +1584,13 @@ Optionally, under **Output Options** you can choose to output
* Report
* Info file
+-----------
**Report**
-Cutadapt can output per-adapter statistics if you select to output the report above.
+-----------
+
+Cutadapt can output per-adapter statistics if you select to generate the report above.
Example:
@@ -1461,9 +1619,12 @@ Example:
Read 1: 24,090 bp
Read 2: 24,081 bp
+-----------
**Info file**
+-----------
+
The info file contains information about the found adapters. The output is a tab-separated text file. Each line corresponds to one read of the input file.
Columns contain the following data:
@@ -1493,13 +1654,10 @@ When parsing the file, be aware that additional columns may be added in the futu
If the --times option is used and greater than 1, each read can appear more than once in the info file. There will be one line for each found adapter, all with identical read names. Only for the first of those lines will the concatenation of columns 5-7 be identical to the original read sequence (and accordingly for columns 9-11). For subsequent lines, the shown sequence are the ones that were used in subsequent rounds of adapter trimming, that is, they get successively shorter.
-
---------------------
-
-**Rename Reads**
-
--------------------
+Renaming Reads
+**************
The --rename option expects a template string such as {id} extra_info {adapter_name} as a parameter. It can contain regular text and placeholders that consist of a name enclosed in curly braces ({placeholdername}).
@@ -1523,27 +1681,17 @@ For paired-end data, the placeholder {rn} is available (“read number”), and
In addition, it is possible to write a placeholder as {r1.placeholdername} or {r2.placeholdername}, which always takes the replacement value from R1 or R2, respectively.
The {r1.placeholder} and {r2.placeholder} notation is available for all placeholders except {rn} and {id} because the read ID needs to be identical for both reads.
---------------------
-
-**More Information**
-
---------------------
-
-See the excellent `Cutadapt documentation`_
-
-.. _Cutadapt: https://cutadapt.readthedocs.io/en/stable/
-.. _`Cutadapt documentation`: https://cutadapt.readthedocs.io/en/latest/index.html
-.. _`Illumina TruSeq Adapters De-Mystified`: http://tucf-genomics.tufts.edu/documents/protocols/TUCF_Understanding_Illumina_TruSeq_Adapters.pdf
-
-
---------------------
+-----
**Galaxy Wrapper Development**
---------------------
+Original author: Lance Parsons
-Author: Lance Parsons
+-----
+.. _`Cutadapt documentation`: https://cutadapt.readthedocs.io
+.. _`Illumina TruSeq Adapters De-Mystified`: http://tucf-genomics.tufts.edu/documents/protocols/TUCF_Understanding_Illumina_TruSeq_Adapters.pdf
+.. _`demultiplexing Illumina unique dual indices (UDIs)`: https://cutadapt.readthedocs.io/en/stable/guide.html#unique-dual-indices
]]>
diff --git a/tools/cutadapt/macros.xml b/tools/cutadapt/macros.xml
index 297db15cb85..8ce4da5419c 100644
--- a/tools/cutadapt/macros.xml
+++ b/tools/cutadapt/macros.xml
@@ -1,9 +1,9 @@
4.8
- 0
+ 1fastq.bz2,fastq.gz,fastq,fasta.bz2,fasta.gz,fasta
-
+ topic_0632
@@ -25,237 +25,127 @@
cutadapt
-
#if $a.adapter_source.adapter_source_list == 'builtin':
$ADAPTER_ARGUMENT '${a.adapter_source.adapter.fields.name}'='${a.adapter_source.adapter}${a.single_noindels}'
- #else if $a.adapter_source.adapter_source_list == 'file':
+ #elif $a.adapter_source.adapter_source_list == 'file':
$ADAPTER_ARGUMENT file:'${a.adapter_source.adapter_file}${a.single_noindels}'
- #else if str($a.adapter_source.adapter_name) != "":
+ #elif str($a.adapter_source.adapter_name) != "":
$ADAPTER_ARGUMENT '${a.adapter_source.adapter_name}'='${a.adapter_source.adapter}${a.single_noindels}'
- #else
+ #else:
$ADAPTER_ARGUMENT '${a.adapter_source.adapter}${a.single_noindels}'
- #end if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
+
+
+
+
-
-
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- [0-9]+(,[0-9])?
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
diff --git a/tools/cutadapt/test-data/cutadapt_out1_pair_adapters.fq.gz b/tools/cutadapt/test-data/cutadapt_out1_pair_adapters.fq.gz
new file mode 100644
index 00000000000..1e0f474fd9d
Binary files /dev/null and b/tools/cutadapt/test-data/cutadapt_out1_pair_adapters.fq.gz differ
diff --git a/tools/cutadapt/test-data/cutadapt_out2_pair_adapters.fq.gz b/tools/cutadapt/test-data/cutadapt_out2_pair_adapters.fq.gz
new file mode 100644
index 00000000000..1f5b16886ae
Binary files /dev/null and b/tools/cutadapt/test-data/cutadapt_out2_pair_adapters.fq.gz differ
diff --git a/tools/cutadapt/test-data/cutadapt_rest.json b/tools/cutadapt/test-data/cutadapt_rest_json.txt
similarity index 79%
rename from tools/cutadapt/test-data/cutadapt_rest.json
rename to tools/cutadapt/test-data/cutadapt_rest_json.txt
index 63a84f58e11..108a32e9cd3 100644
--- a/tools/cutadapt/test-data/cutadapt_rest.json
+++ b/tools/cutadapt/test-data/cutadapt_rest_json.txt
@@ -1,23 +1,3 @@
-{
- "tag": "Cutadapt report",
- "schema_version": [0, 1],
- "cutadapt_version": "3.5",
- "python_version": "3.9.7",
- "command_line_arguments": [
- "-j=1",
- "--json",
- "stats.json",
- "-a",
- "AAAGATG",
- "--rest-file=rest_output.fa",
- "--output=out1.fa",
- "--error-rate=0.1",
- "--times=1",
- "--overlap=3",
- "--action=trim",
- "cutadapt_rest_fa.fa"
- ],
- "cores": 1,
"input": {
"path1": "cutadapt_rest_fa.fa",
"path2": null,
@@ -26,7 +6,7 @@
"read_counts": {
"input": 5,
"filtered": {
- "too_short": null,
+ "too_short": 0,
"too_long": null,
"too_many_n": null,
"too_many_expected_errors": null,
@@ -87,4 +67,3 @@
"adapters_read2": null,
"poly_a_trimmed_read1": null,
"poly_a_trimmed_read2": null
-}