diff --git a/tools/qc/fastq_utils/.shed.yml b/tools/qc/fastq_utils/.shed.yml
new file mode 100644
index 00000000..839dfe62
--- /dev/null
+++ b/tools/qc/fastq_utils/.shed.yml
@@ -0,0 +1,28 @@
+# Repository metadata (.shed.yml) for the fastq_utils tools.
+name: fastq_utils
+owner: ebi-gxa
+description: 'Set of tools for handling fastq files'
+long_description: >-
+  fastq_utils is a set of Linux utilities to validate and manipulate fastq
+  files. It also includes a set of programs to preprocess barcodes (namely
+  UMIs, cells and samples), add the barcodes as tags in BAM files and count
+  UMIs.
+homepage_url: https://github.com/nunofonseca/fastq_utils
+remote_repository_url: https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/qc/fastq_utils
+type: unrestricted
+categories:
+  - Transcriptomics
+  - RNA
+# Name/description templates; the placeholders are filled in per tool.
+auto_tool_repositories:
+  name_template: '{{ tool_id }}'
+  description_template: 'Set of tools for handling fastq files: {{ tool_name }}'
+# Suite entry; it repeats the description text used at the top level.
+suite:
+  name: suite_fastq_utils
+  description: 'Set of tools for handling fastq files'
+  long_description: >-
+    fastq_utils is a set of Linux utilities to validate and manipulate fastq
+    files. It also includes a set of programs to preprocess barcodes (namely
+    UMIs, cells and samples), add the barcodes as tags in BAM files and count
+    UMIs.
diff --git a/tools/qc/fastq_utils/fastq_pre_barcodes.xml b/tools/qc/fastq_utils/fastq_pre_barcodes.xml
new file mode 100755
index 00000000..985835d9
--- /dev/null
+++ b/tools/qc/fastq_utils/fastq_pre_barcodes.xml
@@ -0,0 +1,227 @@
+
+ Preprocesses the reads to move the barcodes (UMI, Cell, ...) to the respective readname, optionally discarding reads with bases in the barcode regions below a given threshold.
+ + fastq_utils + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff --git a/tools/qc/fastq_utils/get_test_data.sh b/tools/qc/fastq_utils/get_test_data.sh
new file mode 100644
index 00000000..080a4289
--- /dev/null
+++ b/tools/qc/fastq_utils/get_test_data.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+#
+# Download the fastq_utils test fixtures into ./test-data.
+# Files that already exist there are not downloaded again.
+
+BASE_LINK="https://raw.githubusercontent.com/nunofonseca/fastq_utils/master/tests"
+
+BAR11_FILE="barcode_test_1.fastq.gz"
+BAR12_FILE="barcode_test_2.fastq.gz"
+BAR21_FILE="barcode_test2_1.fastq.gz"
+BAR22_FILE="barcode_test2_2.fastq.gz"
+INTER_FILE="inter.fastq.gz"
+
+BAR11_LINK="${BASE_LINK}/${BAR11_FILE}"
+BAR12_LINK="${BASE_LINK}/${BAR12_FILE}"
+BAR21_LINK="${BASE_LINK}/${BAR21_FILE}"
+BAR22_LINK="${BASE_LINK}/${BAR22_FILE}"
+INTER_LINK="${BASE_LINK}/${INTER_FILE}"
+
+# get_data LINK FNAME
+# Download LINK to FNAME unless FNAME already exists.
+# Returns non-zero if the download fails.
+function get_data {
+    local link=$1
+    local fname=$2
+
+    if [ ! -f "$fname" ]; then
+        echo "$fname not available locally, downloading.."
+        # wget -O creates the output file even when the transfer fails, so
+        # fetch into a temporary name and rename only on success; otherwise a
+        # leftover empty file would make later runs skip the download.
+        if wget -O "$fname.part" --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 -t 3 "$link"; then
+            mv "$fname.part" "$fname"
+        else
+            echo "Failed to download $link" >&2
+            rm -f "$fname.part"
+            return 1
+        fi
+    fi
+}
+
+# Get test data
+# Abort if test-data is missing instead of downloading into the current directory.
+pushd test-data || exit 1
+
+get_data "$BAR11_LINK" "$BAR11_FILE"
+get_data "$BAR12_LINK" "$BAR12_FILE"
+get_data "$BAR21_LINK" "$BAR21_FILE"
+get_data "$BAR22_LINK" "$BAR22_FILE"
+get_data "$INTER_LINK" "$INTER_FILE"