forked from nf-core/modules
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* added topas/gencons tests * finish tests * run prettier * modify test new input gatk ug * Update modules/nf-core/topas/gencons/main.nf Co-authored-by: James A. Fellows Yates <[email protected]> * Apply suggestions from code review Co-authored-by: James A. Fellows Yates <[email protected]> * updated input, output and test after review * Fix meta and Update tests * Prettify --------- Co-authored-by: James A. Fellows Yates <[email protected]> Co-authored-by: Adam Talbot <[email protected]> Co-authored-by: Simon Pearce <[email protected]>
- Loading branch information
Showing
6 changed files
with
312 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
// Build a consensus FASTA with `topas GenConS` from a SNP VCF and a reference.
//
// Inputs:
//   vcf         - VCF with the called SNPs (passed as -snps)
//   vcf_indels  - optional VCF with the called indels (passed as -indels when provided)
//   reference   - reference FASTA (passed as -ref)
//   fai         - optional reference index (passed as -fai when provided)
//   vcf_output  - when true, also request a VCF of the consensus calls (-vcf_out)
//
// Outputs: gzipped consensus FASTA, optional gzipped VCF, the .ccf and .log files
// matched by the globs below, and versions.yml.
process TOPAS_GENCONS {
    tag "$meta.id"
    label 'process_single'

    conda "bioconda::topas=1.0.1"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/topas:1.0.1--hdfd78af_1':
        'biocontainers/topas:1.0.1--hdfd78af_1' }"

    input:
    tuple val(meta), path(vcf)
    tuple val(meta2), path(vcf_indels)
    tuple val(meta3), path(reference)
    tuple val(meta4), path(fai)
    val(vcf_output)

    output:
    tuple val(meta), path("*.fasta.gz"), emit: fasta
    tuple val(meta), path("*.vcf.gz")  , emit: vcf     , optional: true
    tuple val(meta), path("*.ccf")     , emit: ccf
    tuple val(meta), path("*.log")     , emit: log
    path "versions.yml"                , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Optional inputs arrive as empty lists when not supplied; each flag is emitted only if a file was given.
    def optionalvcfindels = vcf_indels ? "-indels ${vcf_indels}" : ''
    def optionalfai = fai ? "-fai ${fai}" : ''
    def vcfoutput = vcf_output ? "-vcf_out ${prefix}.vcf" : ""
    def VERSION = '1.0.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.

    // The uncompressed VCF only exists when -vcf_out was requested, hence the guarded gzip.
    """
    topas \\
        GenConS \\
        $args \\
        -o ${prefix}.fasta \\
        -snps $vcf \\
        $optionalvcfindels \\
        $optionalfai \\
        $vcfoutput \\
        -ref $reference

    gzip -n ${prefix}.fasta

    if [[ -f ${prefix}.vcf ]];then
        gzip -n ${prefix}.vcf
    fi

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        topas: $VERSION
    END_VERSIONS
    """
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
# Module metadata for topas/gencons. Inputs and outputs mirror the channels declared in main.nf.
name: "topas_gencons"
description: Create fasta consensus with TOPAS toolkit with options to penalize substitutions for typical DNA damage present in ancient DNA
keywords:
  - consensus
  - fasta
  - ancient DNA
tools:
  - "topas":
      description: "This toolkit allows the efficient manipulation of sequence data in various ways. It is organized into modules: The FASTA processing modules, the FASTQ processing modules, the GFF processing modules and the VCF processing modules."
      homepage: "https://github.com/subwaystation/TOPAS"
      documentation: "https://github.com/subwaystation/TOPAS/wiki/Overview-Modules"
      tool_dev_url: "https://github.com/subwaystation/TOPAS"
      doi: "10.1038/s41598-017-17723-1"
      # Must be a YAML list, not a string that merely looks like one.
      licence: ["CC-BY"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - vcf:
      type: file
      description: Gzipped compressed vcf file generated with GATK UnifiedGenotyper containing the called snps
      pattern: "*.vcf.gz"

  - meta2:
      type: map
      description: |
        Groovy Map accompanying the indels vcf file
        e.g. [ id:'test' ]
  - vcf_indels:
      type: file
      description: Optional gzipped compressed vcf file generated with GATK UnifiedGenotyper containing the called indels
      pattern: "*.vcf.gz"

  - meta3:
      type: map
      description: |
        Groovy Map accompanying the reference fasta file
        e.g. [ id:'test' ]
  - reference:
      type: file
      description: Fasta file of reference genome
      pattern: "*.fasta"

  - meta4:
      type: map
      description: |
        Groovy Map accompanying the reference fasta index
        e.g. [ id:'test' ]
  - fai:
      type: file
      description: Optional index for the fasta file of reference genome
      pattern: "*.fai"

  - vcf_output:
      type: boolean
      description: Boolean value to indicate if a compressed vcf file with the consensus calls included as SNPs should be produced

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

  - fasta:
      type: file
      description: Gzipped consensus fasta file with bases under threshold replaced with Ns
      pattern: "*.fasta.gz"

  - vcf:
      type: file
      description: Gzipped vcf file with updated calls for the SNPs used in the consensus generation and for bases under threshold replaced with Ns
      pattern: "*.vcf.gz"

  - ccf:
      type: file
      description: Statistics file containing information about the consensus calls in the fasta file
      pattern: "*.ccf"

  - log:
      type: file
      description: Log file
      pattern: "*.log"

authors:
  - "@aidaanva"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
#!/usr/bin/env nextflow | ||
|
||
nextflow.enable.dsl = 2 | ||
|
||
// This script lives in tests/modules/nf-core/topas/gencons/, so the repository root is five
// levels up. Every include points at the module definition under <root>/modules/nf-core/.
include { GATK_UNIFIEDGENOTYPER as GATK_UNIFIEDGENOTYPERSNPS   } from '../../../../../modules/nf-core/gatk/unifiedgenotyper/main.nf'
include { GATK_UNIFIEDGENOTYPER as GATK_UNIFIEDGENOTYPERINDELS } from '../../../../../modules/nf-core/gatk/unifiedgenotyper/main.nf'
include { TOPAS_GENCONS                                        } from '../../../../../modules/nf-core/topas/gencons/main.nf'
|
||
// Baseline test: SNP VCF and reference only; no indel VCF, no reference index, no VCF output.
workflow test_topas_gencons {

    // Placeholder tuple used for every optional input that is left unset.
    no_file = [[],[]]

    // Sorted BAM and its index to genotype.
    bam_input = [
        [ id:'test' ], // meta map
        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
    ]

    // Reference files required by GATK UnifiedGenotyper.
    ref_fasta = [ [id: 'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
    ref_fai   = [ [id: 'test'], file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) ]
    ref_dict  = [ [id: 'test'], file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) ]

    GATK_UNIFIEDGENOTYPERSNPS ( bam_input, ref_fasta, ref_fai, ref_dict, no_file, no_file, no_file, no_file )

    // Reference handed to GenConS.
    consensus_ref = [
        [ id:'test' ], // meta map
        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
    ]
    emit_vcf = false

    TOPAS_GENCONS ( GATK_UNIFIEDGENOTYPERSNPS.out.vcf, no_file, consensus_ref, no_file, emit_vcf )
}
|
||
// Same as the baseline test, but the reference index is also handed to TOPAS_GENCONS.
workflow test_topas_gencons_fai {

    // Placeholder tuple used for every optional input that is left unset.
    no_file = [[],[]]

    // Sorted BAM and its index to genotype.
    bam_input = [
        [ id:'test' ], // meta map
        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
    ]

    // Reference files required by GATK UnifiedGenotyper; the index is reused for GenConS below.
    ref_fasta = [ [id: 'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
    ref_fai   = [ [id: 'test'], file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) ]
    ref_dict  = [ [id: 'test'], file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) ]

    GATK_UNIFIEDGENOTYPERSNPS ( bam_input, ref_fasta, ref_fai, ref_dict, no_file, no_file, no_file, no_file )

    // Reference handed to GenConS.
    consensus_ref = [
        [ id:'test' ], // meta map
        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
    ]
    emit_vcf = false

    TOPAS_GENCONS ( GATK_UNIFIEDGENOTYPERSNPS.out.vcf, no_file, consensus_ref, ref_fai, emit_vcf )
}
|
||
// Full test: SNP and indel VCFs are both produced by GATK and a consensus VCF is requested.
workflow test_topas_gencons_indels {

    // Placeholder tuple used for every optional input that is left unset.
    no_file = [[],[]]

    // Sorted BAM and its index to genotype.
    bam_input = [
        [ id:'test' ], // meta map
        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
    ]

    // Reference files required by GATK UnifiedGenotyper.
    ref_fasta = [ [id: 'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
    ref_fai   = [ [id: 'test'], file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) ]
    ref_dict  = [ [id: 'test'], file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) ]

    // Two aliases of the same module; the accompanying test config sets the SNP/INDEL model for each.
    GATK_UNIFIEDGENOTYPERSNPS   ( bam_input, ref_fasta, ref_fai, ref_dict, no_file, no_file, no_file, no_file )
    GATK_UNIFIEDGENOTYPERINDELS ( bam_input, ref_fasta, ref_fai, ref_dict, no_file, no_file, no_file, no_file )

    // Reference handed to GenConS.
    consensus_ref = [
        [ id:'test' ], // meta map
        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
    ]
    emit_vcf = true

    TOPAS_GENCONS (
        GATK_UNIFIEDGENOTYPERSNPS.out.vcf,
        GATK_UNIFIEDGENOTYPERINDELS.out.vcf,
        consensus_ref,
        no_file,
        emit_vcf
    )
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
// Test-only process configuration for the topas/gencons module tests.
process {

    // Publish into <outdir>/<tool>: take the last ':'-separated part of the process name,
    // keep the text before its first '_', and lower-case it.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        "${params.outdir}/${simple_name.tokenize('_')[0].toLowerCase()}"
    }

    // SNP-calling alias of GATK UnifiedGenotyper; outputs are prefixed "<id>_snps".
    withName: GATK_UNIFIEDGENOTYPERSNPS {
        ext.prefix = { "${meta.id}_snps" }
        ext.args   = "--genotype_likelihoods_model SNP -stand_call_conf 30 --sample_ploidy 2 -dcov 250 --output_mode EMIT_ALL_SITES"
    }

    // Indel-calling alias of GATK UnifiedGenotyper; outputs are prefixed "<id>_indels".
    withName: GATK_UNIFIEDGENOTYPERINDELS {
        ext.prefix = { "${meta.id}_indels" }
        ext.args   = "--genotype_likelihoods_model INDEL -stand_call_conf 30 --sample_ploidy 2 -dcov 250 --output_mode EMIT_ALL_SITES"
    }

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# pytest-workflow expectations for topas/gencons.
# Entries without an md5sum or contains check are asserted for existence only.

# SNPs only, no reference index, no VCF output.
- name: topas gencons test_topas_gencons
  command: nextflow run ./tests/modules/nf-core/topas/gencons -entry test_topas_gencons -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/topas/gencons/nextflow.config
  tags: [topas/gencons, topas]
  files:
    - path: output/gatk/test_snps.vcf.gz
    - path: output/gatk/versions.yml
    - path: output/topas/test.fasta.ccf
      md5sum: 794d4231ee35302a9816df2c1f595041
    - path: output/topas/test.fasta.gz
      md5sum: 1f219a39d28eed85be8a48cb167d5879
    - path: output/topas/test.fasta.log
      contains:
        - "Parameters chosen: "
    - path: output/topas/versions.yml

# SNPs only, reference index supplied.
- name: topas gencons test_topas_gencons_fai
  command: nextflow run ./tests/modules/nf-core/topas/gencons -entry test_topas_gencons_fai -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/topas/gencons/nextflow.config
  tags: [topas/gencons, topas]
  files:
    - path: output/gatk/test_snps.vcf.gz
    - path: output/gatk/versions.yml
    - path: output/topas/test.fasta.ccf
      md5sum: 794d4231ee35302a9816df2c1f595041
    - path: output/topas/test.fasta.gz
      md5sum: 1f219a39d28eed85be8a48cb167d5879
    - path: output/topas/test.fasta.log
      contains:
        - "Parameters chosen: "
    - path: output/topas/versions.yml

# SNPs plus indels, with the consensus VCF requested.
- name: topas gencons test_topas_gencons_indels
  command: nextflow run ./tests/modules/nf-core/topas/gencons -entry test_topas_gencons_indels -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/topas/gencons/nextflow.config
  tags: [topas/gencons, topas]
  files:
    - path: output/gatk/test_indels.vcf.gz
    - path: output/gatk/test_snps.vcf.gz
    - path: output/gatk/versions.yml
    - path: output/topas/test.fasta.ccf
      md5sum: f7337d6e701ab7c5b3c8654742e5116a
    - path: output/topas/test.fasta.gz
      md5sum: 1f219a39d28eed85be8a48cb167d5879
    - path: output/topas/test.fasta.log
      contains:
        - "Parameters chosen: "
    - path: output/topas/test.vcf.gz
      contains:
        - "##FILTER="
    - path: output/topas/versions.yml