Skip to content

Commit

Permalink
Added module trgt/genotype (#7002)
Browse files Browse the repository at this point in the history
* Imported genotype module from trgt branch

* Fixed test issues

---------

Co-authored-by: Sateesh_Peri <[email protected]>
  • Loading branch information
Schmytzi and sateeshperi authored Nov 16, 2024
1 parent 21f230b commit 484afd1
Show file tree
Hide file tree
Showing 5 changed files with 513 additions and 0 deletions.
7 changes: 7 additions & 0 deletions modules/nf-core/trgt/genotype/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the trgt/genotype module.
# Channels are listed in priority order.
channels:
  - conda-forge
  - bioconda
# Single pinned dependency; the version matches the container tag used in main.nf.
dependencies:
  - 'bioconda::trgt=1.2.0'
56 changes: 56 additions & 0 deletions modules/nf-core/trgt/genotype/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Genotype tandem repeats with `trgt genotype`.
//
// Inputs
//   [meta,  bam, bai, karyotype] : reads, their index and an optional karyotype value;
//                                  a falsy karyotype (e.g. []) omits the --karyotype flag
//   [meta2, fasta]               : reference genome passed to --genome
//   [meta3, fai]                 : FASTA index; staged into the work dir but not passed
//                                  on the command line (presumably discovered next to
//                                  the FASTA by trgt - verify against tool docs)
//   [meta4, repeats]             : repeat catalogue passed to --repeats
//
// Outputs
//   vcf      : [meta, *.vcf.gz]
//   bam      : [meta, *.spanning.bam] (optional)
//   versions : versions.yml
process TRGT_GENOTYPE {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/trgt:1.2.0--h9ee0642_0':
        'biocontainers/trgt:1.2.0--h9ee0642_0' }"

    input:
    tuple val(meta) , path(bam), path(bai), val(karyotype)
    tuple val(meta2), path(fasta)
    tuple val(meta3), path(fai)
    tuple val(meta4), path(repeats)

    output:
    tuple val(meta), path("*.vcf.gz")      , emit: vcf
    tuple val(meta), path("*.spanning.bam"), emit: bam     , optional: true
    path "versions.yml"                    , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Extra command-line options supplied through the pipeline configuration
    def args   = task.ext.args   ?: ''
    // Output file prefix; falls back to the sample id
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Only emit --karyotype when a value was provided
    def karyo  = karyotype ? "--karyotype ${karyotype}" : ""
    """
    trgt genotype \\
        $args \\
        --genome ${fasta} \\
        --reads ${bam} \\
        --repeats ${repeats} \\
        ${karyo} \\
        --threads ${task.cpus} \\
        --output-prefix ${prefix}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        trgt: \$(trgt --version |& sed '1!d ; s/trgt //')
    END_VERSIONS
    """

    stub:
    // The stub only needs the prefix: it creates empty placeholder outputs
    // with the same names the real command would produce.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.spanning.bam
    echo "" | gzip > ${prefix}.vcf.gz

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        trgt: \$(trgt --version |& sed '1!d ; s/trgt //')
    END_VERSIONS
    """
}
98 changes: 98 additions & 0 deletions modules/nf-core/trgt/genotype/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for trgt/genotype: describes the tool and the input/output
# channels declared in main.nf.
name: "trgt_genotype"
description: Tandem repeat genotyping from PacBio HiFi data
keywords:
  - repeat expansion
  - pacbio
  - genomics
tools:
  - "trgt":
      description: "Tandem repeat genotyping and visualization from PacBio HiFi data"
      homepage: "https://github.com/PacificBiosciences/trgt"
      documentation: "https://github.com/PacificBiosciences/trgt/blob/main/docs/tutorial.md"
      tool_dev_url: "https://github.com/PacificBiosciences/trgt"
      doi: "10.1038/s41587-023-02057-3"
      licence:
        ["Pacific Biosciences Software License (https://github.com/PacificBiosciences/trgt/blob/main/LICENSE.md)"]

# One list entry per input channel, in the order they appear in main.nf.
input:
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. `[ id:'sample1', single_end:false ]`
    - bam:
        type: file
        description: Sorted BAM file
        pattern: "*.bam"
    - bai:
        type: file
        description: "Index of the BAM file"
        # Index files end in .bai, not .bam
        pattern: "*.bai"
    - karyotype:
        type: string
        description: "Karyotype of the sample. Either XX or XY. Defaults to XX if not given"
        enum:
          - XX
          - XY
  - - meta2:
        type: map
        description: |
          Groovy map containing reference information
          e.g. `[ id: 'genome' ]`
    - fasta:
        type: file
        description: "FASTA reference file"
        pattern: "*.{fasta,fa,fna}"
  - - meta3:
        type: map
        description: |
          Groovy map containing reference information
          e.g. `[ id: 'genome' ]`
    - fai:
        type: file
        description: "Index for FASTA file"
        pattern: "*.fai"
  - - meta4:
        type: map
        description: |
          Groovy map containing repeat information
          e.g. `[ id: 'repeats' ]`
    - repeats:
        type: file
        description: "BED file with repeat coordinates"
        pattern: "*.bed"

# One entry per named output channel (the `emit:` names in main.nf).
output:
  - vcf:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1', single_end:false ]`
      - "*.vcf.gz":
          type: file
          description: "VCF file with repeat genotypes"
          pattern: "*.vcf.gz"
  - bam:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1', single_end:false ]`
      - "*.spanning.bam":
          type: file
          description: BAM file with pieces of reads aligning to repeats
          pattern: "*.spanning.bam"
  - versions:
      - "versions.yml":
          type: file
          description: File containing software versions
          pattern: "versions.yml"

authors:
  - "@Schmytzi"
  - "@fellen31"
maintainers:
  - "@Schmytzi"
154 changes: 154 additions & 0 deletions modules/nf-core/trgt/genotype/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
// nf-core modules test trgt/genotype
//
// nf-test specification for TRGT_GENOTYPE. Two real runs (with and without a
// karyotype) and two matching stub runs; every test snapshots the full
// process output.
nextflow_process {

    name "Test Process TRGT_GENOTYPE"
    script "../main.nf"
    process "TRGT_GENOTYPE"

    tag "modules"
    tag "modules_nfcore"
    tag "trgt"
    tag "trgt/genotype"
    tag "samtools/faidx"
    tag "gunzip"

    // Shared fixtures: the reference is fetched as genome.fa.gz, run through
    // GUNZIP, and the GUNZIP output is then fed to SAMTOOLS_FAIDX. Tests
    // consume GUNZIP.out.gunzip and SAMTOOLS_FAIDX.out.fai.
    setup {
        run("GUNZIP") {
            script "../../../gunzip/main.nf"
            process {
                """
                input[0] = [
                    [ id : 'chr22' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr22_chr22_KI270734v1_random/sequence/genome.fa.gz', checkIfExists: true)
                ]
                """
            }
        }
        run("SAMTOOLS_FAIDX") {
            script "../../../samtools/faidx/main.nf"
            process {
                """
                input[0] = GUNZIP.out.gunzip
                input[1] = [[],[]]
                """
            }
        }
    }

    // Real run with an explicit karyotype value.
    test("homo sapiens - [bam,bai,XX], [fa,fai], bed") {

        when {
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam.bai', checkIfExists: true),
                    'XX'
                ]
                input[1] = GUNZIP.out.gunzip
                input[2] = SAMTOOLS_FAIDX.out.fai
                input[3] = Channel.of('chr22\t18890357\t18890451\tID=TEST;MOTIFS=AT;STRUC=(AT)n')
                    .collectFile(name : 'repeats.bed', newLine: false)
                    .map { file -> [ [ id : 'chr22' ], file ] }
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

    // Real run with an empty karyotype ([]), so the process omits --karyotype.
    test("homo sapiens - [bam,bai,[]], [fa,fai], bed") {

        when {
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam.bai', checkIfExists: true),
                    []
                ]
                input[1] = GUNZIP.out.gunzip
                input[2] = SAMTOOLS_FAIDX.out.fai
                input[3] = Channel.of('chr22\t18890357\t18890451\tID=TEST;MOTIFS=AT;STRUC=(AT)n')
                    .collectFile(name : 'repeats.bed', newLine: false)
                    .map { file -> [ [ id : 'chr22' ], file ] }
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

    // Stub run with an explicit karyotype value.
    test("homo sapiens - [bam,bai,XX], [fa,fai], bed - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam.bai', checkIfExists: true),
                    'XX'
                ]
                input[1] = GUNZIP.out.gunzip
                input[2] = SAMTOOLS_FAIDX.out.fai
                input[3] = Channel.of('chr22\t18890357\t18890451\tID=TEST;MOTIFS=AT;STRUC=(AT)n')
                    .collectFile(name : 'repeats.bed', newLine: false)
                    .map { file -> [ [ id : 'chr22' ], file ] }
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

    // Stub run with an empty karyotype. The "-stub" option is required here:
    // without it this test executes the real script section and merely
    // repeats the non-stub empty-karyotype test above.
    test("homo sapiens - [bam,bai,[]], [fa,fai], bed - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [
                    [ id:'test' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam.bai', checkIfExists: true),
                    []
                ]
                input[1] = GUNZIP.out.gunzip
                input[2] = SAMTOOLS_FAIDX.out.fai
                input[3] = Channel.of('chr22\t18890357\t18890451\tID=TEST;MOTIFS=AT;STRUC=(AT)n')
                    .collectFile(name : 'repeats.bed', newLine: false)
                    .map { file -> [ [ id : 'chr22' ], file ] }
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }
}
Loading

0 comments on commit 484afd1

Please sign in to comment.