Skip to content

Commit

Permalink
Added module trgt/merge (#7012)
Browse files Browse the repository at this point in the history
* Imported module merge from branch trgt

* Removed dump channels

Co-authored-by: Sateesh_Peri <[email protected]>

* Fixed indentation

Co-authored-by: Sateesh_Peri <[email protected]>

* Updated tests to use params

* Delete modules/nf-core/trgt/merge/tests/tags.yml

don't need this yml anymore

---------

Co-authored-by: Sateesh_Peri <[email protected]>
  • Loading branch information
Schmytzi and sateeshperi authored Nov 18, 2024
1 parent 7ed0aec commit 6ba546c
Show file tree
Hide file tree
Showing 6 changed files with 432 additions and 0 deletions.
6 changes: 6 additions & 0 deletions modules/nf-core/trgt/merge/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment definition for the trgt/merge module.
channels:
  - conda-forge
  - bioconda
dependencies:
  # Pinned to an exact TRGT release, taken from the bioconda channel.
  - "bioconda::trgt=1.2.0"
58 changes: 58 additions & 0 deletions modules/nf-core/trgt/merge/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Merge several TRGT VCFs (plus their indexes) into a single file with `trgt merge`.
//
// Inputs:
//   meta,  vcfs, tbis : sample/group metadata, the VCFs to merge and their indexes
//   meta2, fasta      : optional reference FASTA; passed as `--genome` when provided
//   meta3, fai        : optional FASTA index; only staged into the work directory,
//                       never referenced on the command line
// Outputs:
//   vcf      : the merged VCF/BCF, named `<prefix>.<extension>` unless the caller
//              supplies their own `--output`/`-o` through `task.ext.args`
//   versions : versions.yml recording the TRGT version
process TRGT_MERGE {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/trgt:1.2.0--h9ee0642_0':
        'biocontainers/trgt:1.2.0--h9ee0642_0' }"

    input:
    tuple val(meta) , path(vcfs), path(tbis)
    tuple val(meta2), path(fasta) // optional
    tuple val(meta3), path(fai)   // optional

    output:
    tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf
    path "versions.yml"                               , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Derive the file extension from the output-type flag in args; plain VCF is the default.
    def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" :
                    args.contains("--output-type u") || args.contains("-Ou") ? "bcf" :
                    args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" :
                    args.contains("--output-type v") || args.contains("-Ov") ? "vcf" :
                    "vcf"
    // Only add --output when the caller has not already set one through args.
    def output = args.contains("--output ") || args.contains("--output=") || args.contains("-o ") ? "" : "--output ${prefix}.${extension}"
    def reference = fasta ? "--genome ${fasta}" : ""

    """
    trgt merge \\
        $args \\
        $reference \\
        $output \\
        --vcf ${vcfs}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        trgt: \$(trgt --version |& sed '1!d ; s/trgt //')
    END_VERSIONS
    """

    stub:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Mirror the extension logic of the script section so that a stub run emits the
    // same file name a real run would for the given output-type flag.
    def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" :
                    args.contains("--output-type u") || args.contains("-Ou") ? "bcf" :
                    args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" :
                    args.contains("--output-type v") || args.contains("-Ov") ? "vcf" :
                    "vcf"
    // Files named *.gz get a valid (empty) gzip stream instead of a zero-byte file.
    def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch"

    """
    $create_cmd ${prefix}.${extension}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        trgt: \$(trgt --version |& sed '1!d ; s/trgt //')
    END_VERSIONS
    """
}
80 changes: 80 additions & 0 deletions modules/nf-core/trgt/merge/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for trgt/merge: describes the tool and the process input/output channels.
name: "trgt_merge"
description: Merge TRGT VCFs from multiple samples
keywords:
  - trgt
  - repeat expansion
  - pacbio
  - genomics
tools:
  - "trgt":
      description: "Tandem repeat genotyping and visualization from PacBio HiFi data"
      homepage: "https://github.com/PacificBiosciences/trgt"
      documentation: "https://github.com/PacificBiosciences/trgt/blob/main/docs/tutorial.md"
      tool_dev_url: "https://github.com/PacificBiosciences/trgt"
      doi: "10.1038/s41587-023-02057-3"
      licence:
        ["Pacific Biosciences Software License (https://github.com/PacificBiosciences/trgt/blob/main/LICENSE.md)"]

# One outer list item per input channel, in the order the process declares them.
input:
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. `[ id:'sample1', single_end:false ]`
    - vcfs:
        type: file
        description: |
          List containing VCF files from TRGT
          Must contain at least 2 elements unless `--force-single` is given
          Samples in each VCF must be pairwise disjoint
    - tbis:
        type: file
        description: |
          List containing indexes of VCF files from TRGT
          Must contain at least 2 elements unless `--force-single` is given
  - - meta2:
        type: map
        description: |
          Groovy map containing reference information
          e.g. `[ id: 'genome' ]`
    - fasta:
        type: file
        description: |
          FASTA reference file (optional)
          Required if VCFs were generated with TRGT pre 1.0
        pattern: "*.{fasta,fa,fna}"
  - - meta3:
        type: map
        description: |
          Groovy map containing reference information
          e.g. `[ id: 'genome' ]`
    - fai:
        type: file
        description: |
          Index for FASTA file (optional)
          Required if VCFs were generated with TRGT pre 1.0
        pattern: "*.fai"

# One entry per named output (`emit:`) of the process.
output:
  - vcf:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1', single_end:false ]`
      - "*.{vcf,vcf.gz,bcf,bcf.gz}":
          type: file
          description: "Merged output file"
          pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}"
  - versions:
      - versions.yml:
          type: file
          description: File containing software versions
          pattern: "versions.yml"

authors:
  - "@Schmytzi"
maintainers:
  - "@Schmytzi"
193 changes: 193 additions & 0 deletions modules/nf-core/trgt/merge/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
// nf-core modules test trgt/merge
//
// nf-test suite for the TRGT_MERGE process. The shared setup block builds the
// inputs for every test: it unpacks and indexes a chr22 reference, genotypes the
// same PacBio BAM under two sample ids (test1, test2) and sorts the resulting VCFs.
nextflow_process {

    name "Test Process TRGT_MERGE"
    script "../main.nf"
    process "TRGT_MERGE"
    config "./nextflow.config"

    tag "modules"
    tag "modules_nfcore"
    tag "trgt"
    tag "trgt/merge"
    tag "trgt/genotype"
    tag "samtools/faidx"
    tag "samtools/sort"
    tag "samtools/index"
    tag "bcftools/sort"
    tag "gunzip"

    setup {
        // Decompress the reference FASTA.
        run("GUNZIP") {
            script "../../../gunzip/main.nf"
            process {
                """
                input[0] = [
                    [ id : 'chr22' ],
                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr22_chr22_KI270734v1_random/sequence/genome.fa.gz', checkIfExists: true)
                ]
                """
            }
        }

        // Index the decompressed FASTA.
        run("SAMTOOLS_FAIDX") {
            script "../../../samtools/faidx/main.nf"
            process {
                """
                input[0] = GUNZIP.out.gunzip
                input[1] = [[],[]]
                """
            }
        }

        // Genotype a single-locus repeat catalogue for two samples backed by the same BAM.
        run("TRGT_GENOTYPE") {
            script "../../genotype/main.nf"
            process {
                """
                input[0] = Channel.fromList([
                    tuple(
                        [ id:'test1' ],
                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam', checkIfExists: true),
                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam.bai', checkIfExists: true),
                        'XX'
                    ),
                    tuple(
                        [ id:'test2' ],
                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam', checkIfExists: true),
                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam.bai', checkIfExists: true),
                        'XX'
                    )
                ])
                input[1] = GUNZIP.out.gunzip
                input[2] = SAMTOOLS_FAIDX.out.fai
                input[3] = Channel.of('chr22\t18890357\t18890451\tID=TEST;MOTIFS=AT;STRUC=(AT)n')
                    .collectFile(name : 'repeats.bed', newLine: false)
                    .map { bed -> [ [ id : 'chr22' ], bed ] }
                    .first()
                """
            }
        }

        // Sort the genotyped VCFs; the tests below also consume the `tbi` output of this step.
        run("BCFTOOLS_SORT") {
            script "../../../bcftools/sort/main.nf"
            process {
                """
                input[0] = TRGT_GENOTYPE.out.vcf
                """
            }
        }
    }

    // Both VCFs are grouped under one meta id and merged without a reference.
    test("homo sapiens - 2 VCFs") {
        when {
            process {
                """
                input[0] = BCFTOOLS_SORT.out.vcf
                    .join(BCFTOOLS_SORT.out.tbi)
                    .map { _meta, vcf, tbi -> [ [ id : 'test' ], vcf, tbi ] }
                    .groupTuple()
                input[1] = [[],[]]
                input[2] = [[],[]]
                """
            }
        }
        then {
            assertAll(
                { assert process.success },
                { assert snapshot(
                    path(process.out.vcf[0][1]).vcf.variantsMD5,
                    path(process.out.vcf[0][1]).vcf.summary,
                    path(process.out.vcf[0][1]).vcf.sampleCount,
                    process.out.versions
                ).match() }
            )
        }
    }

    // Same merge as above, with the reference FASTA and its index supplied.
    test("homo sapiens - 2 VCFs - reference") {
        when {
            process {
                """
                input[0] = BCFTOOLS_SORT.out.vcf
                    .join(BCFTOOLS_SORT.out.tbi)
                    .map { _meta, vcf, tbi -> [ [ id : 'test' ], vcf, tbi ] }
                    .groupTuple()
                input[1] = GUNZIP.out.gunzip
                input[2] = SAMTOOLS_FAIDX.out.fai
                """
            }
        }
        then {
            assertAll(
                { assert process.success },
                { assert snapshot(
                    path(process.out.vcf[0][1]).vcf.variantsMD5,
                    path(process.out.vcf[0][1]).vcf.summary,
                    path(process.out.vcf[0][1]).vcf.sampleCount,
                    process.out.versions
                ).match() }
            )
        }
    }

    // A single VCF without `--force-single` is expected to make the process fail.
    test("homo sapiens - 1 VCF") {
        when {
            process {
                """
                input[0] = BCFTOOLS_SORT.out.vcf
                    .join(BCFTOOLS_SORT.out.tbi)
                    .first()
                input[1] = GUNZIP.out.gunzip
                input[2] = SAMTOOLS_FAIDX.out.fai
                """
            }
        }
        then {
            assertAll(
                { assert !process.success }
            )
        }
    }

    // A single VCF is accepted once `--force-single` is set.
    // NOTE(review): presumably ./nextflow.config maps params.trgt_merge_args onto ext.args — confirm.
    test("homo sapiens - 1 VCF - --force-single") {
        when {
            params {
                trgt_merge_args = "--force-single"
            }
            process {
                """
                input[0] = BCFTOOLS_SORT.out.vcf
                    .join(BCFTOOLS_SORT.out.tbi)
                    .first()
                input[1] = GUNZIP.out.gunzip
                input[2] = SAMTOOLS_FAIDX.out.fai
                """
            }
        }
        then {
            assertAll(
                { assert process.success },
                { assert snapshot(
                    path(process.out.vcf[0][1]).vcf.variantsMD5,
                    path(process.out.vcf[0][1]).vcf.summary,
                    path(process.out.vcf[0][1]).vcf.sampleCount,
                    process.out.versions
                ).match() }
            )
        }
    }

    // Stub run: the whole process output is snapshotted as-is.
    test("homo sapiens - 2 VCFs - stub") {
        options "-stub"
        when {
            process {
                """
                input[0] = BCFTOOLS_SORT.out.vcf
                    .join(BCFTOOLS_SORT.out.tbi)
                    .map { _meta, vcf, tbi -> [ [ id : 'test' ], vcf, tbi ] }
                    .groupTuple()
                input[1] = [[],[]]
                input[2] = [[],[]]
                """
            }
        }
        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }
}
Loading

0 comments on commit 6ba546c

Please sign in to comment.