From 6ba546c2f86dc205aca931b8bb3c92e85d548ada Mon Sep 17 00:00:00 2001 From: Daniel Schmitz Date: Mon, 18 Nov 2024 11:43:13 +0100 Subject: [PATCH] Added module trgt/merge (#7012) * Imported module merge from branch trgt * Removed dump channels Co-authored-by: Sateesh_Peri <33637490+sateeshperi@users.noreply.github.com> * Fixed indentation Co-authored-by: Sateesh_Peri <33637490+sateeshperi@users.noreply.github.com> * Updated tests to use params * Delete modules/nf-core/trgt/merge/tests/tags.yml don't need this yml anymore --------- Co-authored-by: Sateesh_Peri <33637490+sateeshperi@users.noreply.github.com> --- modules/nf-core/trgt/merge/environment.yml | 6 + modules/nf-core/trgt/merge/main.nf | 58 ++++++ modules/nf-core/trgt/merge/meta.yml | 80 ++++++++ modules/nf-core/trgt/merge/tests/main.nf.test | 193 ++++++++++++++++++ .../trgt/merge/tests/main.nf.test.snap | 80 ++++++++ .../nf-core/trgt/merge/tests/nextflow.config | 15 ++ 6 files changed, 432 insertions(+) create mode 100644 modules/nf-core/trgt/merge/environment.yml create mode 100644 modules/nf-core/trgt/merge/main.nf create mode 100644 modules/nf-core/trgt/merge/meta.yml create mode 100644 modules/nf-core/trgt/merge/tests/main.nf.test create mode 100644 modules/nf-core/trgt/merge/tests/main.nf.test.snap create mode 100644 modules/nf-core/trgt/merge/tests/nextflow.config diff --git a/modules/nf-core/trgt/merge/environment.yml b/modules/nf-core/trgt/merge/environment.yml new file mode 100644 index 00000000000..614dc872b7b --- /dev/null +++ b/modules/nf-core/trgt/merge/environment.yml @@ -0,0 +1,6 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::trgt=1.2.0" diff --git a/modules/nf-core/trgt/merge/main.nf b/modules/nf-core/trgt/merge/main.nf new file mode 100644 index 00000000000..88eb69457a3 --- /dev/null +++ b/modules/nf-core/trgt/merge/main.nf @@ -0,0 +1,58 @@ 
+process TRGT_MERGE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/trgt:1.2.0--h9ee0642_0': + 'biocontainers/trgt:1.2.0--h9ee0642_0' }" + + input: + tuple val(meta) , path(vcfs), path(tbis) + tuple val(meta2), path(fasta) // optional + tuple val(meta3), path(fai) // optional + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + def output = args.contains("--output ") || args.contains("--output=") || args.contains("-o ") ? "" : "--output ${prefix}.${extension}" + def reference = fasta ? 
"--genome ${fasta}" : "" + + """ + trgt merge \\ + $args \\ + $reference \\ + $output \\ + --vcf ${vcfs} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + trgt: \$(trgt --version |& sed '1!d ; s/trgt //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + trgt: \$(trgt --version |& sed '1!d ; s/trgt //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/trgt/merge/meta.yml b/modules/nf-core/trgt/merge/meta.yml new file mode 100644 index 00000000000..e8b79cb2da7 --- /dev/null +++ b/modules/nf-core/trgt/merge/meta.yml @@ -0,0 +1,80 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "trgt_merge" +description: Merge TRGT VCFs from multiple samples +keywords: + - trgt + - repeat expansion + - pacbio + - genomics +tools: + - "trgt": + description: "Tandem repeat genotyping and visualization from PacBio HiFi data" + homepage: "https://github.com/PacificBiosciences/trgt" + documentation: "https://github.com/PacificBiosciences/trgt/blob/main/docs/tutorial.md" + tool_dev_url: "https://github.com/PacificBiosciences/trgt" + doi: "10.1038/s41587-023-02057-3" + licence: + ["Pacific Biosciences Software License (https://github.com/PacificBiosciences/trgt/blob/main/LICENSE.md)"] + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - vcfs: + type: file + description: | + List containing VCF files from TRGT + Must contain at least 2 elements unless `--force-single` is given + Samples in each VCF must be pairwise disjoint + - tbis: + type: file + description: | + List containing indexes of VCF files from TRGT + Must contain at least 2 elements unless `--force-single` is given + - - meta2: + type: map + description: | + Groovy map containing reference information + e.g. `[ id: 'genome' ]` + - fasta: + type: file + description: | + FASTA reference file (optional) + Required if VCFs were generated with TRGT pre 1.0 + pattern: "*.{fasta,fa,fna}" + - - meta3: + type: map + description: | + Groovy map containing reference information + e.g. `[ id: 'genome' ]` + - fai: + type: file + description: | + Index for FASTA file (optional) + Required if VCFs were generated with TRGT pre 1.0 + pattern: "*.fai" + +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - "*.{vcf,vcf.gz,bcf,bcf.gz}": + type: file + description: "Merged output file" + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@Schmytzi" +maintainers: + - "@Schmytzi" diff --git a/modules/nf-core/trgt/merge/tests/main.nf.test b/modules/nf-core/trgt/merge/tests/main.nf.test new file mode 100644 index 00000000000..dfccd4f9ec2 --- /dev/null +++ b/modules/nf-core/trgt/merge/tests/main.nf.test @@ -0,0 +1,193 @@ +// nf-core modules test trgt/merge +nextflow_process { + + name "Test Process TRGT_MERGE" + script "../main.nf" + process "TRGT_MERGE" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "trgt" + tag "trgt/merge" + tag "trgt/genotype" + tag "samtools/faidx" + tag "samtools/sort" + tag "samtools/index" + tag "bcftools/sort" + tag "gunzip" + + setup { + run("GUNZIP"){ + script "../../../gunzip/main.nf" + process { + """ + input[0] = [ + [ id : 'chr22' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr22_chr22_KI270734v1_random/sequence/genome.fa.gz', checkIfExists: true) + ] + """ + } + } + run("SAMTOOLS_FAIDX"){ + script "../../../samtools/faidx/main.nf" + process { + """ + input[0] = GUNZIP.out.gunzip + input[1] = [[],[]] + """ + } + } + run("TRGT_GENOTYPE"){ + script "../../genotype/main.nf" + process { + """ + input[0] = Channel.fromList([ + tuple( + [ id:'test1' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam.bai', checkIfExists: true), + 'XX' + ), + tuple( + [ id:'test2' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/pacbio/bam/test.sorted.bam.bai', checkIfExists: true), + 'XX' + ) + ]) + input[1] = GUNZIP.out.gunzip + input[2] = SAMTOOLS_FAIDX.out.fai + input[3] = Channel.of('chr22\t18890357\t18890451\tID=TEST;MOTIFS=AT;STRUC=(AT)n') + .collectFile(name : 'repeats.bed', newLine: false) + .map { file -> [ [ id : 'chr22' ], file ] } + .first() + """ + } + } + run("BCFTOOLS_SORT"){ + script "../../../bcftools/sort/main.nf" + process { + """ + input[0] = TRGT_GENOTYPE.out.vcf + """ + } + } + } + test("homo sapiens - 2 VCFs") { + when { + process { + """ + input[0] = BCFTOOLS_SORT.out.vcf + .join(BCFTOOLS_SORT.out.tbi) + .map { meta, vcf, tbi -> [ [ id : 'test' ], vcf, tbi ] } + .groupTuple() + input[1] = [[],[]] + input[2] = [[],[]] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot( + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + path(process.out.vcf.get(0).get(1)).vcf.summary, + path(process.out.vcf.get(0).get(1)).vcf.sampleCount, + process.out.versions + ).match() } + ) + } + } + test("homo sapiens - 2 VCFs - reference") { + when { + process { + """ + input[0] = BCFTOOLS_SORT.out.vcf + .join(BCFTOOLS_SORT.out.tbi) + .map { meta, vcf, tbi -> [ [ id : 'test' ], vcf, tbi ] } + .groupTuple() + input[1] = GUNZIP.out.gunzip + input[2] = SAMTOOLS_FAIDX.out.fai + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot( + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + path(process.out.vcf.get(0).get(1)).vcf.summary, + path(process.out.vcf.get(0).get(1)).vcf.sampleCount, + process.out.versions + ).match() } + ) + } + } + test("homo sapiens - 1 VCF") { + when { + process { + """ + input[0] = BCFTOOLS_SORT.out.vcf + .join(BCFTOOLS_SORT.out.tbi) + .first() + input[1] = GUNZIP.out.gunzip + input[2] = SAMTOOLS_FAIDX.out.fai + """ + } + } + then { + assertAll( + { assert !process.success } + ) + } + } + test("homo sapiens - 1 VCF - --force-single") { + when { + params { + trgt_merge_args = 
"--force-single" + } + process { + """ + input[0] = BCFTOOLS_SORT.out.vcf + .join(BCFTOOLS_SORT.out.tbi) + .first() + input[1] = GUNZIP.out.gunzip + input[2] = SAMTOOLS_FAIDX.out.fai + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot( + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + path(process.out.vcf.get(0).get(1)).vcf.summary, + path(process.out.vcf.get(0).get(1)).vcf.sampleCount, + process.out.versions + ).match() } + ) + } + } + test("homo sapiens - 2 VCFs - stub") { + options "-stub" + when { + process { + """ + input[0] = BCFTOOLS_SORT.out.vcf + .join(BCFTOOLS_SORT.out.tbi) + .map { meta, vcf, tbi -> [ [ id : 'test' ], vcf, tbi ] } + .groupTuple() + input[1] = [[],[]] + input[2] = [[],[]] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/trgt/merge/tests/main.nf.test.snap b/modules/nf-core/trgt/merge/tests/main.nf.test.snap new file mode 100644 index 00000000000..73f306b0ebf --- /dev/null +++ b/modules/nf-core/trgt/merge/tests/main.nf.test.snap @@ -0,0 +1,80 @@ +{ + "homo sapiens - 2 VCFs - reference": { + "content": [ + "75acbf6205a44ca44d6fc644dda57d82", + "VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=1, phased=false, phasedAutodetect=false]", + 2, + [ + "versions.yml:md5,ac3a6dca80725cee1bf724975e54c067" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-11-15T14:52:26.329527707" + }, + "homo sapiens - 2 VCFs": { + "content": [ + "75acbf6205a44ca44d6fc644dda57d82", + "VcfFile [chromosomes=[chr22], sampleCount=2, variantCount=1, phased=false, phasedAutodetect=false]", + 2, + [ + "versions.yml:md5,ac3a6dca80725cee1bf724975e54c067" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-11-15T14:39:54.475134386" + }, + "homo sapiens - 2 VCFs - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + 
"test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,ac3a6dca80725cee1bf724975e54c067" + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,ac3a6dca80725cee1bf724975e54c067" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-11-15T15:06:23.198648143" + }, + "homo sapiens - 1 VCF - --force-single": { + "content": [ + "6e09773892c37f17e01c9771e8ef2171", + "VcfFile [chromosomes=[chr22], sampleCount=1, variantCount=1, phased=false, phasedAutodetect=false]", + 1, + [ + "versions.yml:md5,ac3a6dca80725cee1bf724975e54c067" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-11-15T15:01:59.765895584" + } +} \ No newline at end of file diff --git a/modules/nf-core/trgt/merge/tests/nextflow.config b/modules/nf-core/trgt/merge/tests/nextflow.config new file mode 100644 index 00000000000..b5caec9f71a --- /dev/null +++ b/modules/nf-core/trgt/merge/tests/nextflow.config @@ -0,0 +1,15 @@ +params { + trgt_merge_args = "" +} +process { + withName: "BCFTOOLS_SORT" { + ext.args = "--write-index=tbi --output-type z" + ext.prefix = { "sorted_${meta.id}" } + } + withName: "TRGT_GENOTYPE" { + ext.args = { "--sample-name ${meta.id}"} + } + withName: "TRGT_MERGE" { + ext.args = params.trgt_merge_args + } +}