Added module trgt/merge (#7012)

* Imported module merge from branch trgt * Removed dump channels Co-authored-by: Sateesh_Peri <[email protected]> * Fixed indentation Co-authored-by: Sateesh_Peri <[email protected]> * Updated tests to use params * Delete modules/nf-core/trgt/merge/tests/tags.yml don't need this yml anymore --------- Co-authored-by: Sateesh_Peri <[email protected]>
nf-core · Nov 18, 2024 · 6ba546c · 6ba546c
1 parent 7ed0aec
commit 6ba546c
Show file tree

Hide file tree

Showing 6 changed files with 432 additions and 0 deletions.
diff --git a/modules/nf-core/trgt/merge/environment.yml b/modules/nf-core/trgt/merge/environment.yml
@@ -0,0 +1,6 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+# Conda environment for the trgt/merge module.
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  # Pinned to the same trgt release as the container images used by the process
+  - bioconda::trgt=1.2.0
diff --git a/modules/nf-core/trgt/merge/main.nf b/modules/nf-core/trgt/merge/main.nf
@@ -0,0 +1,58 @@
+// Merges the TRGT VCFs of several samples into a single file with `trgt merge`.
+// The reference FASTA and its index are optional inputs; when no FASTA is given
+// the `--genome` option is simply left off the command line.
+process TRGT_MERGE {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/trgt:1.2.0--h9ee0642_0':
+        'biocontainers/trgt:1.2.0--h9ee0642_0' }"
+
+    input:
+    tuple val(meta) , path(vcfs), path(tbis)
+    tuple val(meta2), path(fasta) // optional
+    tuple val(meta3), path(fai)   // optional, staged only - never referenced on the command line
+
+    output:
+    tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf
+    path "versions.yml"                               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Pick the file extension from the output type requested in ext.args; plain VCF is the fallback
+    def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" :
+                    args.contains("--output-type u") || args.contains("-Ou") ? "bcf" :
+                    args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" :
+                    args.contains("--output-type v") || args.contains("-Ov") ? "vcf" :
+                    "vcf"
+    // Do not add a second output option when ext.args already carries one
+    def output = args.contains("--output ") || args.contains("--output=") || args.contains("-o ") ? "" : "--output ${prefix}.${extension}"
+    // An empty `fasta` is falsy, so the reference option is only emitted when a FASTA was supplied
+    def reference = fasta ? "--genome ${fasta}" : ""
+
+    """
+    trgt merge \\
+        $args \\
+        $reference \\
+        $output \\
+        --vcf ${vcfs}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        trgt: \$(trgt --version |& sed '1!d ; s/trgt //')
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Same extension logic as the real command, so a stub run produces the same file name
+    def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" :
+                    args.contains("--output-type u") || args.contains("-Ou") ? "bcf" :
+                    args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" :
+                    args.contains("--output-type v") || args.contains("-Ov") ? "vcf" :
+                    "vcf"
+    // Compressed placeholders go through gzip so that they are valid gzip streams
+    def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch"
+
+    """
+    ${create_cmd} ${prefix}.${extension}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        trgt: \$(trgt --version |& sed '1!d ; s/trgt //')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/trgt/merge/meta.yml b/modules/nf-core/trgt/merge/meta.yml
@@ -0,0 +1,80 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+# Module identity, search keywords and details of the wrapped tool.
+name: trgt_merge
+description: Merge TRGT VCFs from multiple samples
+keywords:
+  - trgt
+  - repeat expansion
+  - pacbio
+  - genomics
+tools:
+  - trgt:
+      description: Tandem repeat genotyping and visualization from PacBio HiFi data
+      homepage: https://github.com/PacificBiosciences/trgt
+      documentation: https://github.com/PacificBiosciences/trgt/blob/main/docs/tutorial.md
+      tool_dev_url: https://github.com/PacificBiosciences/trgt
+      # Kept quoted so the DOI is always read as a string
+      doi: "10.1038/s41587-023-02057-3"
+      licence:
+        - "Pacific Biosciences Software License (https://github.com/PacificBiosciences/trgt/blob/main/LICENSE.md)"
+
+# One entry per input tuple of the process: VCFs with their indexes,
+# then the optional reference FASTA, then the optional FASTA index.
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1', single_end:false ]`
+    - vcfs:
+        type: file
+        description: |
+          List containing VCF files from TRGT
+          Must contain at least 2 elements unless `--force-single` is given
+          Samples in each VCF must be pairwise disjoint
+    - tbis:
+        type: file
+        description: |
+          List containing indexes of VCF files from TRGT
+          Must contain at least 2 elements unless `--force-single` is given
+  - - meta2:
+        type: map
+        description: |
+          Groovy map containing reference information
+          e.g. `[ id: 'genome' ]`
+    - fasta:
+        type: file
+        description: |
+          FASTA reference file (optional)
+          Required if VCFs were generated with TRGT pre 1.0
+        pattern: "*.{fasta,fa,fna}"
+  - - meta3:
+        type: map
+        description: |
+          Groovy map containing reference information
+          e.g. `[ id: 'genome' ]`
+    - fai:
+        type: file
+        description: |
+          Index for FASTA file (optional)
+          Required if VCFs were generated with TRGT pre 1.0
+        pattern: "*.fai"
+
+# One entry per emitted channel of the process.
+output:
+  - vcf:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      # Glob strings start with `*` and therefore have to stay quoted
+      - "*.{vcf,vcf.gz,bcf,bcf.gz}":
+          type: file
+          description: Merged output file
+          pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: versions.yml
+
+# Handles (presumably GitHub usernames) of the people who wrote the module;
+# the leading `@` is a YAML indicator, so each entry has to be quoted.
+authors:
+  - "@Schmytzi"
+# Handles of the people currently looking after the module
+maintainers:
+  - "@Schmytzi"
diff --git a/modules/nf-core/trgt/merge/tests/main.nf.test b/modules/nf-core/trgt/merge/tests/main.nf.test
@@ -0,0 +1,193 @@
+// nf-core modules test trgt/merge
+//
+// nf-test suite for TRGT_MERGE. The setup block builds the inputs from the
+// shared test data: the reference is decompressed and indexed, two samples
+// are genotyped from the same BAM, and the resulting VCFs are sorted.
+nextflow_process {
+
+    name "Test Process TRGT_MERGE"
+    script "../main.nf"
+    process "TRGT_MERGE"
+    config "./nextflow.config"
+
+    // Besides the module's own tags, only the modules actually run in `setup` are listed
+    tag "modules"
+    tag "modules_nfcore"
+    tag "trgt"
+    tag "trgt/merge"
+    tag "trgt/genotype"
+    tag "samtools/faidx"
+    tag "bcftools/sort"
+    tag "gunzip"
+
+    setup {
+        // Decompress the chr22 reference FASTA
+        run("GUNZIP"){
+            script "../../../gunzip/main.nf"
+            process {
+                """
+                input[0] = [
+                    [ id : 'chr22' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr22_chr22_KI270734v1_random/sequence/genome.fa.gz', checkIfExists: true)
+                ]
+                """
+            }
+        }
+        // Index the decompressed reference
+        run("SAMTOOLS_FAIDX"){
+            script "../../../samtools/faidx/main.nf"
+            process {
+                """
+                input[0] = GUNZIP.out.gunzip
+                input[1] = [[],[]]
+                """
+            }
+        }
+        // Genotype the same BAM under two sample ids (test1, test2) against a single-repeat BED file
+        run("TRGT_GENOTYPE"){
+            script "../../genotype/main.nf"
+            process {
+                """
+                input[0] = Channel.fromList([
+                    tuple(
+                        [ id:'test1' ],
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam.bai', checkIfExists: true),
+                        'XX'
+                    ),
+                    tuple(
+                        [ id:'test2' ],
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam.bai', checkIfExists: true),
+                        'XX'
+                    )
+                ])
+                input[1] = GUNZIP.out.gunzip
+                input[2] = SAMTOOLS_FAIDX.out.fai
+                input[3] = Channel.of('chr22\t18890357\t18890451\tID=TEST;MOTIFS=AT;STRUC=(AT)n')
+                            .collectFile(name : 'repeats.bed', newLine: false)
+                            .map { file -> [ [ id : 'chr22' ], file ] }
+                            .first()
+                """
+            }
+        }
+        // Sort the genotyped VCFs; the tests below consume the `vcf` and `tbi` outputs
+        run("BCFTOOLS_SORT"){
+            script "../../../bcftools/sort/main.nf"
+            process {
+                """
+                input[0] = TRGT_GENOTYPE.out.vcf
+                """
+            }
+        }
+    }
+    // Both sample VCFs are grouped under one meta id and merged without a reference
+    test("homo sapiens - 2 VCFs") {
+        when {
+            process {
+                """
+                input[0] = BCFTOOLS_SORT.out.vcf
+                            .join(BCFTOOLS_SORT.out.tbi)
+                            .map { meta, vcf, tbi -> [ [ id : 'test' ], vcf, tbi ] }
+                            .groupTuple()
+                input[1] = [[],[]]
+                input[2] = [[],[]]
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    path(process.out.vcf.get(0).get(1)).vcf.variantsMD5,
+                    path(process.out.vcf.get(0).get(1)).vcf.summary,
+                    path(process.out.vcf.get(0).get(1)).vcf.sampleCount,
+                    process.out.versions
+                ).match() }
+            )
+        }
+    }
+    // Same merge, this time with the reference FASTA and its index supplied
+    test("homo sapiens - 2 VCFs - reference") {
+        when {
+            process {
+                """
+                input[0] = BCFTOOLS_SORT.out.vcf
+                            .join(BCFTOOLS_SORT.out.tbi)
+                            .map { meta, vcf, tbi -> [ [ id : 'test' ], vcf, tbi ] }
+                            .groupTuple()
+                input[1] = GUNZIP.out.gunzip
+                input[2] = SAMTOOLS_FAIDX.out.fai
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    path(process.out.vcf.get(0).get(1)).vcf.variantsMD5,
+                    path(process.out.vcf.get(0).get(1)).vcf.summary,
+                    path(process.out.vcf.get(0).get(1)).vcf.sampleCount,
+                    process.out.versions
+                ).match() }
+            )
+        }
+    }
+    // A single VCF without `--force-single` is expected to make the process fail.
+    // NOTE(review): `.first()` keeps whichever joined sample arrives first - confirm that either is acceptable
+    test("homo sapiens - 1 VCF") {
+        when {
+            process {
+                """
+                input[0] = BCFTOOLS_SORT.out.vcf
+                            .join(BCFTOOLS_SORT.out.tbi)
+                            .first()
+                input[1] = GUNZIP.out.gunzip
+                input[2] = SAMTOOLS_FAIDX.out.fai
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert !process.success }
+            )
+        }
+    }
+    // A single VCF is accepted once `--force-single` is passed.
+    // `trgt_merge_args` is presumably forwarded as ext.args by ./nextflow.config - verify there
+    test("homo sapiens - 1 VCF - --force-single") {
+        when {
+            params {
+                trgt_merge_args = "--force-single"
+            }
+            process {
+                """
+                input[0] = BCFTOOLS_SORT.out.vcf
+                            .join(BCFTOOLS_SORT.out.tbi)
+                            .first()
+                input[1] = GUNZIP.out.gunzip
+                input[2] = SAMTOOLS_FAIDX.out.fai
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    path(process.out.vcf.get(0).get(1)).vcf.variantsMD5,
+                    path(process.out.vcf.get(0).get(1)).vcf.summary,
+                    path(process.out.vcf.get(0).get(1)).vcf.sampleCount,
+                    process.out.versions
+                ).match() }
+            )
+        }
+    }
+    // Stub run: the whole output structure is snapshotted
+    test("homo sapiens - 2 VCFs - stub") {
+        options "-stub"
+        when {
+            process {
+                """
+                input[0] = BCFTOOLS_SORT.out.vcf
+                            .join(BCFTOOLS_SORT.out.tbi)
+                            .map { meta, vcf, tbi -> [ [ id : 'test' ], vcf, tbi ] }
+                            .groupTuple()
+                input[1] = [[],[]]
+                input[2] = [[],[]]
+                """
+            }
+        }
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}