Added module trgt/genotype (#7002)

* Imported genotype module from trgt branch * Fixed test issues --------- Co-authored-by: Sateesh_Peri <[email protected]>
nf-core · Nov 16, 2024 · 484afd1 · 484afd1
1 parent 21f230b
commit 484afd1
Show file tree

Hide file tree

Showing 5 changed files with 513 additions and 0 deletions.
diff --git a/modules/nf-core/trgt/genotype/environment.yml b/modules/nf-core/trgt/genotype/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+# Conda environment for the trgt/genotype module.
+# The trgt version pinned below matches the container tags (trgt:1.2.0) in main.nf.
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::trgt=1.2.0"
diff --git a/modules/nf-core/trgt/genotype/main.nf b/modules/nf-core/trgt/genotype/main.nf
@@ -0,0 +1,56 @@
+// Genotypes tandem repeats for one sample by running `trgt genotype` against a
+// reference genome and a repeat definition file.
+//
+// Inputs:
+//   [meta,  bam, bai, karyotype] - reads passed to --reads, their index, and an
+//                                  optional karyotype; a falsy value (e.g. [])
+//                                  leaves --karyotype off the command line
+//   [meta2, fasta]               - reference passed to --genome
+//   [meta3, fai]                 - FASTA index
+//   [meta4, repeats]             - repeat definitions passed to --repeats
+// bai and fai are declared as inputs but never referenced in the command itself.
+//
+// Outputs:
+//   vcf      - [meta, *.vcf.gz]
+//   bam      - [meta, *.spanning.bam] (optional)
+//   versions - versions.yml holding the trgt version
+process TRGT_GENOTYPE {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/trgt:1.2.0--h9ee0642_0':
+        'biocontainers/trgt:1.2.0--h9ee0642_0' }"
+
+    input:
+    tuple val(meta) , path(bam), path(bai), val(karyotype)
+    tuple val(meta2), path(fasta)
+    tuple val(meta3), path(fai)
+    tuple val(meta4), path(repeats)
+
+    output:
+    tuple val(meta), path("*.vcf.gz")      , emit: vcf
+    tuple val(meta), path("*.spanning.bam"), emit: bam     , optional: true
+    path "versions.yml"                    , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Extra command-line arguments and the output prefix come from task.ext,
+    // falling back to no extra arguments and the sample id respectively.
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Only emit --karyotype when a value was supplied.
+    def karyo = karyotype ? "--karyotype ${karyotype}" : ""
+    // The version line keeps only the first line of `trgt --version` and strips
+    // the leading "trgt " from it.
+    """
+    trgt genotype \\
+        $args \\
+        --genome ${fasta} \\
+        --reads ${bam} \\
+        --repeats ${repeats} \\
+        ${karyo} \\
+        --threads ${task.cpus} \\
+        --output-prefix ${prefix}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        trgt: \$(trgt --version |& sed '1!d ; s/trgt //')
+    END_VERSIONS
+    """
+
+    stub:
+    // Creates placeholder outputs under the same names the output block matches.
+    // The stub still calls `trgt --version`, so the tool must be available.
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.spanning.bam
+    echo "" | gzip > ${prefix}.vcf.gz
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        trgt: \$(trgt --version |& sed '1!d ; s/trgt //')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/trgt/genotype/meta.yml b/modules/nf-core/trgt/genotype/meta.yml
@@ -0,0 +1,98 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "trgt_genotype"
+description: Tandem repeat genotyping from PacBio HiFi data
+keywords:
+  - repeat expansion
+  - pacbio
+  - genomics
+tools:
+  - "trgt":
+      description: "Tandem repeat genotyping and visualization from PacBio HiFi data"
+      homepage: "https://github.com/PacificBiosciences/trgt"
+      documentation: "https://github.com/PacificBiosciences/trgt/blob/main/docs/tutorial.md"
+      tool_dev_url: "https://github.com/PacificBiosciences/trgt"
+      doi: "10.1038/s41587-023-02057-3"
+      licence:
+        ["Pacific Biosciences Software License (https://github.com/PacificBiosciences/trgt/blob/main/LICENSE.md)"]
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1', single_end:false ]`
+    - bam:
+        type: file
+        description: Sorted BAM file
+        pattern: "*.bam"
+    - bai:
+        type: file
+        description: "Index of the BAM file"
+        pattern: "*.bai"
+    - karyotype:
+        type: string
+        description: "Karyotype of the sample. Either XX or XY. Defaults to XX if not given"
+        enum:
+          - XX
+          - XY
+  - - meta2:
+        type: map
+        description: |
+          Groovy map containing reference information
+          e.g. `[ id: 'genome' ]`
+    - fasta:
+        type: file
+        description: "FASTA reference file"
+        pattern: "*.{fasta,fa,fna}"
+  - - meta3:
+        type: map
+        description: |
+          Groovy map containing reference information
+          e.g. `[ id: 'genome' ]`
+    - fai:
+        type: file
+        description: "Index for FASTA file"
+        pattern: "*.fai"
+  - - meta4:
+        type: map
+        description: |
+          Groovy map containing repeat information
+          e.g. `[ id: 'repeats' ]`
+    - repeats:
+        type: file
+        description: "BED file with repeat coordinates"
+        pattern: "*.bed"
+
+output:
+  - vcf:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - "*.vcf.gz":
+          type: file
+          description: "VCF file with repeat genotypes"
+          pattern: "*.vcf.gz"
+  - bam:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1', single_end:false ]`
+      - "*.spanning.bam":
+          type: file
+          description: BAM file with pieces of reads aligning to repeats
+          pattern: "*.spanning.bam"
+  - versions:
+      - "versions.yml":
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+
+authors:
+  - "@Schmytzi"
+  - "@fellen31"
+maintainers:
+  - "@Schmytzi"
diff --git a/modules/nf-core/trgt/genotype/tests/main.nf.test b/modules/nf-core/trgt/genotype/tests/main.nf.test
@@ -0,0 +1,154 @@
+// nf-core modules test trgt/genotype
+nextflow_process {
+
+    name "Test Process TRGT_GENOTYPE"
+    script "../main.nf"
+    process "TRGT_GENOTYPE"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "trgt"
+    tag "trgt/genotype"
+    tag "samtools/faidx"
+    tag "gunzip"
+
+    // Shared fixtures: every test below feeds GUNZIP.out.gunzip to the process
+    // as the reference and SAMTOOLS_FAIDX.out.fai as its index.
+    setup {
+        // Decompress the gzipped chr22 test genome.
+        run("GUNZIP"){
+            script "../../../gunzip/main.nf"
+            process {
+                """
+                input[0] = [
+                    [ id : 'chr22' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr22_chr22_KI270734v1_random/sequence/genome.fa.gz', checkIfExists: true)
+                ]
+            """
+            }
+        }
+        // Index the decompressed FASTA.
+        // NOTE(review): input[1] is an empty pair, presumably an optional input
+        // of samtools/faidx that is left unset here - confirm against that module.
+        run("SAMTOOLS_FAIDX"){
+            script "../../../samtools/faidx/main.nf"
+            process {
+                """
+                input[0] = GUNZIP.out.gunzip
+                input[1] = [[],[]]
+                """
+            }
+        }
+    }
+
+    // Real run with an explicit 'XX' karyotype, which main.nf turns into
+    // `--karyotype XX` on the trgt command line.
+    test("homo sapiens - [bam,bai,XX], [fa,fai], bed") {
+
+        when {
+            // input[3] builds repeats.bed on the fly from one inline record
+            // describing an (AT)n repeat on chr22.
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam.bai', checkIfExists: true),
+                    'XX'
+                ]
+                input[1] = GUNZIP.out.gunzip
+                input[2] = SAMTOOLS_FAIDX.out.fai
+                input[3] = Channel.of('chr22\t18890357\t18890451\tID=TEST;MOTIFS=AT;STRUC=(AT)n')
+                            .collectFile(name : 'repeats.bed', newLine: false)
+                            .map { file -> [ [ id : 'chr22' ], file ] }
+                """
+            }
+        }
+
+        then {
+            // The process must succeed and all outputs must match the stored snapshot.
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    // Real run with the karyotype given as [] (falsy), so main.nf leaves
+    // --karyotype off the command line.
+    test("homo sapiens - [bam,bai,[]], [fa,fai], bed") {
+
+        when {
+            // input[3] builds repeats.bed on the fly from one inline record
+            // describing an (AT)n repeat on chr22.
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam.bai', checkIfExists: true),
+                    []
+                ]
+                input[1] = GUNZIP.out.gunzip
+                input[2] = SAMTOOLS_FAIDX.out.fai
+                input[3] = Channel.of('chr22\t18890357\t18890451\tID=TEST;MOTIFS=AT;STRUC=(AT)n')
+                            .collectFile(name : 'repeats.bed', newLine: false)
+                            .map { file -> [ [ id : 'chr22' ], file ] }
+                """
+            }
+        }
+
+        then {
+            // The process must succeed and all outputs must match the stored snapshot.
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    // Stub-mode run (options "-stub") with an explicit 'XX' karyotype: exercises
+    // the stub section of main.nf, which only creates placeholder outputs.
+    test("homo sapiens - [bam,bai,XX], [fa,fai], bed - stub") {
+        options "-stub"
+
+        when {
+            // input[3] builds repeats.bed on the fly from one inline record
+            // describing an (AT)n repeat on chr22.
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam.bai', checkIfExists: true),
+                    'XX'
+                ]
+                input[1] = GUNZIP.out.gunzip
+                input[2] = SAMTOOLS_FAIDX.out.fai
+                input[3] = Channel.of('chr22\t18890357\t18890451\tID=TEST;MOTIFS=AT;STRUC=(AT)n')
+                                .collectFile(name : 'repeats.bed', newLine: false)
+                                .map { file -> [ [ id : 'chr22' ], file ] }
+                """
+            }
+        }
+
+        then {
+            // The process must succeed and all outputs must match the stored snapshot.
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    // Stub-mode run with the karyotype given as [] (falsy). The test is named
+    // "stub", so it passes -stub to Nextflow just like the 'XX' stub test;
+    // without the option it would execute the real trgt command instead.
+    test("homo sapiens - [bam,bai,[]], [fa,fai], bed - stub") {
+        options "-stub"
+
+        when {
+            // input[3] builds repeats.bed on the fly from one inline record
+            // describing an (AT)n repeat on chr22.
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam', checkIfExists: true),
+                    file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/bam/test.sorted.bam.bai', checkIfExists: true),
+                    []
+                ]
+                input[1] = GUNZIP.out.gunzip
+                input[2] = SAMTOOLS_FAIDX.out.fai
+                input[3] = Channel.of('chr22\t18890357\t18890451\tID=TEST;MOTIFS=AT;STRUC=(AT)n')
+                            .collectFile(name : 'repeats.bed', newLine: false)
+                            .map { file -> [ [ id : 'chr22' ], file ] }
+                """
+            }
+        }
+
+        then {
+            // The process must succeed and all outputs must match the stored snapshot.
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+}