diff --git a/tools/dragen-germline/4.3.6/dragen-germline__4.3.6.cwl b/tools/dragen-germline/4.3.6/dragen-germline__4.3.6.cwl index 4d930037..0a31c748 100644 --- a/tools/dragen-germline/4.3.6/dragen-germline__4.3.6.cwl +++ b/tools/dragen-germline/4.3.6/dragen-germline__4.3.6.cwl @@ -33,6 +33,7 @@ hints: DockerRequirement: dockerPull: 699120554104.dkr.ecr.us-east-1.amazonaws.com/public/dragen:4.3.6 + requirements: ResourceRequirement: tmpdirMin: | @@ -81,11 +82,56 @@ requirements: # Run dragen command and import options from cli "$(get_dragen_bin_path())" "\${@}" + + # DRAGEN Multi-region Joint Detection (MRJD) is a de novo germline small variant caller for paralogous regions. + # MRJD is compatible with the hg38, hg19 and GRCh37 reference genomes. + # https://help.dragen.illumina.com/product-guides/dragen-v4.3/dragen-dna-pipeline/small-variant-calling/multi-region-joint-detection + # + # The Multi-Region Joint Detection (MRJD) Caller should be run as a standalone pipeline on the DRAGEN™ server (not integrated with the Germline Small VC) + # https://support.illumina.com/content/dam/illumina-support/documents/downloads/software/dragen/release-notes/200056923_00_DRAGEN_4_3_6_Customer-Release-Notes.pdf + if [[ \\ + "$(get_bool_value_as_str(inputs.enable_mrjd))" == "true" \\ + ]]; then + echo "Optionally run MRJD if relevant parameter is enabled" 1>&2 + "$(get_dragen_bin_path())" \\ + --ref-dir="$(get_ref_path(inputs.reference_tar))" \\ + --bam-input="$(inputs.output_directory)/$(inputs.output_file_prefix).bam" \\ + --enable-map-align="false" \\ + --enable-mrjd="true" \\ + --mrjd-enable-high-sensitivity-mode="$(get_bool_value_as_str(inputs.mrjd_enable_high_sensitivity_mode))" \\ + --output-directory="$(inputs.output_directory)/mrjd/" \\ + --output-file-prefix="$(inputs.output_file_prefix)" + + # Merge MRJD VCF with the original VCF + mrjd_utility_url="https://webdata.illumina.com/downloads/software/dragen/resource-files/mrjd-utility-1.0.tar.gz" + wget \\ + --output-document /dev/stdout 
\\ + --quiet \\ + "\${mrjd_utility_url}" | \\ + tar \\ + --extract \\ + --gzip \\ + --file - \\ + mrjd_utility/mrjd.bed \\ + mrjd_utility/merge_vc_mrjd_vcf.py + + # Run MRJD utility + # FIXME - might want to put this in the mrjd output directory + # FIXME - just want to see what data is generated from what program + mkdir -p "$(inputs.output_directory)/mrjd-merged" + python3 "./mrjd_utility/merge_vc_mrjd_vcf.py" \\ + --bed "mrjd_utility/mrjd.bed" \\ + --vc "$(inputs.output_directory)/$(inputs.output_file_prefix).vcf" \\ + --mrjd "$(inputs.output_directory)/mrjd/$(inputs.output_file_prefix).vcf" \\ + --outdir "$(inputs.output_directory)/mrjd-merged/" + fi - | ${ return generate_germline_mount_points(inputs); } + + baseCommand: [ "bash" ] arguments: @@ -747,7 +793,7 @@ inputs: enable_hla: label: enable hla doc: | - Enable HLA typing by setting --enable-hla flag to true + Enable HLA typing for class I genes by setting --enable-hla flag to true type: boolean? inputBinding: prefix: "--enable-hla=" @@ -756,7 +802,7 @@ inputs: hla_enable_class_2: label: hla enable class 2 doc: | - Enable class II HLA typing by setting --hla-enable-class-2 flag to true + Enable HLA typing for class II genes by setting --hla-enable-class-2 flag to true type: boolean? inputBinding: prefix: "--hla-enable-class-2=" @@ -825,6 +871,22 @@ inputs: inputBinding: prefix: "--hla-min-reads=" separate: False + + # Multi-Region Joint Detection + enable_mrjd: + label: enable multi-region joint detection + doc: | + In DRAGEN v4.3, MRJD covers regions that include six clinically relevant genes: NEB, TTN, SMN1/2, PMS2, STRC, and IKBKG. + With this option enabled, the following two types of variants are reported: 1. Uniquely placed variants; 2. Region-ambiguous variants. + type: boolean? + mrjd_enable_high_sensitivity_mode: + label: enable multi-region joint detection high sensitivity mode + doc: | + In addition to 1. Uniquely placed variants and 2. 
Region-ambiguous variants, with this option enabled, + the following two types of variants are reported: 3. Positions where the reference alleles in all paralogous regions are not the same; + 4. Variants that have been placed uniquely in one of the paralogous regions and no variant in the corresponding position in the other region + type: boolean? + # Miscellaneous options lic_instance_id_location: label: license instance id location diff --git a/tools/dragen-somatic/4.3.6/dragen-somatic__4.3.6.cwl b/tools/dragen-somatic/4.3.6/dragen-somatic__4.3.6.cwl index 73e4aaeb..20c20d73 100644 --- a/tools/dragen-somatic/4.3.6/dragen-somatic__4.3.6.cwl +++ b/tools/dragen-somatic/4.3.6/dragen-somatic__4.3.6.cwl @@ -1338,81 +1338,6 @@ inputs: prefix: "--tmb-db-threshold=" separate: False - # HLA calling - # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/HLACaller.htm - enable_hla: - label: enable hla - doc: | - Enable HLA typing by setting --enable-hla flag to true - type: boolean? - inputBinding: - prefix: "--enable-hla=" - separate: False - valueFrom: "$(self.toString())" - hla_bed_file: - label: hla bed file - doc: | - Use the HLA region BED input file to specify the region to extract HLA reads from. - DRAGEN HLA Caller parses the input file for regions within the BED file, and then - extracts reads accordingly to align with the HLA allele reference. - type: File? - inputBinding: - prefix: "--hla-bed-file=" - separate: False - hla_reference_file: - label: hla reference file - doc: | - Use the HLA allele reference file to specify the reference alleles to align against. - The input HLA reference file must be in FASTA format and contain the protein sequence separated into exons. - If --hla-reference-file is not specified, DRAGEN uses hla_classI_ref_freq.fasta from /opt/edico/config/. - The reference HLA sequences are obtained from the IMGT/HLA database. - type: File? 
- inputBinding: - prefix: "--hla-reference-file=" - separate: False - hla_allele_frequency_file: - label: hla allele frequency file - doc: | - Use the population-level HLA allele frequency file to break ties if one or more HLA allele produces the same or similar results. - The input HLA allele frequency file must be in CSV format and contain the HLA alleles and the occurrence frequency in population. - If --hla-allele-frequency-file is not specified, DRAGEN automatically uses hla_classI_allele_frequency.csv from /opt/edico/config/. - Population-level allele frequencies can be obtained from the Allele Frequency Net database. - type: File? - inputBinding: - prefix: "--hla-allele-frequency-file=" - separate: False - hla_tiebreaker_threshold: - label: hla tiebreaker threshold - doc: | - If more than one allele has a similar number of reads aligned and there is not a clear indicator for the best allele, - the alleles are considered as ties. The HLA Caller places the tied alleles into a candidate set for tie breaking based - on the population allele frequency. If an allele has more than the specified fraction of reads aligned (normalized to - the top hit), then the allele is included into the candidate set for tie breaking. The default value is 0.97. - type: float? - inputBinding: - prefix: "--hla-tiebreaker-threshold=" - separate: False - hla_zygosity_threshold: - label: hla zygosity threshold - doc: | - If the minor allele at a given locus has fewer reads mapped than a fraction of the read count of the major allele, - then the HLA Caller infers homozygosity for the given HLA-I gene. You can use this option to specify the fraction value. - The default value is 0.15. - type: float? - inputBinding: - prefix: "--hla zygosity threshold=" - separate: False - hla_min_reads: - label: hla min reads - doc: | - Set the minimum number of reads to align to HLA alleles to ensure sufficient coverage and perform HLA typing. - The default value is 1000 and suggested for WES samples. 
If using samples with less coverage, you can use a - lower threshold value. - type: int? - inputBinding: - prefix: "--hla-min-reads=" - separate: False - # RNA # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/TPipelineIntro_fDG.htm enable_rna: diff --git a/workflows/dragen-somatic-with-germline-pipeline/4.3.6/dragen-somatic-with-germline-pipeline__4.3.6.cwl b/workflows/dragen-somatic-with-germline-pipeline/4.3.6/dragen-somatic-with-germline-pipeline__4.3.6.cwl index 496f85bf..8b76ee22 100644 --- a/workflows/dragen-somatic-with-germline-pipeline/4.3.6/dragen-somatic-with-germline-pipeline__4.3.6.cwl +++ b/workflows/dragen-somatic-with-germline-pipeline/4.3.6/dragen-somatic-with-germline-pipeline__4.3.6.cwl @@ -242,14 +242,14 @@ inputs: enable_cnv_germline: label: enable cnv germline doc: | - Enable CNV processing in the DRAGEN Host Software (somatic only) + Enable CNV processing in the DRAGEN Host Software (germline only) type: boolean? enable_cnv_somatic: label: enable cnv somatic doc: | - Enable CNV processing in the DRAGEN Host Software (germline only) + Enable CNV processing in the DRAGEN Host Software (somatic only) type: boolean? - + # Phased / MNV Calling options vc_combine_phased_variants_distance_somatic: label: vc combine phased variants distance somatic @@ -460,6 +460,7 @@ inputs: doc: | The -vc-enable-vcf-output option enables VCF file output during a gVCF run. The default value is false. type: boolean? + # Downsampling options vc_max_reads_per_active_region: label: vc max reads per active region @@ -473,6 +474,7 @@ inputs: specifies the maximum number of reads covering a given raw region. Default is 30000 for the somatic workflow type: int? 
+ # Ploidy support sample_sex: label: sample sex @@ -484,6 +486,7 @@ inputs: symbols: - male - female + # ROH options vc_enable_roh: label: vc enable roh @@ -497,12 +500,14 @@ inputs: DRAGEN distributes blacklist files for all popular human genomes and automatically selects a blacklist to match the genome in use, unless this option is used explicitly select a file. type: File? + # BAF options vc_enable_baf: label: vc enable baf doc: | Enable or disable B-allele frequency output. Enabled by default. type: boolean? + # Somatic calling options vc_min_tumor_read_qual: label: vc min tumor read qual @@ -568,6 +573,7 @@ inputs: The default is C/T,G/T, which correspond to OxoG and FFPE artifacts. Valid values include C/T, or G/T, or C/T,G/T,C/A. An artifact (or an artifact and its reverse compliment) cannot be listed twice. For example, C/T,G/A is not valid, because C->G and T->A are reverse compliments. + # Post somatic calling filtering options # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/PostSomaticFilters.htm vc_hard_filter: @@ -810,12 +816,18 @@ inputs: from the TMB calculation. The default value is 10. type: int? - # HLA calling + # HLA calling only available to germline mode # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/HLACaller.htm + # https://help.dragen.illumina.com/product-guides/dragen-v4.3/dragen-reference-support enable_hla: label: enable hla doc: | - Enable HLA typing by setting --enable-hla flag to true + Enable HLA typing for class I genes by setting --enable-hla flag to true + type: boolean? + hla_enable_class_2: + label: enable hla class 2 + doc: | + Enable HLA typing for class II genes by setting --hla-enable-class-2 flag to true type: boolean? hla_bed_file: label: hla bed file @@ -863,6 +875,23 @@ inputs: lower threshold value. type: int? 
+ # Multi-Region Joint Detection only available to germline mode + # https://help.dragen.illumina.com/product-guides/dragen-v4.3/dragen-dna-pipeline/small-variant-calling/multi-region-joint-detection + # https://help.dragen.illumina.com/product-guides/dragen-v4.3/dragen-reference-support + enable_mrjd: + label: enable multi-region joint detection (mrjd) + doc: | + In DRAGEN v4.3, MRJD covers regions that include six clinically relevant genes: NEB, TTN, SMN1/2, PMS2, STRC, and IKBKG. + With this option enabled, the following two types of variants are reported: 1. Uniquely placed variants; 2. Region-ambiguous variants. + type: boolean? + mrjd_enable_high_sensitivity_mode: + label: enable multi-region joint detection high sensitivity mode + doc: | + In addition to 1. Uniquely placed variants and 2. Region-ambiguous variants, with this option enabled, + the following two types of variants are reported: 3. Positions where the reference alleles in all paralogous regions are not the same; + 4. Variants that have been placed uniquely in one of the paralogous regions and no variant in the corresponding position in the other region + type: boolean? 
+ # RNA # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/TPipelineIntro_fDG.htm enable_rna: @@ -1065,10 +1094,10 @@ steps: source: sv_tin_contam_tolerance dbsnp_annotation: source: dbsnp_annotation - #cnv options + # cnv options cnv_enable_self_normalization: source: cnv_enable_self_normalization - #qc options + # qc options qc_coverage_region_1: source: qc_coverage_region_1 qc_coverage_region_2: @@ -1077,9 +1106,11 @@ steps: source: qc_coverage_region_3 qc_coverage_ignore_overlaps: source: qc_coverage_ignore_overlaps - #hla + # hla options enable_hla: source: enable_hla + hla_enable_class_2: + source: hla_enable_class_2 hla_bed_file: source: hla_bed_file hla_reference_file: @@ -1094,6 +1125,11 @@ steps: source: hla_min_reads lic_instance_id_location: source: lic_instance_id_location + # multi-region joint detection options + enable_mrjd: + source: enable_mrjd + mrjd_enable_high_sensitivity_mode: + source: mrjd_enable_high_sensitivity_mode out: - id: dragen_germline_output_directory - id: dragen_bam_out @@ -1362,22 +1398,6 @@ steps: source: tmb_vaf_threshold tmb_db_threshold: source: tmb_db_threshold - # HLA calling - # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/HLACaller.htm - enable_hla: - source: enable_hla - hla_bed_file: - source: hla_bed_file - hla_reference_file: - source: hla_reference_file - hla_allele_frequency_file: - source: hla_allele_frequency_file - hla_tiebreaker_threshold: - source: hla_tiebreaker_threshold - hla_zygosity_threshold: - source: hla_zygosity_threshold - hla_min_reads: - source: hla_min_reads # RNA # https://support-docs.illumina.com/SW/DRAGEN_v40/Content/SW/DRAGEN/TPipelineIntro_fDG.htm enable_rna: