From 14eccf405392a22712b6dd53599740157e06e821 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 29 Oct 2024 18:10:32 +0100 Subject: [PATCH 1/2] Add haploid variant calling workflow --- .../.dockstore.yml | 11 + .../CHANGELOG.md | 6 + .../haploid-variant-calling-wgs-pe/README.md | 15 + ...ariant-calling-in-haploid-system-tests.yml | 49 ++ ...GS-PE-variant-calling-in-haploid-system.ga | 784 ++++++++++++++++++ .../test-data/Annotated Variants.tabular | 5 + 6 files changed, 870 insertions(+) create mode 100644 workflows/variant-calling/haploid-variant-calling-wgs-pe/.dockstore.yml create mode 100644 workflows/variant-calling/haploid-variant-calling-wgs-pe/CHANGELOG.md create mode 100644 workflows/variant-calling/haploid-variant-calling-wgs-pe/README.md create mode 100644 workflows/variant-calling/haploid-variant-calling-wgs-pe/WGS-PE-variant-calling-in-haploid-system-tests.yml create mode 100644 workflows/variant-calling/haploid-variant-calling-wgs-pe/WGS-PE-variant-calling-in-haploid-system.ga create mode 100644 workflows/variant-calling/haploid-variant-calling-wgs-pe/test-data/Annotated Variants.tabular diff --git a/workflows/variant-calling/haploid-variant-calling-wgs-pe/.dockstore.yml b/workflows/variant-calling/haploid-variant-calling-wgs-pe/.dockstore.yml new file mode 100644 index 000000000..747c69ed7 --- /dev/null +++ b/workflows/variant-calling/haploid-variant-calling-wgs-pe/.dockstore.yml @@ -0,0 +1,11 @@ +version: 1.2 +workflows: +- name: main + subclass: Galaxy + publish: true + primaryDescriptorPath: /WGS-PE-variant-calling-in-haploid-system.ga + testParameterFiles: + - /WGS-PE-variant-calling-in-haploid-system-tests.yml + authors: + - name: Anton Nekrutenko + orcid: 0000-0002-5987-8032 diff --git a/workflows/variant-calling/haploid-variant-calling-wgs-pe/CHANGELOG.md b/workflows/variant-calling/haploid-variant-calling-wgs-pe/CHANGELOG.md new file mode 100644 index 000000000..fda7e77e6 --- /dev/null +++ 
b/workflows/variant-calling/haploid-variant-calling-wgs-pe/CHANGELOG.md @@ -0,0 +1,6 @@ +# Changelog + + +## [0.1] + +- Initial version of Paired end variant calling in haploid system workflow diff --git a/workflows/variant-calling/haploid-variant-calling-wgs-pe/README.md b/workflows/variant-calling/haploid-variant-calling-wgs-pe/README.md new file mode 100644 index 000000000..0aabda928 --- /dev/null +++ b/workflows/variant-calling/haploid-variant-calling-wgs-pe/README.md @@ -0,0 +1,15 @@ +# Haploid variant calling for whole genome sequencing paired end data + +## Inputs dataset + +- The workflow needs a list of paired end fastq files +- A GTF containing the Gene annotation for the selected haploid genome +- A fasta file for the haploid genome to call variants against + +## Processing + +- The workflow will remove adapters using fastp +- The filtered reads are aligned with bwa-mem. +- Only properly aligned mate pairs are retained, and PCR duplicates are removed. +- Alignments are re-aligned using lofreq viterbi and variants are called with lofreq call. 
+- Variants are annotated with snpeff eff diff --git a/workflows/variant-calling/haploid-variant-calling-wgs-pe/WGS-PE-variant-calling-in-haploid-system-tests.yml b/workflows/variant-calling/haploid-variant-calling-wgs-pe/WGS-PE-variant-calling-in-haploid-system-tests.yml new file mode 100644 index 000000000..3c44a9780 --- /dev/null +++ b/workflows/variant-calling/haploid-variant-calling-wgs-pe/WGS-PE-variant-calling-in-haploid-system-tests.yml @@ -0,0 +1,49 @@ +- doc: Test outline for WGS-PE-variant-calling-in-haploid-system + job: + Annotation GTF: + class: File + location: https://zenodo.org/records/14009320/files/Annotation%20GTF.gtf?download=1 + filetype: gtf + Genome fasta: + class: File + location: https://zenodo.org/records/14009320/files/Genome%20fasta.fasta.gz?download=1 + filetype: fasta.gz + Paired Collection: + class: Collection + collection_type: list:paired + elements: + - class: Collection + type: paired + identifier: ERR018930 + elements: + - class: File + identifier: forward + location: https://zenodo.org/records/14009320/files/ERR018930_forward.fastqsanger.gz?download=1 + - class: File + identifier: reverse + location: https://zenodo.org/records/14009320/files/ERR018930_reverse.fastqsanger.gz?download=1 + - class: Collection + type: paired + identifier: ERR1035492 + elements: + - class: File + identifier: forward + location: https://zenodo.org/records/14009320/files/ERR1035492_forward.fastqsanger.gz?download=1 + - class: File + identifier: reverse + location: https://zenodo.org/records/14009320/files/ERR1035492_reverse.fastqsanger.gz?download=1 + outputs: + Annotated Variants: + path: test-data/Annotated Variants.tabular + SnpEff variants: + element_tests: + ERR018930: + asserts: + - has_line: + line: 'NC_009906.1 3204 . A G 120.0 PASS DP=22;AF=0.727273;SB=2;DP4=2,3,3,14;EFF=INTRAGENIC(MODIFIER|||||PVX_087665||NON_CODING|||G)' + - has_line: + line: 'NC_009906.1 3261 . 
C A 52.0 PASS DP=15;AF=0.333333;SB=0;DP4=3,7,2,3;EFF=INTRAGENIC(MODIFIER|||||PVX_087665||NON_CODING|||A)' + ERR1035492: + asserts: + has_line: + line: 'NC_009906.1 2975 . A G 75.0 PASS DP=26;AF=0.692308;SB=0;DP4=5,3,12,6;EFF=INTRAGENIC(MODIFIER|||||PVX_087665||NON_CODING|||G)' diff --git a/workflows/variant-calling/haploid-variant-calling-wgs-pe/WGS-PE-variant-calling-in-haploid-system.ga b/workflows/variant-calling/haploid-variant-calling-wgs-pe/WGS-PE-variant-calling-in-haploid-system.ga new file mode 100644 index 000000000..97786122c --- /dev/null +++ b/workflows/variant-calling/haploid-variant-calling-wgs-pe/WGS-PE-variant-calling-in-haploid-system.ga @@ -0,0 +1,784 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "Workflow for variant analysis against a haploid reference genome provided as a FASTA file with GTF gene annotation", + "comments": [], + "creator": [ + { + "class": "Person", + "identifier": "https://orcid.org/0000-0002-5987-8032", + "name": "Anton Nekrutenko" + } + ], + "format-version": "0.1", + "license": "MIT", + "name": "Paired end variant calling in haploid system", + "release": "0.1", + "report": { + "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" + }, + "steps": { + "0": { + "annotation": "Illumina reads with fastqsanger encoding", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "Illumina reads with fastqsanger encoding", + "name": "Paired Collection" + } + ], + "label": "Paired Collection", + "name": "Input dataset collection", + "outputs": [], + "position": { + "left": 0, + "top": 0 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"fastqsanger\", \"fastqsanger.gz\"], \"tag\": \"\", \"collection_type\": \"list:paired\"}", + "tool_version": null, + "type": "data_collection_input", + "uuid": 
"87e2a757-c63f-42c4-ac41-2ff9802fc230", + "when": null, + "workflow_outputs": [] + }, + "1": { + "annotation": "GTF with gene annotation, will be used for annotating variants", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "GTF with gene annotation, will be used for annotating variants", + "name": "Annotation GTF" + } + ], + "label": "Annotation GTF", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 15.4000244140625, + "top": 149.5 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "cc6b08f6-0cac-43b9-a6ac-5cb9e809aef0", + "when": null, + "workflow_outputs": [] + }, + "2": { + "annotation": "Fasta file to use as reference for variant calling", + "content_id": null, + "errors": null, + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "Fasta file to use as reference for variant calling", + "name": "Genome fasta" + } + ], + "label": "Genome fasta", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 68.79998779296875, + "top": 361.1807963463069 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "56f0bac5-1144-424e-a2ee-e7ce408ae855", + "when": null, + "workflow_outputs": [] + }, + "3": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/fastp/fastp/0.23.2+galaxy0", + "errors": null, + "id": 3, + "input_connections": { + "single_paired|paired_input": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool fastp", + "name": "single_paired" + } + ], + "label": null, + "name": "fastp", + "outputs": [ + { + "name": "output_paired_coll", + "type": "input" + }, + { + "name": "report_html", + "type": "html" + }, + { + "name": "report_json", + "type": "json" + } + ], + "position": { + "left": 
472.81558221323667, + "top": 210.69642625646713 + }, + "post_job_actions": { + "HideDatasetActionreport_json": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "report_json" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/fastp/fastp/0.23.2+galaxy0", + "tool_shed_repository": { + "changeset_revision": "65b93b623c77", + "name": "fastp", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"filter_options\": {\"quality_filtering_options\": {\"disable_quality_filtering\": false, \"qualified_quality_phred\": null, \"unqualified_percent_limit\": null, \"n_base_limit\": null}, \"length_filtering_options\": {\"disable_length_filtering\": false, \"length_required\": null, \"length_limit\": null}, \"low_complexity_filter\": {\"enable_low_complexity_filter\": false, \"complexity_threshold\": null}}, \"output_options\": {\"report_html\": true, \"report_json\": true}, \"overrepresented_sequence_analysis\": {\"overrepresentation_analysis\": false, \"overrepresentation_sampling\": null}, \"read_mod_options\": {\"polyg_tail_trimming\": {\"trimming_select\": \"\", \"__current_case__\": 1, \"poly_g_min_len\": null}, \"polyx_tail_trimming\": {\"polyx_trimming_select\": \"\", \"__current_case__\": 1}, \"umi_processing\": {\"umi\": false, \"umi_loc\": \"\", \"umi_len\": null, \"umi_prefix\": \"\"}, \"cutting_by_quality_options\": {\"cut_by_quality5\": false, \"cut_by_quality3\": false, \"cut_window_size\": null, \"cut_mean_quality\": null}, \"base_correction_options\": {\"correction\": false}}, \"single_paired\": {\"single_paired_selector\": \"paired_collection\", \"__current_case__\": 2, \"paired_input\": {\"__class__\": \"ConnectedValue\"}, \"adapter_trimming_options\": {\"disable_adapter_trimming\": false, \"adapter_sequence1\": \"\", \"adapter_sequence2\": \"\"}, \"global_trimming_options\": {\"trim_front1\": null, \"trim_tail1\": null, \"trim_front2\": null, \"trim_tail2\": null}}, \"__page__\": null, 
\"__rerun_remap_job_id__\": null}", + "tool_version": "0.23.2+galaxy0", + "type": "tool", + "uuid": "aab66ae3-1955-46f0-b0b8-9d6374b81d8a", + "when": null, + "workflow_outputs": [ + { + "label": "fastp html report", + "output_name": "report_html", + "uuid": "f780b8d7-8dbb-4a04-9522-8a2c99b317d9" + } + ] + }, + "4": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snpeff/snpEff_build_gb/4.3+T.galaxy6", + "errors": null, + "id": 4, + "input_connections": { + "input_type|input": { + "id": 1, + "output_name": "output" + }, + "input_type|reference_source|input_fasta": { + "id": 2, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnpEff build:", + "name": "input_type" + } + ], + "label": null, + "name": "SnpEff build:", + "outputs": [ + { + "name": "snpeff_output", + "type": "snpeffdb" + } + ], + "position": { + "left": 437.79998779296875, + "top": 805.1807963463069 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snpeff/snpEff_build_gb/4.3+T.galaxy6", + "tool_shed_repository": { + "changeset_revision": "6322be79bd8e", + "name": "snpeff", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"codon_table\": \"Standard\", \"genome_version\": \"snpeff_db\", \"input_type\": {\"input_type_selector\": \"gtf\", \"__current_case__\": 2, \"input\": {\"__class__\": \"ConnectedValue\"}, \"reference_source\": {\"reference_source_selector\": \"history\", \"__current_case__\": 1, \"input_fasta\": {\"__class__\": \"ConnectedValue\"}}}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "4.3+T.galaxy6", + "type": "tool", + "uuid": "691404df-5586-476d-a11e-28ccba3ddd55", + "when": null, + "workflow_outputs": [ + { + "label": "SnpEff4.3 database", + "output_name": "snpeff_output", + "uuid": "8d680888-a2b7-465f-890d-81e78aaa0396" + } + ] + }, + "5": { + "annotation": "", + "content_id": 
"toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa_mem/0.7.17.2", + "errors": null, + "id": 5, + "input_connections": { + "fastq_input|fastq_input1": { + "id": 3, + "output_name": "output_paired_coll" + }, + "reference_source|ref_file": { + "id": 2, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Map with BWA-MEM", + "name": "fastq_input" + }, + { + "description": "runtime parameter for tool Map with BWA-MEM", + "name": "reference_source" + } + ], + "label": null, + "name": "Map with BWA-MEM", + "outputs": [ + { + "name": "bam_output", + "type": "bam" + } + ], + "position": { + "left": 738.3546544092493, + "top": 442.2276665604545 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/bwa/bwa_mem/0.7.17.2", + "tool_shed_repository": { + "changeset_revision": "64f11cf59c6e", + "name": "bwa", + "owner": "devteam", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"analysis_type\": {\"analysis_type_selector\": \"full\", \"__current_case__\": 4, \"algorithmic_options\": {\"algorithmic_options_selector\": \"do_not_set\", \"__current_case__\": 1}, \"scoring_options\": {\"scoring_options_selector\": \"do_not_set\", \"__current_case__\": 1}, \"io_options\": {\"io_options_selector\": \"set\", \"__current_case__\": 0, \"five\": false, \"q\": true, \"T\": \"30\", \"h\": \"5\", \"a\": false, \"C\": false, \"V\": false, \"Y\": true, \"M\": false}}, \"fastq_input\": {\"fastq_input_selector\": \"paired_collection\", \"__current_case__\": 2, \"fastq_input1\": {\"__class__\": \"ConnectedValue\"}, \"iset_stats\": \"\"}, \"output_sort\": \"coordinate\", \"reference_source\": {\"reference_source_selector\": \"history\", \"__current_case__\": 1, \"ref_file\": {\"__class__\": \"ConnectedValue\"}, \"index_a\": \"auto\"}, \"rg\": {\"rg_selector\": \"set\", \"__current_case__\": 1, \"read_group_id_conditional\": {\"do_auto_name\": true, \"__current_case__\": 0}, \"read_group_sm_conditional\": 
{\"do_auto_name\": true, \"__current_case__\": 0}, \"PL\": \"ILLUMINA\", \"read_group_lb_conditional\": {\"do_auto_name\": true, \"__current_case__\": 0}, \"CN\": null, \"DS\": null, \"DT\": null, \"FO\": null, \"KS\": null, \"PG\": null, \"PI\": null, \"PU\": null}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.7.17.2", + "type": "tool", + "uuid": "b227a5b7-bd10-45f2-8b3c-9797ec9fe6c9", + "when": null, + "workflow_outputs": [] + }, + "6": { + "annotation": "Retains alignments if read is paired and mapped in proper orientation ", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/samtools_view/samtools_view/1.13+galaxy1", + "errors": null, + "id": 6, + "input_connections": { + "input": { + "id": 5, + "output_name": "bam_output" + } + }, + "inputs": [], + "label": null, + "name": "Samtools view", + "outputs": [ + { + "name": "outputsam", + "type": "input" + } + ], + "position": { + "left": 996.7531127929688, + "top": 307.0401713463069 + }, + "post_job_actions": { + "RenameDatasetActionoutputsam": { + "action_arguments": { + "newname": "Mapped read pairs (filtered bwa-mem result)" + }, + "action_type": "RenameDatasetAction", + "output_name": "outputsam" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/samtools_view/samtools_view/1.13+galaxy1", + "tool_shed_repository": { + "changeset_revision": "c370440f901e", + "name": "samtools_view", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"addref_cond\": {\"addref_select\": \"no\", \"__current_case__\": 0}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"mode\": {\"outtype\": \"selected_reads\", \"__current_case__\": 1, \"filter_config\": {\"cond_region\": {\"select_region\": \"no\", \"__current_case__\": 0}, \"cond_rg\": {\"select_rg\": \"no\", \"__current_case__\": 0}, \"quality\": \"0\", \"library\": \"\", \"cigarcons\": null, \"inclusive_filter\": [\"1\", \"2\"], \"exclusive_filter\": null, \"exclusive_filter_all\": null, \"tag\": \"\", 
\"qname_file\": {\"__class__\": \"RuntimeValue\"}}, \"subsample_config\": {\"subsampling_mode\": {\"select_subsample\": \"fraction\", \"__current_case__\": 0, \"factor\": \"1.0\", \"seed\": null}}, \"output_options\": {\"reads_report_type\": \"retained\", \"__current_case__\": 0, \"complementary_output\": false, \"adv_output\": {\"readtags\": [], \"collapsecigar\": false}, \"output_format\": {\"oformat\": \"bam\", \"__current_case__\": 2}}}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.13+galaxy1", + "type": "tool", + "uuid": "9cd7e5e8-fdf0-4c0b-9921-a6fd0d8cc735", + "when": null, + "workflow_outputs": [] + }, + "7": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/samtools_stats/samtools_stats/2.0.2+galaxy2", + "errors": null, + "id": 7, + "input_connections": { + "input": { + "id": 6, + "output_name": "outputsam" + } + }, + "inputs": [], + "label": null, + "name": "Samtools stats", + "outputs": [ + { + "name": "output", + "type": "tabular" + } + ], + "position": { + "left": 1274.0343627929688, + "top": 236.8057963463069 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/samtools_stats/samtools_stats/2.0.2+galaxy2", + "tool_shed_repository": { + "changeset_revision": "145f6d74ff5e", + "name": "samtools_stats", + "owner": "devteam", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"addref_cond\": {\"addref_select\": \"no\", \"__current_case__\": 0}, \"cond_region\": {\"select_region\": \"no\", \"__current_case__\": 0}, \"cov_threshold\": null, \"coverage_cond\": {\"coverage_select\": \"no\", \"__current_case__\": 0}, \"filter_by_flags\": {\"filter_flags\": \"nofilter\", \"__current_case__\": 1}, \"gc_depth\": null, \"input\": {\"__class__\": \"ConnectedValue\"}, \"insert_size\": null, \"most_inserts\": null, \"read_length\": null, \"remove_dups\": false, \"remove_overlaps\": false, \"sparse\": false, \"split_output_cond\": {\"split_output_selector\": \"no\", 
\"__current_case__\": 0}, \"trim_quality\": null, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.0.2+galaxy2", + "type": "tool", + "uuid": "1c709200-f5e5-4699-a881-428fe57ec1fd", + "when": null, + "workflow_outputs": [] + }, + "8": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/picard/picard_MarkDuplicates/2.18.2.2", + "errors": null, + "id": 8, + "input_connections": { + "inputFile": { + "id": 6, + "output_name": "outputsam" + } + }, + "inputs": [], + "label": null, + "name": "MarkDuplicates", + "outputs": [ + { + "name": "metrics_file", + "type": "txt" + }, + { + "name": "outFile", + "type": "bam" + } + ], + "position": { + "left": 1306.4093627929688, + "top": 431.1964213463069 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/picard/picard_MarkDuplicates/2.18.2.2", + "tool_shed_repository": { + "changeset_revision": "a1f0b3f4b781", + "name": "picard", + "owner": "devteam", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"assume_sorted\": true, \"barcode_tag\": \"\", \"comments\": [], \"duplicate_scoring_strategy\": \"SUM_OF_BASE_QUALITIES\", \"inputFile\": {\"__class__\": \"ConnectedValue\"}, \"optical_duplicate_pixel_distance\": \"100\", \"read_name_regex\": \"\", \"remove_duplicates\": true, \"validation_stringency\": \"LENIENT\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.18.2.2", + "type": "tool", + "uuid": "32e3534c-283c-45a3-845e-edadfe29343a", + "when": null, + "workflow_outputs": [] + }, + "9": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/multiqc/multiqc/1.11+galaxy0", + "errors": null, + "id": 9, + "input_connections": { + "results_0|software_cond|input": { + "id": 3, + "output_name": "report_json" + }, + "results_1|software_cond|output_0|type|input": { + "id": 7, + "output_name": "output" + }, + "results_2|software_cond|output_0|input": { + "id": 8, + "output_name": "metrics_file" + } + 
}, + "inputs": [], + "label": null, + "name": "MultiQC", + "outputs": [ + { + "name": "stats", + "type": "input" + }, + { + "name": "html_report", + "type": "html" + } + ], + "position": { + "left": 1684.5108939673896, + "top": 115.18079634630689 + }, + "post_job_actions": { + "HideDatasetActionstats": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "stats" + }, + "RenameDatasetActionhtml_report": { + "action_arguments": { + "newname": "Preprocessing and mapping reports" + }, + "action_type": "RenameDatasetAction", + "output_name": "html_report" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/multiqc/multiqc/1.11+galaxy0", + "tool_shed_repository": { + "changeset_revision": "9a913cdee30e", + "name": "multiqc", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"comment\": \"\", \"export\": false, \"flat\": false, \"results\": [{\"__index__\": 0, \"software_cond\": {\"software\": \"fastp\", \"__current_case__\": 7, \"input\": {\"__class__\": \"ConnectedValue\"}}}, {\"__index__\": 1, \"software_cond\": {\"software\": \"samtools\", \"__current_case__\": 24, \"output\": [{\"__index__\": 0, \"type\": {\"type\": \"stats\", \"__current_case__\": 0, \"input\": {\"__class__\": \"ConnectedValue\"}}}]}}, {\"__index__\": 2, \"software_cond\": {\"software\": \"picard\", \"__current_case__\": 17, \"output\": [{\"__index__\": 0, \"type\": \"markdups\", \"input\": {\"__class__\": \"ConnectedValue\"}}]}}], \"saveLog\": false, \"title\": \"\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.11+galaxy0", + "type": "tool", + "uuid": "827f0bc5-b7b3-472f-95fc-5bf34fffb9fd", + "when": null, + "workflow_outputs": [ + { + "label": "Preprocessing and mapping reports", + "output_name": "html_report", + "uuid": "73117bba-89ef-49b8-a29f-df28a3bba689" + } + ] + }, + "10": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/lofreq_viterbi/lofreq_viterbi/2.1.5+galaxy0", + 
"errors": null, + "id": 10, + "input_connections": { + "reads": { + "id": 8, + "output_name": "outFile" + }, + "reference_source|ref": { + "id": 2, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Realign reads", + "name": "reference_source" + } + ], + "label": null, + "name": "Realign reads", + "outputs": [ + { + "name": "realigned", + "type": "bam" + } + ], + "position": { + "left": 1652.2999877929688, + "top": 581.6807963463069 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/lofreq_viterbi/lofreq_viterbi/2.1.5+galaxy0", + "tool_shed_repository": { + "changeset_revision": "aa35ee7f3ab2", + "name": "lofreq_viterbi", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"adv_options\": {\"keepflags\": false, \"bq2_handling\": {\"replace_bq2\": \"keep\", \"__current_case__\": 0, \"defqual\": \"2\"}}, \"reads\": {\"__class__\": \"ConnectedValue\"}, \"reference_source\": {\"ref_selector\": \"history\", \"__current_case__\": 1, \"ref\": {\"__class__\": \"ConnectedValue\"}}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.1.5+galaxy0", + "type": "tool", + "uuid": "299c5dae-cb7b-4de5-bdeb-6f5f2470488a", + "when": null, + "workflow_outputs": [] + }, + "11": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/lofreq_call/lofreq_call/2.1.5+galaxy3", + "errors": null, + "id": 11, + "input_connections": { + "reads": { + "id": 10, + "output_name": "realigned" + }, + "reference_source|ref": { + "id": 2, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Call variants", + "name": "reference_source" + } + ], + "label": null, + "name": "Call variants", + "outputs": [ + { + "name": "variants", + "type": "vcf" + } + ], + "position": { + "left": 1911.7999877929688, + "top": 689.1807963463069 + }, + "post_job_actions": {}, + "tool_id": 
"toolshed.g2.bx.psu.edu/repos/iuc/lofreq_call/lofreq_call/2.1.5+galaxy3", + "tool_shed_repository": { + "changeset_revision": "b11e8e9c23bf", + "name": "lofreq_call", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"call_control\": {\"set_call_options\": \"yes\", \"__current_case__\": 1, \"coverage\": {\"min_cov\": \"10\", \"max_depth\": \"1000000\"}, \"pe\": {\"use_orphan\": false}, \"bc_quals\": {\"min_bq\": \"20\", \"min_alt_bq\": \"20\", \"alt_bq\": {\"modify\": \"\", \"__current_case__\": 0}}, \"align_quals\": {\"alnqual\": {\"use_alnqual\": \"\", \"__current_case__\": 0, \"alnqual_choice\": {\"alnquals_to_use\": \"\", \"__current_case__\": 1, \"extended_baq\": true}}}, \"map_quals\": {\"min_mq\": \"0\", \"use_mq\": {\"no_mq\": \"\", \"__current_case__\": 0, \"max_mq\": \"255\"}}, \"source_qual\": {\"use_src_qual\": {\"src_qual\": \"\", \"__current_case__\": 0}}, \"joint_qual\": {\"min_jq\": \"0\", \"min_alt_jq\": \"0\", \"def_alt_jq\": \"0\"}}, \"filter_control\": {\"filter_type\": \"set_lofreq_standard\", \"__current_case__\": 2, \"sig\": \"0.01\", \"bonf\": \"dynamic\", \"others\": \"\"}, \"reads\": {\"__class__\": \"ConnectedValue\"}, \"reference_source\": {\"ref_selector\": \"history\", \"__current_case__\": 1, \"ref\": {\"__class__\": \"ConnectedValue\"}}, \"regions\": {\"restrict_to_region\": \"genome\", \"__current_case__\": 0}, \"variant_types\": \"--call-indels\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.1.5+galaxy3", + "type": "tool", + "uuid": "6df71467-a7b6-45f5-ab1d-1a9672d4f81c", + "when": null, + "workflow_outputs": [] + }, + "12": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_awk_tool/9.3+galaxy1", + "errors": null, + "id": 12, + "input_connections": { + "infile": { + "id": 11, + "output_name": "variants" + } + }, + "inputs": [], + "label": null, + "name": "Text reformatting", + "outputs": [ + { + "name": "outfile", + 
"type": "input" + } + ], + "position": { + "left": 2075.2999877929688, + "top": 471.4999963463248 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_awk_tool/9.3+galaxy1", + "tool_shed_repository": { + "changeset_revision": "86755160afbf", + "name": "text_processing", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"code\": \"/^#/ { print; next } ($4 ~ /^[ACGT]$/ && $5 ~ /^[ACGT]$/) { print }\", \"infile\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "9.3+galaxy1", + "type": "tool", + "uuid": "60e608c1-4b33-4d09-b3e5-be4c9334a233", + "when": null, + "workflow_outputs": [] + }, + "13": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snpeff/snpEff/4.3+T.galaxy2", + "errors": null, + "id": 13, + "input_connections": { + "input": { + "id": 12, + "output_name": "outfile" + }, + "snpDb|snpeff_db": { + "id": 4, + "output_name": "snpeff_output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SnpEff eff:", + "name": "intervals" + }, + { + "description": "runtime parameter for tool SnpEff eff:", + "name": "snpDb" + }, + { + "description": "runtime parameter for tool SnpEff eff:", + "name": "transcripts" + } + ], + "label": null, + "name": "SnpEff eff:", + "outputs": [ + { + "name": "snpeff_output", + "type": "vcf" + }, + { + "name": "statsFile", + "type": "html" + } + ], + "position": { + "left": 2109.8781127929688, + "top": 850.8214213463069 + }, + "post_job_actions": { + "TagDatasetActionsnpeff_output": { + "action_arguments": { + "tags": "VariantsAsVCF" + }, + "action_type": "TagDatasetAction", + "output_name": "snpeff_output" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snpeff/snpEff/4.3+T.galaxy2", + "tool_shed_repository": { + "changeset_revision": "6322be79bd8e", + "name": "snpeff", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + 
"tool_state": "{\"annotations\": [\"-formatEff\", \"-classic\"], \"chr\": \"\", \"csvStats\": false, \"filter\": {\"specificEffects\": \"no\", \"__current_case__\": 0}, \"filterOut\": [\"-no-downstream\", \"-no-intergenic\", \"-no-intron\", \"-no-upstream\", \"-no-utr\"], \"generate_stats\": true, \"input\": {\"__class__\": \"ConnectedValue\"}, \"inputFormat\": \"vcf\", \"intervals\": {\"__class__\": \"RuntimeValue\"}, \"noLog\": true, \"offset\": \"default\", \"outputConditional\": {\"outputFormat\": \"vcf\", \"__current_case__\": 0}, \"snpDb\": {\"genomeSrc\": \"custom\", \"__current_case__\": 3, \"snpeff_db\": {\"__class__\": \"ConnectedValue\"}, \"codon_table\": \"Standard\"}, \"spliceRegion\": {\"setSpliceRegions\": \"no\", \"__current_case__\": 0}, \"spliceSiteSize\": null, \"transcripts\": {\"__class__\": \"RuntimeValue\"}, \"udLength\": \"0\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "4.3+T.galaxy2", + "type": "tool", + "uuid": "e3d2c332-c029-463c-bb8b-771716db7d67", + "when": null, + "workflow_outputs": [ + { + "label": "SnpEff variants", + "output_name": "snpeff_output", + "uuid": "8f3cd914-554d-4fab-97ca-245245a63cb3" + }, + { + "label": "SnpEff eff reports", + "output_name": "statsFile", + "uuid": "37d8fb6f-b2e6-4f91-9f8e-58a30354b35d" + } + ] + }, + "14": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/snpsift/snpSift_extractFields/4.3+t.galaxy0", + "errors": null, + "id": 14, + "input_connections": { + "input": { + "id": 13, + "output_name": "snpeff_output" + } + }, + "inputs": [], + "label": null, + "name": "SnpSift Extract Fields", + "outputs": [ + { + "name": "output", + "type": "tabular" + } + ], + "position": { + "left": 2416.2999877929688, + "top": 510.1807963463069 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/snpsift/snpSift_extractFields/4.3+t.galaxy0", + "tool_shed_repository": { + "changeset_revision": "5fab4f81391d", + "name": "snpsift", + "owner": 
"iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"empty_text\": \".\", \"extract\": \"CHROM POS FILTER REF ALT DP AF DP4 SB EFF[*].IMPACT EFF[*].FUNCLASS EFF[*].EFFECT EFF[*].GENE EFF[*].CODON EFF[*].AA EFF[*].TRID\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"one_effect_per_line\": true, \"separator\": \"\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "4.3+t.galaxy0", + "type": "tool", + "uuid": "767ff87e-6a49-4bd4-ba05-27b4d349e695", + "when": null, + "workflow_outputs": [] + }, + "15": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/nml/collapse_collections/collapse_dataset/5.1.0", + "errors": null, + "id": 15, + "input_connections": { + "input_list": { + "id": 14, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "Collapse Collection", + "outputs": [ + { + "name": "output", + "type": "input" + } + ], + "position": { + "left": 2588.2999877929688, + "top": 751.1807963463069 + }, + "post_job_actions": { + "TagDatasetActionoutput": { + "action_arguments": { + "tags": "VariantsAsTSV" + }, + "action_type": "TagDatasetAction", + "output_name": "output" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/nml/collapse_collections/collapse_dataset/5.1.0", + "tool_shed_repository": { + "changeset_revision": "90981f86000f", + "name": "collapse_collections", + "owner": "nml", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"filename\": {\"add_name\": true, \"__current_case__\": 0, \"place_name\": \"same_multiple\"}, \"input_list\": {\"__class__\": \"ConnectedValue\"}, \"one_header\": true, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.1.0", + "type": "tool", + "uuid": "5feceb7a-ba26-4974-9215-dfdbbcc5b296", + "when": null, + "workflow_outputs": [ + { + "label": "Annotated Variants", + "output_name": "output", + "uuid": "301f944e-0f65-415f-a56f-9899dca85e88" + } + ] + } + }, + "tags": [ + "generic", + "VeuPath", + "Haploid" 
+ ], + "uuid": "d5b4eace-d876-4999-bad3-14c1cb02b0c9", + "version": 5 +} diff --git a/workflows/variant-calling/haploid-variant-calling-wgs-pe/test-data/Annotated Variants.tabular b/workflows/variant-calling/haploid-variant-calling-wgs-pe/test-data/Annotated Variants.tabular new file mode 100644 index 000000000..2d9214d11 --- /dev/null +++ b/workflows/variant-calling/haploid-variant-calling-wgs-pe/test-data/Annotated Variants.tabular @@ -0,0 +1,5 @@ +Sample CHROM POS FILTER REF ALT DP AF DP4 SB EFF[*].IMPACT EFF[*].FUNCLASS EFF[*].EFFECT EFF[*].GENE EFF[*].CODON EFF[*].AA EFF[*].TRID +ERR018930 NC_009906.1 3204 PASS A G 22 0.727273 2,3,3,14 2 MODIFIER NONE INTRAGENIC PVX_087665 . . . +ERR018930 NC_009906.1 3261 PASS C A 15 0.333333 3,7,2,3 0 MODIFIER NONE INTRAGENIC PVX_087665 . . . +ERR018930 NC_009906.1 482116 PASS A G 20 0.2 5,9,4,0 10 . . . . . . . +ERR1035492 NC_009906.1 2975 PASS A G 26 0.692308 5,3,12,6 0 MODIFIER NONE INTRAGENIC PVX_087665 . . . From 2b9da240be06e895cb658f6dd6865ce6c6c7b3bc Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 29 Oct 2024 20:09:58 +0100 Subject: [PATCH 2/2] Minor readme tweaks --- .../haploid-variant-calling-wgs-pe/README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/workflows/variant-calling/haploid-variant-calling-wgs-pe/README.md b/workflows/variant-calling/haploid-variant-calling-wgs-pe/README.md index 0aabda928..12ff08b33 100644 --- a/workflows/variant-calling/haploid-variant-calling-wgs-pe/README.md +++ b/workflows/variant-calling/haploid-variant-calling-wgs-pe/README.md @@ -1,11 +1,18 @@ # Haploid variant calling for whole genome sequencing paired end data +This workflow uses Illumina or Element read data to discover variants (single nucleotide polymorphisms, SNPs, and small indels) in haploid genomes with multiple genomic sequences (contigs, scaffolds, or chromosomes). 
+ ## Inputs dataset - The workflow needs a list of paired end fastq files - A GTF containing the Gene annotation for the selected haploid genome - A fasta file for the haploid genome to call variants against +## Outputs + +- Tab-delimited summary of annotated variants +- Report summarizing the quality of input data and mapping results + ## Processing - The workflow will remove adapters using fastp