diff --git a/workflows/genome_annotation/annotation_maker/.dockstore.yml b/workflows/genome_annotation/annotation_maker/.dockstore.yml new file mode 100644 index 000000000..22f9838e5 --- /dev/null +++ b/workflows/genome_annotation/annotation_maker/.dockstore.yml @@ -0,0 +1,13 @@ +version: 1.2 +workflows: +- name: main + subclass: Galaxy + publish: true + primaryDescriptorPath: /Genome_annotation_with_maker_short.ga + testParameterFiles: + - /Genome_annotation_with_maker_short_tests.yml + authors: + - name: Romane Libouban + email: romane.libouban@irisa.fr + orcid: 0009-0001-4920-9951 + diff --git a/workflows/genome_annotation/annotation_maker/CHANGELOG.md b/workflows/genome_annotation/annotation_maker/CHANGELOG.md new file mode 100644 index 000000000..3594f992f --- /dev/null +++ b/workflows/genome_annotation/annotation_maker/CHANGELOG.md @@ -0,0 +1,5 @@ +# Changelog + +## [0.1] + +Initial version of the genome annotation workflow with maker. \ No newline at end of file diff --git a/workflows/genome_annotation/annotation_maker/Genome_annotation_with_maker_short.ga b/workflows/genome_annotation/annotation_maker/Genome_annotation_with_maker_short.ga new file mode 100644 index 000000000..dd8247d56 --- /dev/null +++ b/workflows/genome_annotation/annotation_maker/Genome_annotation_with_maker_short.ga @@ -0,0 +1,822 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "This workflow uses Maker to annotate a genome.", + "comments": [ + { + "child_steps": [ + 12 + ], + "color": "pink", + "data": { + "title": "Visualization" + }, + "id": 5, + "position": [ + 1400, + 640 + ], + "size": [ + 240, + 244.8 + ], + "type": "frame" + }, + { + "child_steps": [ + 0, + 1, + 2, + 3, + 4 + ], + "color": "blue", + "data": { + "title": "Inputs" + }, + "id": 0, + "position": [ + 0, + 430 + ], + "size": [ + 240, + 682.6 + ], + "type": "frame" + }, + { + "child_steps": [ + 6, + 5 + ], + "color": "green", + "data": { + "title": "Genome quality evaluation" + }, + "id": 1, + "position": [ + 430, 
+ 0 + ], + "size": [ + 470, + 400 + ], + "type": "frame" + }, + { + "child_steps": [ + 8, + 11 + ], + "color": "turquoise", + "data": { + "title": "Evaluation - Predicted protein from annotation" + }, + "id": 2, + "position": [ + 770, + 470 + ], + "size": [ + 480, + 478.8 + ], + "type": "frame" + }, + { + "child_steps": [ + 7 + ], + "color": "yellow", + "data": { + "title": "Annotation with Maker" + }, + "id": 3, + "position": [ + 400, + 930 + ], + "size": [ + 240, + 742 + ], + "type": "frame" + }, + { + "child_steps": [ + 9 + ], + "color": "green", + "data": { + "title": "Annotation statistics" + }, + "id": 4, + "position": [ + 750, + 1040 + ], + "size": [ + 240, + 260 + ], + "type": "frame" + }, + { + "child_steps": [ + 10 + ], + "color": "black", + "data": { + "title": "Improving gene naming" + }, + "id": 6, + "position": [ + 1180, + 1100 + ], + "size": [ + 240, + 224.4 + ], + "type": "frame" + } + ], + "creator": [ + { + "class": "Person", + "identifier": "0009-0001-4920-9951", + "name": "Romane Libouban" + } + ], + "format-version": "0.1", + "license": "MIT", + "release": "0.1", + "name": "Genome annotation with Maker (short)", + "report": { + "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" + }, + "steps": { + "0": { + "annotation": "Genome sequence", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "Genome sequence", + "name": "Genome sequence" + } + ], + "label": "Genome sequence", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 20, + "top": 470 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"fasta\"], \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "d4a25db2-d25c-4fce-bd4e-d48cdcf73e84", + "when": null, + "workflow_outputs": [] + }, + "1": { + 
"annotation": "Genome assembly", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "Genome assembly", + "name": "Genome assembly" + } + ], + "label": "Genome assembly", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 20, + "top": 600 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"fasta\"], \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "68b09574-db05-4b22-bd53-1bca87898d91", + "when": null, + "workflow_outputs": [] + }, + "2": { + "annotation": "Protein sequences", + "content_id": null, + "errors": null, + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "Protein sequences", + "name": "Protein sequences" + } + ], + "label": "Protein sequences", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 20, + "top": 730 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "51e28342-f0d8-4be6-915c-336c13e342c7", + "when": null, + "workflow_outputs": [] + }, + "3": { + "annotation": "Augustus training", + "content_id": null, + "errors": null, + "id": 3, + "input_connections": {}, + "inputs": [ + { + "description": "Augustus training", + "name": "Augustus training" + } + ], + "label": "Augustus training", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 20, + "top": 850 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "062a77a0-a4c2-4ac9-a73e-489f64a82ebb", + "when": null, + "workflow_outputs": [] + }, + "4": { + "annotation": "SNAP training", + "content_id": null, + "errors": null, + "id": 4, + "input_connections": {}, + "inputs": [ + { + "description": "SNAP training", + "name": "SNAP training" + } + ], + "label": "SNAP training", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 
20, + "top": 990 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "507ffef8-8c0d-4d81-a946-c79956cde331", + "when": null, + "workflow_outputs": [] + }, + "5": { + "annotation": "Fasta Statistics on the genome", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/fasta_stats/fasta-stats/2.0", + "errors": null, + "id": 5, + "input_connections": { + "fasta": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "label": " Fasta Statistics ", + "name": "Fasta Statistics", + "outputs": [ + { + "name": "stats_output", + "type": "tabular" + } + ], + "position": { + "left": 450, + "top": 100 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/fasta_stats/fasta-stats/2.0", + "tool_shed_repository": { + "changeset_revision": "0dbb995c7d35", + "name": "fasta_stats", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"fasta\": {\"__class__\": \"ConnectedValue\"}, \"gaps_option\": false, \"genome_size\": null, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.0", + "type": "tool", + "uuid": "0808d952-600b-46f0-9575-8fb8b8966c48", + "when": null, + "workflow_outputs": [ + { + "label": "fasta stats genome", + "output_name": "stats_output", + "uuid": "57ba9a76-95e9-4f6a-9ba2-bfeff0f05be8" + } + ] + }, + "6": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/busco/busco/5.7.1+galaxy0", + "errors": null, + "id": 6, + "input_connections": { + "input": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Busco", + "name": "input" + } + ], + "label": null, + "name": "Busco", + "outputs": [ + { + "name": "busco_sum", + "type": "txt" + }, + { + "name": "busco_table", + "type": "tabular" + }, + { + "name": "busco_missing", + "type": 
"tabular" + }, + { + "name": "summary_image", + "type": "png" + }, + { + "name": "busco_gff", + "type": "gff3" + } + ], + "position": { + "left": 680, + "top": 40 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/busco/busco/5.7.1+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2babe6d5c561", + "name": "busco", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"adv\": {\"evalue\": \"0.001\", \"limit\": \"3\", \"contig_break\": \"10\"}, \"busco_mode\": {\"mode\": \"tran\", \"__current_case__\": 1}, \"input\": {\"__class__\": \"RuntimeValue\"}, \"lineage\": {\"lineage_mode\": \"select_lineage\", \"__current_case__\": 1, \"lineage_dataset\": \"fungi_odb10\"}, \"lineage_conditional\": {\"selector\": \"cached\", \"__current_case__\": 0, \"cached_db\": \"all+2024-03-21-114020\"}, \"outputs\": [\"short_summary\", \"image\", \"gff\", \"missing\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.7.1+galaxy0", + "type": "tool", + "uuid": "670eb200-8cae-4de7-a3e7-6c6e95b9e92f", + "when": null, + "workflow_outputs": [ + { + "label": "busco sum genome", + "output_name": "busco_sum", + "uuid": "9632ecea-72ce-4cb8-8368-13a89125c141" + }, + { + "label": "busco gff genome", + "output_name": "busco_gff", + "uuid": "d67de38f-78f0-4be9-b699-945c9ccf2a87" + }, + { + "label": "busco image genome", + "output_name": "summary_image", + "uuid": "a866af88-80c4-4e61-939c-953f9a8d81fc" + }, + { + "label": "busco table genome", + "output_name": "busco_table", + "uuid": "18c8704b-f281-472c-a81e-d7b05fc98d69" + }, + { + "label": "busco missing genome", + "output_name": "busco_missing", + "uuid": "84b2b45d-2095-4d24-898e-dba1f491f385" + } + ] + }, + "7": { + "annotation": "Annotation with Maker", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/maker/maker/2.31.11+galaxy2", + "errors": null, + "id": 7, + "input_connections": { + "abinitio_gene_prediction|aug_prediction|augustus_model": { + 
"id": 3, + "output_name": "output" + }, + "abinitio_gene_prediction|snaphmm": { + "id": 4, + "output_name": "output" + }, + "est_evidences|est": { + "id": 1, + "output_name": "output" + }, + "genome": { + "id": 0, + "output_name": "output" + }, + "protein_evidences|protein": { + "id": 2, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Maker", + "name": "abinitio_gene_prediction" + }, + { + "description": "runtime parameter for tool Maker", + "name": "est_evidences" + }, + { + "description": "runtime parameter for tool Maker", + "name": "protein_evidences" + } + ], + "label": "Maker", + "name": "Maker", + "outputs": [ + { + "name": "output_gff", + "type": "gff3" + }, + { + "name": "output_evidences", + "type": "gff3" + }, + { + "name": "output_full", + "type": "gff3" + } + ], + "position": { + "left": 420, + "top": 970 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/maker/maker/2.31.11+galaxy2", + "tool_shed_repository": { + "changeset_revision": "370c210d9541", + "name": "maker", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"abinitio_gene_prediction\": {\"snaphmm\": {\"__class__\": \"ConnectedValue\"}, \"aug_prediction\": {\"augustus_mode\": \"history\", \"__current_case__\": 1, \"augustus_model\": {\"__class__\": \"ConnectedValue\"}}, \"unmask\": false}, \"advanced\": {\"fix_nucleotides\": false, \"other_gff\": null, \"alt_peptide\": \"C\", \"max_dna_len\": \"100000\", \"min_contig\": \"1\", \"pred_flank\": \"200\", \"pred_stats\": false, \"AED_threshold\": \"1.0\", \"min_protein\": \"0\", \"alt_splice\": false, \"always_complete\": false, \"map_forward\": false, \"keep_preds\": \"0.0\", \"split_hit\": \"10000\", \"correct_est_fusion\": false, \"single_exon\": {\"single_exon\": \"0\", \"__current_case__\": 0}}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"est_evidences\": {\"est2genome\": false, 
\"est\": {\"__class__\": \"ConnectedValue\"}, \"altest\": null, \"est_gff\": null, \"altest_gff\": null}, \"gene_prediction\": {\"pred_gff\": null, \"model_gff\": null, \"trna\": false, \"snoscan_rrna\": null}, \"genome\": {\"__class__\": \"ConnectedValue\"}, \"license_agreement\": true, \"organism_type\": \"eukaryotic\", \"protein_evidences\": {\"protein2genome\": false, \"protein\": {\"__class__\": \"ConnectedValue\"}, \"protein_gff\": null}, \"reannotation\": {\"reannotate\": \"no\", \"__current_case__\": 0}, \"repeat_masking\": {\"repeat_source\": {\"source_type\": \"no\", \"__current_case__\": 3}}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.31.11+galaxy2", + "type": "tool", + "uuid": "88a77e0a-3060-46e2-a885-0644ee1d2d8e", + "when": null, + "workflow_outputs": [ + { + "label": "maker gff", + "output_name": "output_gff", + "uuid": "00c3d37b-dfaa-474d-9cd1-c878eedb032e" + }, + { + "label": "maker evidences", + "output_name": "output_evidences", + "uuid": "9051492b-9f9a-47cf-8308-759833f99979" + }, + { + "label": "maker full", + "output_name": "output_full", + "uuid": "81a7537f-174d-4db3-a518-c3bf45ba4857" + } + ] + }, + "8": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/gffread/gffread/2.2.1.4+galaxy0", + "errors": null, + "id": 8, + "input_connections": { + "input": { + "id": 7, + "output_name": "output_gff" + }, + "reference_genome|genome_fasta": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool gffread", + "name": "reference_genome" + } + ], + "label": "GFFRead", + "name": "gffread", + "outputs": [ + { + "name": "output_exons", + "type": "fasta" + } + ], + "position": { + "left": 790, + "top": 550 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/gffread/gffread/2.2.1.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "3e436657dcd0", + "name": "gffread", + "owner": "devteam", + "tool_shed": 
"toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"chr_replace\": null, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"decode_url\": true, \"expose\": true, \"filtering\": null, \"full_gff_attribute_preservation\": true, \"gffs\": {\"gff_fmt\": \"none\", \"__current_case__\": 0}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"maxintron\": null, \"merging\": {\"merge_sel\": \"none\", \"__current_case__\": 0}, \"reference_genome\": {\"source\": \"history\", \"__current_case__\": 2, \"genome_fasta\": {\"__class__\": \"ConnectedValue\"}, \"ref_filtering\": null, \"fa_outputs\": [\"-w exons.fa\"]}, \"region\": {\"region_filter\": \"none\", \"__current_case__\": 0}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.2.1.4+galaxy0", + "type": "tool", + "uuid": "b2ddf33f-d870-41cf-88e1-2ecafe34a2d3", + "when": null, + "workflow_outputs": [ + { + "label": "gffread exons", + "output_name": "output_exons", + "uuid": "03ea976d-027f-49a2-8495-b95e1750ba2d" + } + ] + }, + "9": { + "annotation": "Genome annotation statistics on the maker's annotation", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/jcvi_gff_stats/jcvi_gff_stats/0.8.4", + "errors": null, + "id": 9, + "input_connections": { + "gff": { + "id": 7, + "output_name": "output_gff" + }, + "ref_genome|genome": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Genome annotation statistics", + "name": "ref_genome" + } + ], + "label": "Genome annotation statistics", + "name": "Genome annotation statistics", + "outputs": [ + { + "name": "summary", + "type": "txt" + }, + { + "name": "graphs", + "type": "pdf" + } + ], + "position": { + "left": 770, + "top": 1080 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/jcvi_gff_stats/jcvi_gff_stats/0.8.4", + "tool_shed_repository": { + "changeset_revision": "8cffbd184762", + "name": "jcvi_gff_stats", + "owner": "iuc", + 
"tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"gff\": {\"__class__\": \"ConnectedValue\"}, \"ref_genome\": {\"genome_type_select\": \"history\", \"__current_case__\": 1, \"genome\": {\"__class__\": \"ConnectedValue\"}}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.8.4", + "type": "tool", + "uuid": "13b8f842-4061-4fce-8beb-610000044cdf", + "when": null, + "workflow_outputs": [ + { + "label": "graphs genome", + "output_name": "graphs", + "uuid": "3992f3ad-3cf7-45ad-b31d-e324c35468f6" + }, + { + "label": "summary genome", + "output_name": "summary", + "uuid": "8ae8f720-48aa-484b-a9f2-42420db1caee" + } + ] + }, + "10": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/maker_map_ids/maker_map_ids/2.31.11", + "errors": null, + "id": 10, + "input_connections": { + "maker_gff": { + "id": 7, + "output_name": "output_gff" + } + }, + "inputs": [], + "label": "Map annotation ids", + "name": "Map annotation ids", + "outputs": [ + { + "name": "renamed", + "type": "gff" + }, + { + "name": "id_map", + "type": "tabular" + } + ], + "position": { + "left": 1200, + "top": 1140 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/maker_map_ids/maker_map_ids/2.31.11", + "tool_shed_repository": { + "changeset_revision": "e906fa778440", + "name": "maker_map_ids", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"justify\": \"6\", \"maker_gff\": {\"__class__\": \"ConnectedValue\"}, \"prefix\": \"TEST_\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.31.11", + "type": "tool", + "uuid": "2e2af2b8-1bbb-4b0e-9f3d-9f9fe41213cd", + "when": null, + "workflow_outputs": [ + { + "label": "renamed gff3", + "output_name": "renamed", + "uuid": 
"d20f31b9-834c-4bff-b48d-e1892e740897" + }, + { + "label": "id map", + "output_name": "id_map", + "uuid": "ff088733-2509-4811-af89-3017731e341f" + } + ] + }, + "11": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/busco/busco/5.7.1+galaxy0", + "errors": null, + "id": 11, + "input_connections": { + "input": { + "id": 8, + "output_name": "output_exons" + } + }, + "inputs": [], + "label": null, + "name": "Busco", + "outputs": [ + { + "name": "busco_sum", + "type": "txt" + }, + { + "name": "busco_table", + "type": "tabular" + }, + { + "name": "busco_missing", + "type": "tabular" + }, + { + "name": "summary_image", + "type": "png" + }, + { + "name": "busco_gff", + "type": "gff3" + } + ], + "position": { + "left": 1030, + "top": 510 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/busco/busco/5.7.1+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2babe6d5c561", + "name": "busco", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"adv\": {\"evalue\": \"0.001\", \"limit\": \"3\", \"contig_break\": \"10\"}, \"busco_mode\": {\"mode\": \"tran\", \"__current_case__\": 1}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"lineage\": {\"lineage_mode\": \"select_lineage\", \"__current_case__\": 1, \"lineage_dataset\": \"fungi_odb10\"}, \"lineage_conditional\": {\"selector\": \"cached\", \"__current_case__\": 0, \"cached_db\": \"all+2024-03-21-114020\"}, \"outputs\": [\"short_summary\", \"missing\", \"image\", \"gff\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.7.1+galaxy0", + "type": "tool", + "uuid": "ba1f8c50-d478-45a2-bcf9-3bba389ba763", + "when": null, + "workflow_outputs": [ + { + "label": "busco image predicted proteins", + "output_name": "summary_image", + "uuid": "45dccf85-b254-4da5-bf65-ca33897043e1" + }, + { + "label": "busco gff predicted 
proteins", + "output_name": "busco_gff", + "uuid": "55ca4792-9cc7-4e11-8e98-cde89a980f9b" + }, + { + "label": "busco sum predicted proteins", + "output_name": "busco_sum", + "uuid": "bd03a2eb-813e-4bc3-a80d-2c32ffe1ff5b" + }, + { + "label": "busco table predicted proteins", + "output_name": "busco_table", + "uuid": "65394361-dd3c-483e-adbd-82e3777a24ce" + }, + { + "label": "busco missing predicted proteins", + "output_name": "busco_missing", + "uuid": "9a72bf77-47b8-457e-9147-349a75cd00cf" + } + ] + }, + "12": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy1", + "errors": null, + "id": 12, + "input_connections": { + "reference_genome|genome": { + "id": 0, + "output_name": "output" + }, + "track_groups_0|data_tracks_0|data_format|annotation": { + "id": 10, + "output_name": "renamed" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool JBrowse", + "name": "reference_genome" + } + ], + "label": null, + "name": "JBrowse", + "outputs": [ + { + "name": "output", + "type": "html" + } + ], + "position": { + "left": 1420, + "top": 680 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy1", + "tool_shed_repository": { + "changeset_revision": "a6e57ff585c0", + "name": "jbrowse", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"fasta\", \"action\": {\"action_select\": \"create\", \"__current_case__\": 0}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"gencode\": \"1\", \"jbgen\": {\"defaultLocation\": \"\", \"trackPadding\": \"20\", \"shareLink\": true, \"aboutDescription\": \"\", \"show_tracklist\": true, \"show_nav\": true, \"show_overview\": true, \"show_menu\": true, \"hideGenomeOptions\": false}, \"plugins\": {\"BlastView\": true, \"ComboTrackSelector\": false, \"GCContent\": false}, \"reference_genome\": {\"genome_type_select\": \"history\", \"__current_case__\": 1, 
\"genome\": {\"__class__\": \"ConnectedValue\"}}, \"standalone\": \"minimal\", \"track_groups\": [{\"__index__\": 0, \"category\": \"Maker annotation\", \"data_tracks\": [{\"__index__\": 0, \"data_format\": {\"data_format_select\": \"gene_calls\", \"__current_case__\": 2, \"annotation\": {\"__class__\": \"ConnectedValue\"}, \"match_part\": {\"match_part_select\": false, \"__current_case__\": 1}, \"index\": false, \"track_config\": {\"track_class\": \"NeatHTMLFeatures/View/Track/NeatFeatures\", \"__current_case__\": 3, \"html_options\": {\"topLevelFeatures\": null}}, \"jbstyle\": {\"style_classname\": \"feature\", \"style_label\": \"product,name,id\", \"style_description\": \"note,description\", \"style_height\": \"10px\", \"max_height\": \"600\"}, \"jbcolor_scale\": {\"color_score\": {\"color_score_select\": \"none\", \"__current_case__\": 0, \"color\": {\"color_select\": \"automatic\", \"__current_case__\": 0}}}, \"jb_custom_config\": {\"option\": []}, \"jbmenu\": {\"track_menu\": []}, \"track_visibility\": \"default_off\", \"override_apollo_plugins\": \"False\", \"override_apollo_drag\": \"False\"}}]}], \"uglyTestingHack\": \"\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.16.11+galaxy1", + "type": "tool", + "uuid": "6972ade5-efc7-4987-86ea-0f38b92351e1", + "when": null, + "workflow_outputs": [] + } + }, + "tags": [], + "uuid": "8ef0e1cf-c22b-4452-9581-d364e32635e9", + "version": 1 +} \ No newline at end of file diff --git a/workflows/genome_annotation/annotation_maker/Genome_annotation_with_maker_short_tests.yml b/workflows/genome_annotation/annotation_maker/Genome_annotation_with_maker_short_tests.yml new file mode 100644 index 000000000..180538cca --- /dev/null +++ b/workflows/genome_annotation/annotation_maker/Genome_annotation_with_maker_short_tests.yml @@ -0,0 +1,130 @@ +- doc: Test outline for Genome_annotation_with_maker_short.ga + job: + Genome sequence: + class: File + location: 
https://zenodo.org/records/13987091/files/S_pombe_chrIII_genome.fasta + filetype: fasta + Genome assembly: + class: File + location: https://zenodo.org/records/13987091/files/S_pombe_trinity_assembly.fasta + filetype: fasta + Protein sequences: + class: File + location: https://zenodo.org/records/13987091/files/Swissprot_no_S_pombe.fasta + filetype: fasta + Augustus training: + class: File + location: https://zenodo.org/records/13987091/files/augustus_training.tar.gz.augustus + filetype: augustus + SNAP training: + class: File + location: https://zenodo.org/records/13987091/files/snap_training.snaphmm + filetype: snaphmm + + + outputs: + # Each has_text assertion carries exactly one `text` value: repeating the `text` key inside one mapping is a duplicate YAML key, and most YAML loaders then keep only the last string. + fasta stats genome: + asserts: + - has_n_lines: + n: 30 + + busco sum genome: + asserts: + - has_n_lines: + n: 20 + - has_text: + text: "BUSCO version is: 5.7.1" + - has_text: + text: "The lineage dataset is: fungi_odb10" + busco table genome: + asserts: + - has_text: + text: "BUSCO version is: 5.7.1" + - has_text: + text: "The lineage dataset is: fungi_odb10" + busco missing genome: + asserts: + - has_text: + text: "BUSCO version is: 5.7.1" + - has_text: + text: "The lineage dataset is: fungi_odb10" + + gffread exons: + asserts: + - has_text: + text: ">snap-NC_003421.2-processed-gene-0.2-mRNA-1 CDS=1-441 Name=snap-NC_003421.2-processed-gene-0.2-mRNA-1;_AED=1.00;_eAED=1.00;_QI=0|0|0|0|1|1|3|0|146" + - has_text: + text: "ATGATAGGAAGAGCCGACATCGAAGAATCAAAAAGCAACGTCGCTATGAACGCTTGGCTGCCACAAGCCA" + + maker gff: + asserts: + - has_n_lines: + n: 6253 + delta: 10 + - has_text: + text: "##gff-version 3" + - has_text: + text: "NC_003421.2" + maker evidences: + asserts: + - has_n_lines: + n: 74613 + delta: 10 + - has_text: + text: "##gff-version 3" + - has_text: + text: "NC_003421.2" + maker full: + asserts: + - has_text: + text: "##gff-version 3" + - has_text: + text: "NC_003421.2" + + summary genome: + asserts: + - has_n_lines: + n: 27 + graphs genome: + location: https://zenodo.org/records/14276084/files/Genome_annotation_statistics_graphs.pdf?download=1 + compare: sim_size + delta: 30000 + + renamed gff3: + asserts: + - has_n_lines: + 
n: 6253 + delta: 10 + - has_text: + text: "##gff-version 3" + - has_text: + text: "ID=TEST_000012-RA:exon:0;Parent=TEST_000012-RA" + id map: + asserts: + - has_n_lines: + n: 1728 + - has_text: + text: "snap-NC_003421.2-processed-gene-0.0" + - has_text: + text: "TEST_000008" + + busco missing predicted proteins: + asserts: + - has_text: + text: "BUSCO version is: 5.7.1" + - has_text: + text: "The lineage dataset is: fungi_odb10" + - has_text: + text: "106281at4751" + busco sum predicted proteins: + asserts: + - has_n_lines: + n: 20 + - has_text: + text: "BUSCO version is: 5.7.1" + busco table predicted proteins: + asserts: + - has_text: + text: "BUSCO version is: 5.7.1" + - has_text: + text: "The lineage dataset is: fungi_odb10" + busco image predicted proteins: + compare: sim_size + delta: 30000 + busco gff predicted proteins: + asserts: + - has_text: + text: "gff-version 3" + - has_text: + text: "MetaEuk" + + + + diff --git a/workflows/genome_annotation/annotation_maker/README.md b/workflows/genome_annotation/annotation_maker/README.md new file mode 100644 index 000000000..aeedd1f56 --- /dev/null +++ b/workflows/genome_annotation/annotation_maker/README.md @@ -0,0 +1,37 @@ +# Genome annotation workflow with Maker + +This workflow allows for genome annotation using Maker and evaluates the quality of the annotation with BUSCO and genome annotation statistics. The annotation can then be improved, standardized, and visualized with additional tools. + +**Maker** is a gene model prediction software that uses ab initio predictors (SNAP and Augustus) to improve its predictions. Maker is capable of annotating both prokaryotes and eukaryotes. It works by aligning as much evidence as possible along the genome sequence, then reconciling all these signals to determine likely gene structures. + +## Workflow Steps + +- Annotation with Maker: Maker uses the genome sequence, protein evidence, ab-initio predictions, and ESTs to produce the annotation. +- Quality Evaluation: + - Run Fasta Statistics to assess genome assembly quality. 
+ - Use BUSCO to evaluate the completeness of both the genome sequence and the annotated transcripts. +- Annotation Statistics: Analyze the annotation using Genome Annotation Statistics, producing graphical and textual summaries. +- Sequence Extraction: Extract the spliced exon (transcript) sequences of the predicted genes using GFFRead for downstream analysis. +- Improve Gene Names: Standardize gene identifiers using Map annotation ids for better readability. +- Visualization: Load the genome sequence and the renamed annotation into JBrowse for interactive browsing. + +## Input Data +The following input files are required for the workflow: +- Genome sequence (FASTA format): The genome to be annotated. Used by Maker, Fasta Statistics, BUSCO, GFFRead, Genome Annotation Statistics, and JBrowse. +- Protein sequences (FASTA format): Protein evidence to assist annotation in Maker. +- Genome assembly (FASTA format): Despite its label, this input supplies Maker's EST evidence, i.e. assembled transcript sequences (the test uses a Trinity assembly). +- Augustus training and SNAP training: Trained models for the Augustus and SNAP ab-initio gene predictors used by Maker. + + +## Output Data +The workflow generates the following outputs: +- Annotation file (GFF3): Contains the final consensus gene models produced by Maker. +- Genome statistics: A tabular file summarizing contig sizes and base content, produced by Fasta Statistics. +- BUSCO results: Assess the completeness of the genome and of the annotation, and include: + - A summary of results. + - A table of all searched BUSCO genes with their status. + - A table of missing BUSCO genes. +- Annotation statistics: Summary and graphical analyses of the annotation, produced by Genome Annotation Statistics. +- Transcript sequences (FASTA): Spliced exon sequences extracted from the annotation using GFFRead. +- Renamed GFF annotation file: Contains standardized gene identifiers, produced by Map annotation ids. +- Genome browser visualization (HTML): An interactive genome view produced by JBrowse. \ No newline at end of file