From c2cf176f00ef29927801d575804b863e89406c2e Mon Sep 17 00:00:00 2001 From: rlibouban Date: Tue, 22 Oct 2024 10:28:56 +0200 Subject: [PATCH 1/9] add lncRNAs annotation workflow --- .../lncRNAs_annotation/.dockstore.yml | 11 + .../lncRNAs_annotation/CHANGELOG.md | 5 + ...flow-lncRNAs_annotation_workflow-tests.yml | 40 +++ ...xy-Workflow-lncRNAs_annotation_workflow.ga | 317 ++++++++++++++++++ .../lncRNAs_annotation/README.md | 46 +++ 5 files changed, 419 insertions(+) create mode 100644 workflows/genome_annotation/lncRNAs_annotation/.dockstore.yml create mode 100644 workflows/genome_annotation/lncRNAs_annotation/CHANGELOG.md create mode 100644 workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml create mode 100644 workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow.ga create mode 100644 workflows/genome_annotation/lncRNAs_annotation/README.md diff --git a/workflows/genome_annotation/lncRNAs_annotation/.dockstore.yml b/workflows/genome_annotation/lncRNAs_annotation/.dockstore.yml new file mode 100644 index 000000000..0f1660c7c --- /dev/null +++ b/workflows/genome_annotation/lncRNAs_annotation/.dockstore.yml @@ -0,0 +1,11 @@ +version: 1.2 +workflows: +- name: main + subclass: Galaxy + publish: true + primaryDescriptorPath: /Galaxy-Workflow-lncRNAs_annotation_workflow.ga + testParameterFiles: + - /Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml + authors: + - name: Romane Libouban + email: romane.libouban@irisa.fr diff --git a/workflows/genome_annotation/lncRNAs_annotation/CHANGELOG.md b/workflows/genome_annotation/lncRNAs_annotation/CHANGELOG.md new file mode 100644 index 000000000..c22e18ddc --- /dev/null +++ b/workflows/genome_annotation/lncRNAs_annotation/CHANGELOG.md @@ -0,0 +1,5 @@ +# Changelog + +## [0.1] + +Initial version of the lncRNAs annotation workflow. 
\ No newline at end of file diff --git a/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml b/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml new file mode 100644 index 000000000..94615826b --- /dev/null +++ b/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml @@ -0,0 +1,40 @@ +- doc: Test outline for Galaxy-Workflow-lncRNAs_annotation_workflow.ga + job: + Genome assembly: + class: File + location: https://zenodo.org/records/11367439/files/genome_assembly.fasta + filetype: fasta + Genome annotation: + class: File + location: https://zenodo.org/records/11367439/files/genome_annotation.gff3 + filetype: gff3 + RNA-Seq: + class: File + location: https://zenodo.org/records/11367439/files/SRR8534859_RNASeq_mapped.bam + filetype: bam + + + outputs: + genome_annotation_gtf: + location: https://zenodo.org/api/records/13941438/draft/files/gffread.gtf/content + compare: sim_size + delta: 300000 + + stringtie_gtf: + location: https://zenodo.org/records/13941438/files/gffread.gtf?download=1 + compare: sim_size + delta: 300000 + + lcnRNA_annotation: + location: https://zenodo.org/api/records/13941438/draft/files/lncRNA_annotation_FEELnc.gtf/content + compare: sim_size + delta: 300000 + classification: + location: https://zenodo.org/api/records/13941438/draft/files/Classifier_FEELnc.txt/content + compare: sim_size + delta: 300000 + + lncRNA_genome_annotation: + location: https://zenodo.org/api/records/13941438/draft/files/Concatenate_datasets.gtf/content + compare: sim_size + delta: 300000 diff --git a/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow.ga b/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow.ga new file mode 100644 index 000000000..aa6cb9864 --- /dev/null +++ 
b/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow.ga @@ -0,0 +1,317 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "This workflow uses the FEELnc tool to annotate long non-coding RNAs. Before annotating these long non-coding RNAs, StringTie will be used to assemble the RNA-seq alignments into potential trancriptions. The gffread tool provides a genome annotation file in GTF format.", + "comments": [], + "creator": [ + { + "class": "Person", + "email": "mailto:romane.libouban@irisa.fr", + "name": "Romane Libouban" + } + ], + "format-version": "0.1", + "license": "CC-BY-4.0", + "name": "lncRNAs annotation workflow", + "report": { + "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" + }, + "steps": { + "0": { + "annotation": "Genome assembly", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "Genome assembly" + } + ], + "label": "Genome assembly", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 140, + "top": 130 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"fasta\"], \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "6c129c58-d982-445f-b41c-c2c7387c5e81", + "when": null, + "workflow_outputs": [] + }, + "1": { + "annotation": "Genome annotation", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "Genome annotation" + } + ], + "label": "Genome annotation", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 0, + "top": 290 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"gff3\"], \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": 
"c221afda-d336-4416-bfa0-f41a19c50097", + "when": null, + "workflow_outputs": [] + }, + "2": { + "annotation": "RNA-Seq", + "content_id": null, + "errors": null, + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "RNA-Seq" + } + ], + "label": "RNA-Seq", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 40, + "top": 460 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"bam\"], \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "1eccf2fb-d243-4be7-a952-a9d6d1375b89", + "when": null, + "workflow_outputs": [] + }, + "3": { + "annotation": "protein sequences extracted", + "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/gffread/gffread/2.2.1.4+galaxy0", + "errors": null, + "id": 3, + "input_connections": { + "input": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [], + "label": "gffread", + "name": "gffread", + "outputs": [ + { + "name": "output_gtf", + "type": "gtf" + } + ], + "position": { + "left": 550, + "top": 380 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/gffread/gffread/2.2.1.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "3e436657dcd0", + "name": "gffread", + "owner": "devteam", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"chr_replace\": null, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"decode_url\": false, \"expose\": false, \"filtering\": null, \"full_gff_attribute_preservation\": false, \"gffs\": {\"gff_fmt\": \"gtf\", \"__current_case__\": 2, \"tname\": \"\"}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"maxintron\": null, \"merging\": {\"merge_sel\": \"none\", \"__current_case__\": 0}, \"reference_genome\": {\"source\": \"none\", \"__current_case__\": 0}, \"region\": {\"region_filter\": \"none\", \"__current_case__\": 0}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": 
"2.2.1.4+galaxy0", + "type": "tool", + "uuid": "b40d9096-4d41-47f7-a7ff-d23efa36c788", + "when": null, + "workflow_outputs": [ + { + "label": "genome_annotation_gtf", + "output_name": "output_gtf", + "uuid": "4357f14d-bf94-480b-ad72-a79ca909041d" + } + ] + }, + "4": { + "annotation": "Assembly step", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie/2.2.3+galaxy0", + "errors": null, + "id": 4, + "input_connections": { + "guide|guide_source|ref_hist": { + "id": 1, + "output_name": "output" + }, + "input_options|input_bam": { + "id": 2, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool StringTie", + "name": "input_options" + } + ], + "label": "StringTie", + "name": "StringTie", + "outputs": [ + { + "name": "output_gtf", + "type": "gtf" + } + ], + "position": { + "left": 590, + "top": 0 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie/2.2.3+galaxy0", + "tool_shed_repository": { + "changeset_revision": "cbf488da3b2c", + "name": "stringtie", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"adv\": {\"abundance_estimation\": false, \"omit_sequences\": \"\", \"name_prefix\": null, \"fraction\": \"0.01\", \"min_tlen\": \"200\", \"min_anchor_len\": \"10\", \"min_anchor_cov\": \"1\", \"min_bundle_cov\": \"1\", \"bdist\": \"50\", \"bundle_fraction\": \"1.0\", \"disable_trimming\": false, \"multi_mapping\": false, \"point_features\": null}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"guide\": {\"use_guide\": \"yes\", \"__current_case__\": 1, \"guide_source\": {\"guide_gff_select\": \"history\", \"__current_case__\": 1, \"ref_hist\": {\"__class__\": \"ConnectedValue\"}}, \"input_estimation\": false, \"special_outputs\": {\"special_outputs_select\": \"no\", \"__current_case__\": 2}, \"coverage_file\": false}, \"input_options\": {\"input_mode\": \"short_reads\", 
\"__current_case__\": 0, \"input_bam\": {\"__class__\": \"ConnectedValue\"}}, \"rna_strandness\": \"\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.2.3+galaxy0", + "type": "tool", + "uuid": "632f7928-d838-4bbf-b0f9-6a94ea9eb427", + "when": null, + "workflow_outputs": [ + { + "label": "stringtie_gtf", + "output_name": "output_gtf", + "uuid": "6edddada-3f7f-4a62-bbb7-8f412234342f" + } + ] + }, + "5": { + "annotation": "annotation of lncRNAs", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/feelnc/feelnc/0.2.1+galaxy0", + "errors": null, + "id": 5, + "input_connections": { + "candidate": { + "id": 4, + "output_name": "output_gtf" + }, + "genome": { + "id": 0, + "output_name": "output" + }, + "reference": { + "id": 3, + "output_name": "output_gtf" + } + }, + "inputs": [], + "label": "FEELnc", + "name": "FEELnc", + "outputs": [ + { + "name": "candidate_lncRNA", + "type": "gtf" + }, + { + "name": "candidate_mRNA", + "type": "gtf" + }, + { + "name": "classifier", + "type": "txt" + } + ], + "position": { + "left": 920, + "top": 160 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/feelnc/feelnc/0.2.1+galaxy0", + "tool_shed_repository": { + "changeset_revision": "55daa4712413", + "name": "feelnc", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"candidate\": {\"__class__\": \"ConnectedValue\"}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"genome\": {\"__class__\": \"ConnectedValue\"}, \"reference\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.2.1+galaxy0", + "type": "tool", + "uuid": "e7d02c0b-09a9-46e0-836a-048a3a5fdc33", + "when": null, + "workflow_outputs": [ + { + "label": "classification", + "output_name": "classifier", + "uuid": "a1e803b3-e991-4c7f-969c-7e73b0d19b62" + }, + { + "label": "lcnRNA_annotation", + "output_name": "candidate_lncRNA", + "uuid": 
"d146e06c-9a4f-4860-8dcd-d7524dee7ef5" + }, + { + "label": "mRNAs_annotation", + "output_name": "candidate_mRNA", + "uuid": "0313d195-5bc6-4fd0-99cc-9e4a21960ae8" + } + ] + }, + "6": { + "annotation": "final annotation", + "content_id": "cat1", + "errors": null, + "id": 6, + "input_connections": { + "input1": { + "id": 5, + "output_name": "candidate_lncRNA" + }, + "queries_0|input2": { + "id": 3, + "output_name": "output_gtf" + } + }, + "inputs": [], + "label": "Concatenate Dataset", + "name": "Concatenate datasets", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 1420, + "top": 370 + }, + "post_job_actions": {}, + "tool_id": "cat1", + "tool_state": "{\"__input_ext\": \"gtf\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"input1\": {\"__class__\": \"ConnectedValue\"}, \"queries\": [{\"__index__\": 0, \"input2\": {\"__class__\": \"ConnectedValue\"}}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.0", + "type": "tool", + "uuid": "27355e91-6006-443f-af12-33b801ec371b", + "when": null, + "workflow_outputs": [ + { + "label": "lncRNA_genome_annotation", + "output_name": "out_file1", + "uuid": "39a4ad30-330d-4cf3-934c-6facba9a92d9" + } + ] + } + }, + "tags": [], + "uuid": "7699f72e-1083-442e-b4cc-90ab57e4783c", + "version": 1 +} \ No newline at end of file diff --git a/workflows/genome_annotation/lncRNAs_annotation/README.md b/workflows/genome_annotation/lncRNAs_annotation/README.md new file mode 100644 index 000000000..8a09cdc15 --- /dev/null +++ b/workflows/genome_annotation/lncRNAs_annotation/README.md @@ -0,0 +1,46 @@ +# lncRNAs annotation workflow + +This workflow uses the FEELnc tool to annotate long non-coding RNAs. Before annotating these long non-coding RNAs, StringTie will be used to assemble the RNA-seq alignments into potential transcripts. The gffread tool provides a genome annotation file in GTF format. 
+ +For future analyses, it would be interesting to use an updated annotation containing messenger RNA and long non-coding RNA. The concatenante tool merges the reference annotation with the long non-coding RNA annotation obtained with FEELnc. + +FEELnc is a 3-step pipeline: +- The first FEELnc “filter” step: consists of extracting and filtering out unwanted transcripts and transcripts overlapping the exons of the reference annotation. +- The second step, “codpot”: for coding potential, consists in calculating the coding potential of the transcripts. This step differentiates long non-coding RNAs from potential coding RNAs. +- The final step, “classifyier”, classifies the new long non-coding RNAs according to the location and direction of transcription of the proximal transcribed RNAs. + +## Input dataset for StringTie +StringTie requires two inputs: +- the RNA-seq alignment in bam format +- the genome annotation file in gff3 format + +## Outputs dataset for StringTie +StringTie generates an annotation file in GTF format. This file contains all the assembled transcripts present in the RNAseq data. + +## Input dataset for gffread +GFFRead requires an input file: the genome annotaton in gff3 format. + + +## Outputs dataset for gffread +An output file is generated in gtf format. 
+ +## Input dataset for FEELnc +FEELnc requires 3 inputs: +- Transcript assembly in gtf format (corresponding to the StringTie output file) +- Reference annotation in gft format (corresponding to the gffread output file) +- Genome sequence in fasta format + +## Outputs dataset for FEELnc +FEELnc generates 3 output files: +- Long non-coding RNA annotation file in gtf format +- Annotation file for messenger RNAs in gtf format +- Classifier output file: table containing the classification of lncRNAs according to their genomic location in relation to other transcripts + + +## Input dataset for concatenate +Concatenate requires 2 inputs: +- genome annotation in gtf format +- annotation of long non-coding RNAs in gtf format + +## Outputs dataset for concatenate +An output file in GTF format is generated, containing the genome annotation and the annotation of long non-coding RNAs. \ No newline at end of file From b48793af8e4784fc79401d95f406f64b9aad2685 Mon Sep 17 00:00:00 2001 From: rlibouban Date: Tue, 22 Oct 2024 10:54:30 +0200 Subject: [PATCH 2/9] add release and change license --- .../Galaxy-Workflow-lncRNAs_annotation_workflow.ga | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow.ga b/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow.ga index aa6cb9864..d388be68a 100644 --- a/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow.ga +++ b/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow.ga @@ -10,7 +10,8 @@ } ], "format-version": "0.1", - "license": "CC-BY-4.0", + "license": "MIT", + "release": "0.1", "name": "lncRNAs annotation workflow", "report": { "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## 
Workflow\n```galaxy\nworkflow_display()\n```\n" From 3ac89f06fb2487c55485a5ab67a7dd2f0dbaac50 Mon Sep 17 00:00:00 2001 From: rlibouban Date: Tue, 22 Oct 2024 12:22:30 +0200 Subject: [PATCH 3/9] error test --- .../Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml b/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml index 94615826b..56a455986 100644 --- a/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml +++ b/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml @@ -21,7 +21,7 @@ delta: 300000 stringtie_gtf: - location: https://zenodo.org/records/13941438/files/gffread.gtf?download=1 + location: https://zenodo.org/records/13941438/files/StringTie.gtf?download=1 compare: sim_size delta: 300000 From 2174cfa2bc8b8cc96ee4c832499d21a135d983d7 Mon Sep 17 00:00:00 2001 From: rlibouban Date: Tue, 22 Oct 2024 13:42:31 +0200 Subject: [PATCH 4/9] url error --- .../Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml b/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml index 56a455986..a323d5823 100644 --- a/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml +++ b/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml @@ -16,7 +16,7 @@ outputs: genome_annotation_gtf: - location: https://zenodo.org/api/records/13941438/draft/files/gffread.gtf/content + location: https://zenodo.org/records/13941438/files/gffread.gtf?download=1 compare: sim_size delta: 300000 
From 5ccad2a6e00373cfed624112c5bb4f4dab2f02df Mon Sep 17 00:00:00 2001 From: rlibouban Date: Tue, 22 Oct 2024 15:00:25 +0200 Subject: [PATCH 5/9] url error --- .../Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml b/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml index a323d5823..8df077656 100644 --- a/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml +++ b/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml @@ -26,15 +26,15 @@ delta: 300000 lcnRNA_annotation: - location: https://zenodo.org/api/records/13941438/draft/files/lncRNA_annotation_FEELnc.gtf/content + location: https://zenodo.org/records/13941438/files/lncRNA_annotation_FEELnc.gtf?download=1 compare: sim_size delta: 300000 classification: - location: https://zenodo.org/api/records/13941438/draft/files/Classifier_FEELnc.txt/content + location: https://zenodo.org/records/13941438/files/Classifier_FEELnc.txt?download=1 compare: sim_size delta: 300000 lncRNA_genome_annotation: - location: https://zenodo.org/api/records/13941438/draft/files/Concatenate_datasets.gtf/content + location: https://zenodo.org/records/13941438/files/Concatenate_datasets.gtf?download=1 compare: sim_size delta: 300000 From cc83d92287cb769160beb3228664327febc6c04c Mon Sep 17 00:00:00 2001 From: rlibouban Date: Tue, 12 Nov 2024 14:09:12 +0100 Subject: [PATCH 6/9] addition of coloured borders and titles --- ...xy-Workflow-lncRNAs_annotation_workflow.ga | 100 +++++++++++++++++- 1 file changed, 99 insertions(+), 1 deletion(-) diff --git a/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow.ga 
b/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow.ga index d388be68a..cb96004e8 100644 --- a/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow.ga +++ b/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow.ga @@ -1,7 +1,105 @@ { "a_galaxy_workflow": "true", "annotation": "This workflow uses the FEELnc tool to annotate long non-coding RNAs. Before annotating these long non-coding RNAs, StringTie will be used to assemble the RNA-seq alignments into potential trancriptions. The gffread tool provides a genome annotation file in GTF format.", - "comments": [], + "comments": [ + { + "child_steps": [ + 3 + ], + "color": "pink", + "data": { + "title": "Conversion from GFF3 to GTF format" + }, + "id": 2, + "position": [ + 443, + 398.4 + ], + "size": [ + 239, + 285 + ], + "type": "frame" + }, + { + "child_steps": [ + 1, + 2, + 0 + ], + "color": "yellow", + "data": { + "title": "Inputs" + }, + "id": 0, + "position": [ + 0, + 206.4 + ], + "size": [ + 243, + 326.2 + ], + "type": "frame" + }, + { + "child_steps": [ + 6 + ], + "color": "blue", + "data": { + "title": "Annotation with mRNA and lncRNA" + }, + "id": 4, + "position": [ + 1134.3, + 434.4 + ], + "size": [ + 268, + 262 + ], + "type": "frame" + }, + { + "child_steps": [ + 5 + ], + "color": "red", + "data": { + "title": "lncRNAs annotation with FEELnc" + }, + "id": 3, + "position": [ + 762.1, + 132.1 + ], + "size": [ + 240, + 322 + ], + "type": "frame" + }, + { + "child_steps": [ + 4 + ], + "color": "green", + "data": { + "title": "Transcripts assembly with StringTie" + }, + "id": 1, + "position": [ + 437.8, + 0.0 + ], + "size": [ + 236, + 256 + ], + "type": "frame" + } + ], "creator": [ { "class": "Person", From 0704b9bd5b7c298ff0522011935e50f34f3ccc7e Mon Sep 17 00:00:00 2001 From: rlibouban Date: Wed, 15 Jan 2025 11:09:16 +0100 Subject: [PATCH 7/9] update README --- .../lncRNAs_annotation/README.md | 
63 +++++++------------ 1 file changed, 24 insertions(+), 39 deletions(-) diff --git a/workflows/genome_annotation/lncRNAs_annotation/README.md b/workflows/genome_annotation/lncRNAs_annotation/README.md index 8a09cdc15..68db9e43c 100644 --- a/workflows/genome_annotation/lncRNAs_annotation/README.md +++ b/workflows/genome_annotation/lncRNAs_annotation/README.md @@ -4,43 +4,28 @@ This workflow uses the FEELnc tool to annotate long non-coding RNAs. Before anno For future analyses, it would be interesting to use an updated annotation containing messenger RNA and long non-coding RNA. The concatenante tool merges the reference annotation with the long non-coding RNA annotation obtained with FEELnc. -FEELnc is a 3-step pipeline: -- The first FEELnc “filter” step: consists of extracting and filtering out unwanted transcripts and transcripts overlapping the exons of the reference annotation. -- The second step, “codpot”: for coding potential, consists in calculating the coding potential of the transcripts. This step differentiates long non-coding RNAs from potential coding RNAs. -- The final step, “classifyier”, classifies the new long non-coding RNAs according to the location and direction of transcription of the proximal transcribed RNAs. -## Input dataset for StringTie -StringTie requires two inputs: -- the RNA-seq alignment in bam format -- the genome annotation file in gff3 format - -## Outputs dataset for StringTie -StringTie generates an annotation file in GTF format. This file contains all the assembled transcripts present in the RNAseq data. - -## Input dataset for gffread -GFFRead requires an input file: the genome annotaton in gff3 format. - - -## Outputs dataset for gffread -An output file is generated in gtf format. 
- -## Input dataset for FEELnc -FEELnc requires 3 inputs: -- Transcript assembly in gtf format (corresponding to the StringTie output file) -- Reference annotation in gft format (corresponding to the gffread output file) -- Genome sequence in fasta format - -## Outputs dataset for FEELnc -FEELnc generates 3 output files: -- Long non-coding RNA annotation file in gtf format -- Annotation file for messenger RNAs in gtf format -- Classifier output file: table containing the classification of lncRNAs according to their genomic location in relation to other transcripts - - -## Input dataset for concatenate -Concatenate requires 2 inputs: -- genome annotation in gtf format -- annotation of long non-coding RNAs in gtf format - -## Outputs dataset for concatenate -An output file in GTF format is generated, containing the genome annotation and the annotation of long non-coding RNAs. \ No newline at end of file +## Workflow steps +- Transcript Assembly with StringTie: RNA-seq alignments are assembled into potential transcripts to provide a comprehensive view of expressed regions. +- Genome Annotation Conversion with GFFRead: Genome annotations are converted into a standardized format (GTF) to ensure compatibility with downstream tools. +- lncRNA Annotation with FEELnc: The FEELnc pipeline identifies and classifies long non-coding RNAs (lncRNAs) through three main steps: + - Filter: Removes unwanted transcripts and those overlapping reference exons. + - Codpot: Evaluates coding potential to differentiate lncRNAs from coding RNAs. + - Classifier: Assigns lncRNAs to categories based on their genomic location and transcriptional direction. +- Annotation Merging with Concatenate: The lncRNA annotation is merged with the reference annotation to create a unified genome annotation containing both mRNAs and lncRNAs. + +## Input data +The following input files are required for the workflow: +- RNA-seq alignments (BAM format): Required by StringTie for transcript assembly. 
+- Genome annotation (GFF3 format): Used by StringTie and GFFRead for processing. +- Genome sequence (FASTA format): Required by FEELnc for lncRNA identification. +- Reference annotation (GTF format): Provided by GFFRead for FEELnc analysis. + +## Output data +The workflow produces the following outputs: +- Transcript annotation (GTF format): Generated by StringTie, containing assembled transcripts from RNA-seq data. +- Converted genome annotation (GTF format): Produced by GFFRead, used as input for FEELnc. +- lncRNA annotation (GTF format): Generated by FEELnc, containing identified lncRNAs. +- mRNA annotation (GTF format): Produced by FEELnc for downstream use. +- lncRNA classification table: Produced by FEELnc, detailing genomic relationships of lncRNAs. +- Comprehensive genome annotation (GTF format): Generated by Concatenate, combining mRNA and lncRNA annotations. From 943ec1a42126b0681b5de7f75dda02c722e2a3a2 Mon Sep 17 00:00:00 2001 From: rlibouban Date: Thu, 16 Jan 2025 09:50:41 +0100 Subject: [PATCH 8/9] adding label and correcting test --- .../lncRNAs_annotation/.dockstore.yml | 1 + ...flow-lncRNAs_annotation_workflow-tests.yml | 48 ++++-- ...xy-Workflow-lncRNAs_annotation_workflow.ga | 147 +++++++++--------- 3 files changed, 110 insertions(+), 86 deletions(-) diff --git a/workflows/genome_annotation/lncRNAs_annotation/.dockstore.yml b/workflows/genome_annotation/lncRNAs_annotation/.dockstore.yml index 0f1660c7c..e01aed030 100644 --- a/workflows/genome_annotation/lncRNAs_annotation/.dockstore.yml +++ b/workflows/genome_annotation/lncRNAs_annotation/.dockstore.yml @@ -9,3 +9,4 @@ workflows: authors: - name: Romane Libouban email: romane.libouban@irisa.fr + orcid: 0009-0001-4920-9951 diff --git a/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml b/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml index 8df077656..614bd087c 100644 --- 
a/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml +++ b/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow-tests.yml @@ -16,25 +16,48 @@ outputs: genome_annotation_gtf: - location: https://zenodo.org/records/13941438/files/gffread.gtf?download=1 - compare: sim_size - delta: 300000 + asserts: + - has_n_lines: + n: 137448 + - has_text: + text: "funannotate" + - has_text: + text: "transcript" stringtie_gtf: - location: https://zenodo.org/records/13941438/files/StringTie.gtf?download=1 - compare: sim_size - delta: 300000 + asserts: + - has_n_lines: + n: 78389 + - has_text: + text: "StringTie version 2.2.3" + - has_text: + text: "transcript" + lcnRNA_annotation: - location: https://zenodo.org/records/13941438/files/lncRNA_annotation_FEELnc.gtf?download=1 - compare: sim_size - delta: 300000 + asserts: + - has_n_lines: + n: 268 + - has_text: + text: "StringTie" + - has_text: + text: "scaffold_49" classification: - location: https://zenodo.org/records/13941438/files/Classifier_FEELnc.txt?download=1 - compare: sim_size - delta: 300000 + asserts: + - has_n_lines: + n: 772 + - has_text: + text: "STRG.6410" + - has_text: + text: "antisense" + lncRNA_genome_annotation: - location: https://zenodo.org/records/13941438/files/Concatenate_datasets.gtf?download=1 - compare: sim_size - delta: 300000 + asserts: + - has_n_lines: + n: 137716 + - has_text: + text: "funannotate" + - has_text: + text: "StringTie" + \ No newline at end of file diff --git a/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow.ga b/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow.ga index cb96004e8..eb9eddf4b 100644 --- a/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow.ga +++ b/workflows/genome_annotation/lncRNAs_annotation/Galaxy-Workflow-lncRNAs_annotation_workflow.ga @@ -4,41 +4,20 @@ "comments": [ { "child_steps": [ - 3 - ], - "color": "pink", - "data": { - 
"title": "Conversion from GFF3 to GTF format" - }, - "id": 2, - "position": [ - 443, - 398.4 - ], - "size": [ - 239, - 285 - ], - "type": "frame" - }, - { - "child_steps": [ - 1, - 2, - 0 + 5 ], - "color": "yellow", + "color": "red", "data": { - "title": "Inputs" + "title": "lncRNAs annotation with FEELnc" }, - "id": 0, + "id": 3, "position": [ - 0, - 206.4 + 790, + 160 ], "size": [ - 243, - 326.2 + 260, + 360 ], "type": "frame" }, @@ -63,20 +42,22 @@ }, { "child_steps": [ - 5 + 1, + 2, + 0 ], - "color": "red", + "color": "yellow", "data": { - "title": "lncRNAs annotation with FEELnc" + "title": "Inputs" }, - "id": 3, + "id": 0, "position": [ - 762.1, - 132.1 + 0, + 206.4 ], "size": [ - 240, - 322 + 243, + 326.2 ], "type": "frame" }, @@ -91,11 +72,30 @@ "id": 1, "position": [ 437.8, - 0.0 + 0 ], "size": [ - 236, - 256 + 240, + 270 + ], + "type": "frame" + }, + { + "child_steps": [ + 3 + ], + "color": "pink", + "data": { + "title": "Conversion from GFF3 to GTF format" + }, + "id": 2, + "position": [ + 443, + 398.4 + ], + "size": [ + 239, + 285 ], "type": "frame" } @@ -104,6 +104,7 @@ { "class": "Person", "email": "mailto:romane.libouban@irisa.fr", + "identifier": "https://orcid.org/0009-0001-4920-9951", "name": "Romane Libouban" } ], @@ -116,7 +117,7 @@ }, "steps": { "0": { - "annotation": "Genome assembly", + "annotation": "", "content_id": null, "errors": null, "id": 0, @@ -131,8 +132,8 @@ "name": "Input dataset", "outputs": [], "position": { - "left": 140, - "top": 130 + "left": 20, + "top": 240 }, "tool_id": null, "tool_state": "{\"optional\": false, \"format\": [\"fasta\"], \"tag\": null}", @@ -143,7 +144,7 @@ "workflow_outputs": [] }, "1": { - "annotation": "Genome annotation", + "annotation": "", "content_id": null, "errors": null, "id": 1, @@ -158,8 +159,8 @@ "name": "Input dataset", "outputs": [], "position": { - "left": 0, - "top": 290 + "left": 20, + "top": 350 }, "tool_id": null, "tool_state": "{\"optional\": false, \"format\": [\"gff3\"], \"tag\": 
null}", @@ -170,7 +171,7 @@ "workflow_outputs": [] }, "2": { - "annotation": "RNA-Seq", + "annotation": "", "content_id": null, "errors": null, "id": 2, @@ -185,7 +186,7 @@ "name": "Input dataset", "outputs": [], "position": { - "left": 40, + "left": 20, "top": 460 }, "tool_id": null, @@ -197,7 +198,7 @@ "workflow_outputs": [] }, "3": { - "annotation": "protein sequences extracted", + "annotation": "Protein sequences extracted from genomic annotation", "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/gffread/gffread/2.2.1.4+galaxy0", "errors": null, "id": 3, @@ -217,8 +218,8 @@ } ], "position": { - "left": 550, - "top": 380 + "left": 460, + "top": 460 }, "post_job_actions": {}, "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/gffread/gffread/2.2.1.4+galaxy0", @@ -242,7 +243,7 @@ ] }, "4": { - "annotation": "Assembly step", + "annotation": "StringTie is a fast, highly efficient assembler of RNA-Seq alignments into potential transcripts. ", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie/2.2.3+galaxy0", "errors": null, "id": 4, @@ -257,12 +258,16 @@ } }, "inputs": [ + { + "description": "runtime parameter for tool StringTie", + "name": "adv" + }, { "description": "runtime parameter for tool StringTie", "name": "input_options" } ], - "label": "StringTie", + "label": "Assembly step with StringTie", "name": "StringTie", "outputs": [ { @@ -271,8 +276,8 @@ } ], "position": { - "left": 590, - "top": 0 + "left": 460, + "top": 60 }, "post_job_actions": {}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/stringtie/stringtie/2.2.3+galaxy0", @@ -282,7 +287,7 @@ "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu" }, - "tool_state": "{\"__input_ext\": \"input\", \"adv\": {\"abundance_estimation\": false, \"omit_sequences\": \"\", \"name_prefix\": null, \"fraction\": \"0.01\", \"min_tlen\": \"200\", \"min_anchor_len\": \"10\", \"min_anchor_cov\": \"1\", \"min_bundle_cov\": \"1\", \"bdist\": \"50\", \"bundle_fraction\": \"1.0\", \"disable_trimming\": false, 
\"multi_mapping\": false, \"point_features\": null}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"guide\": {\"use_guide\": \"yes\", \"__current_case__\": 1, \"guide_source\": {\"guide_gff_select\": \"history\", \"__current_case__\": 1, \"ref_hist\": {\"__class__\": \"ConnectedValue\"}}, \"input_estimation\": false, \"special_outputs\": {\"special_outputs_select\": \"no\", \"__current_case__\": 2}, \"coverage_file\": false}, \"input_options\": {\"input_mode\": \"short_reads\", \"__current_case__\": 0, \"input_bam\": {\"__class__\": \"ConnectedValue\"}}, \"rna_strandness\": \"\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_state": "{\"adv\": {\"abundance_estimation\": false, \"omit_sequences\": \"\", \"name_prefix\": null, \"fraction\": \"0.01\", \"min_tlen\": \"200\", \"min_anchor_len\": \"10\", \"min_anchor_cov\": \"1\", \"min_bundle_cov\": \"1\", \"bdist\": \"50\", \"bundle_fraction\": \"1.0\", \"disable_trimming\": false, \"multi_mapping\": false, \"point_features\": {\"__class__\": \"RuntimeValue\"}}, \"guide\": {\"use_guide\": \"yes\", \"__current_case__\": 1, \"guide_source\": {\"guide_gff_select\": \"history\", \"__current_case__\": 1, \"ref_hist\": {\"__class__\": \"ConnectedValue\"}}, \"input_estimation\": false, \"special_outputs\": {\"special_outputs_select\": \"no\", \"__current_case__\": 2}, \"coverage_file\": false}, \"input_options\": {\"input_mode\": \"short_reads\", \"__current_case__\": 0, \"input_bam\": {\"__class__\": \"ConnectedValue\"}}, \"rna_strandness\": \"\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "2.2.3+galaxy0", "type": "tool", "uuid": "632f7928-d838-4bbf-b0f9-6a94ea9eb427", @@ -296,7 +301,7 @@ ] }, "5": { - "annotation": "annotation of lncRNAs", + "annotation": "The FEELnc pipeline identifies and classifies long non-coding RNAs (lncRNAs) through three main steps: filter, codpot and classifier.", "content_id": 
"toolshed.g2.bx.psu.edu/repos/iuc/feelnc/feelnc/0.2.1+galaxy0", "errors": null, "id": 5, @@ -315,7 +320,7 @@ } }, "inputs": [], - "label": "FEELnc", + "label": "lncRNA annotation step with FEELnc", "name": "FEELnc", "outputs": [ { @@ -332,8 +337,8 @@ } ], "position": { - "left": 920, - "top": 160 + "left": 830, + "top": 220 }, "post_job_actions": {}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/feelnc/feelnc/0.2.1+galaxy0", @@ -349,6 +354,11 @@ "uuid": "e7d02c0b-09a9-46e0-836a-048a3a5fdc33", "when": null, "workflow_outputs": [ + { + "label": "mRNAs_annotation", + "output_name": "candidate_mRNA", + "uuid": "0313d195-5bc6-4fd0-99cc-9e4a21960ae8" + }, { "label": "classification", "output_name": "classifier", @@ -358,16 +368,11 @@ "label": "lcnRNA_annotation", "output_name": "candidate_lncRNA", "uuid": "d146e06c-9a4f-4860-8dcd-d7524dee7ef5" - }, - { - "label": "mRNAs_annotation", - "output_name": "candidate_mRNA", - "uuid": "0313d195-5bc6-4fd0-99cc-9e4a21960ae8" } ] }, "6": { - "annotation": "final annotation", + "annotation": " The lncRNA annotation is merged with the reference annotation to create a unified genome annotation containing both mRNAs and lncRNAs.", "content_id": "cat1", "errors": null, "id": 6, @@ -382,7 +387,7 @@ } }, "inputs": [], - "label": "Concatenate Dataset", + "label": "Concatenation step", "name": "Concatenate datasets", "outputs": [ { @@ -391,8 +396,8 @@ } ], "position": { - "left": 1420, - "top": 370 + "left": 1160, + "top": 500 }, "post_job_actions": {}, "tool_id": "cat1", @@ -411,6 +416,6 @@ } }, "tags": [], - "uuid": "7699f72e-1083-442e-b4cc-90ab57e4783c", - "version": 1 + "uuid": "ac3e37c2-0b14-421f-8d09-4a8a7bb500f5", + "version": 6 } \ No newline at end of file From 5547f3e47d79658ce37d2cf49f1b6569897c0540 Mon Sep 17 00:00:00 2001 From: rlibouban Date: Thu, 16 Jan 2025 17:54:06 +0100 Subject: [PATCH 9/9] rename RNA-Seq by Alignments from RNA-seq --- .../Genome_annotation_with_Funannotate.ga | 1044 +++++++++++++++++ 
...nome_annotation_with_Funannotate_tests.yml | 235 ++++ 2 files changed, 1279 insertions(+) create mode 100644 workflows/genome_annotation/annotation-funannotate/Genome_annotation_with_Funannotate.ga create mode 100644 workflows/genome_annotation/annotation-funannotate/Genome_annotation_with_Funannotate_tests.yml diff --git a/workflows/genome_annotation/annotation-funannotate/Genome_annotation_with_Funannotate.ga b/workflows/genome_annotation/annotation-funannotate/Genome_annotation_with_Funannotate.ga new file mode 100644 index 000000000..f3db6357b --- /dev/null +++ b/workflows/genome_annotation/annotation-funannotate/Genome_annotation_with_Funannotate.ga @@ -0,0 +1,1044 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "This workflow annotates a genome with Funannotate and evaluates the quality of the annotation using BUSCO.", + "comments": [ + { + "child_steps": [ + 13 + ], + "color": "turquoise", + "data": { + "title": "Evaluation" + }, + "id": 7, + "position": [ + 2210, + 1330 + ], + "size": [ + 240, + 214 + ], + "type": "frame" + }, + { + "child_steps": [ + 11 + ], + "color": "pink", + "data": { + "title": "Visualisation" + }, + "id": 5, + "position": [ + 1700, + 520 + ], + "size": [ + 240, + 295.6 + ], + "type": "frame" + }, + { + "child_steps": [ + 12 + ], + "color": "black", + "data": { + "title": "Comparing annotations" + }, + "id": 6, + "position": [ + 2070, + 880 + ], + "size": [ + 240, + 224.4 + ], + "type": "frame" + }, + { + "child_steps": [ + 0, + 1, + 2, + 3, + 5, + 4 + ], + "color": "blue", + "data": { + "title": "Inputs" + }, + "id": 0, + "position": [ + 0, + 620 + ], + "size": [ + 240, + 680 + ], + "type": "frame" + }, + { + "child_steps": [ + 6 + ], + "color": "lime", + "data": { + "title": "Preparing RNASeq data" + }, + "id": 1, + "position": [ + 350, + 560 + ], + "size": [ + 240, + 376.8 + ], + "type": "frame" + }, + { + "child_steps": [ + 7 + ], + "color": "yellow", + "data": { + "title": "Structural annotation" + }, + "id": 
3, + "position": [ + 610, + 1000 + ], + "size": [ + 240, + 1485.6 + ], + "type": "frame" + }, + { + "child_steps": [ + 8, + 9 + ], + "color": "orange", + "data": { + "title": "Functional annotation" + }, + "id": 2, + "position": [ + 970, + 510 + ], + "size": [ + 260, + 534.8 + ], + "type": "frame" + }, + { + "child_steps": [ + 10 + ], + "color": "red", + "data": { + "title": "Integrating structural and functional annotation" + }, + "id": 4, + "position": [ + 1350, + 0 + ], + "size": [ + 250, + 2060 + ], + "type": "frame" + } + ], + "creator": [ + { + "class": "Person", + "identifier": "https://orcid.org/0009-0001-4920-9951", + "name": "Romane Libouban" + } + ], + "format-version": "0.1", + "license": "MIT", + "release": "0.1", + "name": "Genome annotation with Funannotate", + "report": { + "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" + }, + "steps": { + "0": { + "annotation": "Genome sequence", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "Genome sequence", + "name": "Genome sequence" + } + ], + "label": "Genome sequence", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 20, + "top": 660 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "beb25416-4173-495f-896b-42b659a12ac3", + "when": null, + "workflow_outputs": [] + }, + "1": { + "annotation": "RNASeq R1", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "RNASeq R1", + "name": "RNASeq R1" + } + ], + "label": "RNASeq R1", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 20, + "top": 780 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null}", + "tool_version": null, + "type": 
"data_input", + "uuid": "10b077d4-8250-4185-9053-c3c6f16e136a", + "when": null, + "workflow_outputs": [] + }, + "2": { + "annotation": "RNASeq R2", + "content_id": null, + "errors": null, + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "RNASeq R2", + "name": "RNASeq R2" + } + ], + "label": "RNASeq R2", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 20, + "top": 890 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "ca94cbfb-c29e-42e5-8b3f-518994937918", + "when": null, + "workflow_outputs": [] + }, + "3": { + "annotation": "SwissProt", + "content_id": null, + "errors": null, + "id": 3, + "input_connections": {}, + "inputs": [ + { + "description": "SwissProt", + "name": "SwissProt" + } + ], + "label": "SwissProt", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 20, + "top": 990 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "e5d8a046-b409-41aa-a73f-f4c54f49a911", + "when": null, + "workflow_outputs": [] + }, + "4": { + "annotation": "alternate annotation (gbk)", + "content_id": null, + "errors": null, + "id": 4, + "input_connections": {}, + "inputs": [ + { + "description": "alternate annotation (gbk)", + "name": "alternate annotation (gbk)" + } + ], + "label": "alternate annotation (gbk)", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 20, + "top": 1100 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "5e1369c9-0af5-4173-8691-f69dc7229e44", + "when": null, + "workflow_outputs": [] + }, + "5": { + "annotation": "alternate annotation (gff3)", + "content_id": null, + "errors": null, + "id": 5, + "input_connections": {}, + "inputs": [ + { + "description": "alternate annotation (gff3)", + "name": "alternate 
annotation (gff3)" + } + ], + "label": "alternate annotation (gff3)", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 20, + "top": 1200 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "8da9582d-f70b-48af-b44e-0014ae257301", + "when": null, + "workflow_outputs": [] + }, + "6": { + "annotation": "Preparing RNASeq data", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/rgrnastar/rna_star/2.7.11a+galaxy1", + "errors": null, + "id": 6, + "input_connections": { + "refGenomeSource|genomeFastaFiles": { + "id": 0, + "output_name": "output" + }, + "singlePaired|input1": { + "id": 1, + "output_name": "output" + }, + "singlePaired|input2": { + "id": 2, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool RNA STAR", + "name": "refGenomeSource" + }, + { + "description": "runtime parameter for tool RNA STAR", + "name": "singlePaired" + }, + { + "description": "runtime parameter for tool RNA STAR", + "name": "singlePaired" + } + ], + "label": "RNA STAR", + "name": "RNA STAR", + "outputs": [ + { + "name": "output_log", + "type": "txt" + }, + { + "name": "splice_junctions", + "type": "interval" + }, + { + "name": "mapped_reads", + "type": "bam" + } + ], + "position": { + "left": 370, + "top": 600 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/rgrnastar/rna_star/2.7.11a+galaxy1", + "tool_shed_repository": { + "changeset_revision": "53255f6eecfc", + "name": "rgrnastar", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"algo\": {\"params\": {\"settingsType\": \"default\", \"__current_case__\": 0}}, \"chimOutType\": \"\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"filter\": {\"basic_filters\": null, \"output_params2\": {\"output_select2\": \"no\", \"__current_case__\": 1}}, \"oformat\": {\"outSAMattributes\": 
[\"NH\", \"HI\", \"AS\", \"nM\", \"ch\"], \"HI_offset\": \"1\", \"outSAMprimaryFlag\": \"OneBestScore\", \"outSAMmapqUnique\": \"60\", \"wasp_conditional\": {\"waspOutputMode\": \"\", \"__current_case__\": 1}}, \"outWig\": {\"outWigType\": \"None\", \"__current_case__\": 0, \"outWigStrand\": \"false\"}, \"perf\": {\"outBAMsortingBinsN\": \"50\", \"winAnchorMultimapNmax\": \"50\"}, \"refGenomeSource\": {\"geneSource\": \"history\", \"__current_case__\": 1, \"genomeFastaFiles\": {\"__class__\": \"ConnectedValue\"}, \"genomeSAindexNbases\": \"11\", \"GTFconditional\": {\"GTFselect\": \"without-gtf\", \"__current_case__\": 1, \"quantmode_output\": {\"quantMode\": \"-\", \"__current_case__\": 0}}, \"diploidconditional\": {\"diploid\": \"No\", \"__current_case__\": 1}}, \"singlePaired\": {\"sPaired\": \"paired\", \"__current_case__\": 1, \"input1\": {\"__class__\": \"ConnectedValue\"}, \"input2\": {\"__class__\": \"ConnectedValue\"}}, \"twopass\": {\"twopassMode\": \"None\", \"__current_case__\": 0, \"twopass_read_subset\": \"\", \"sj_precalculated\": \"\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.7.11a+galaxy1", + "type": "tool", + "uuid": "719af766-076b-449c-8fd4-cc9532b18385", + "when": null, + "workflow_outputs": [ + { + "label": "rna star log", + "output_name": "output_log", + "uuid": "a2493c26-5037-4dab-bfc8-75874c9749fa" + }, + { + "label": "rna star slice junctions", + "output_name": "splice_junctions", + "uuid": "a4eb3388-5dac-417b-aa96-01b653e096ae" + }, + { + "label": "rna star mapped reads", + "output_name": "mapped_reads", + "uuid": "e342c94b-f27a-419a-a223-a22881ad8fff" + } + ] + }, + "7": { + "annotation": "Structural annotation", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/funannotate_predict/funannotate_predict/1.8.15+galaxy5", + "errors": null, + "id": 7, + "input_connections": { + "evidences|prot_evidence|protein_evidence": { + "id": 3, + "output_name": "output" + }, + "evidences|rna_bam": { + "id": 6, + 
"output_name": "mapped_reads" + }, + "input": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Funannotate predict annotation", + "name": "evidences" + } + ], + "label": "Funannotate predict annotation", + "name": "Funannotate predict annotation", + "outputs": [ + { + "name": "annot_gbk", + "type": "genbank" + }, + { + "name": "annot_tbl", + "type": "txt" + }, + { + "name": "annot_gff3", + "type": "gff3" + }, + { + "name": "fasta_proteins", + "type": "fasta" + }, + { + "name": "fasta_transcripts_mrna", + "type": "fasta" + }, + { + "name": "fasta_transcripts_cds", + "type": "fasta" + }, + { + "name": "tbl2asn_report", + "type": "txt" + }, + { + "name": "tbl2asn_error", + "type": "txt" + }, + { + "name": "tbl2asn_validation", + "type": "txt" + }, + { + "name": "stats", + "type": "json" + } + ], + "position": { + "left": 630, + "top": 1040 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/funannotate_predict/funannotate_predict/1.8.15+galaxy5", + "tool_shed_repository": { + "changeset_revision": "4b96ced19f75", + "name": "funannotate_predict", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"augustus\": {\"augustus_species\": \"none\", \"min_training_models\": \"200\", \"optimize_augustus\": false}, \"busco\": {\"busco_db\": \"mucorales_odb10\", \"busco_seed_species\": \"rhizopus_oryzae\"}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"database\": \"2023-09-28-003158\", \"evidences\": {\"rna_bam\": {\"__class__\": \"ConnectedValue\"}, \"transcript_evidence\": null, \"prot_evidence\": {\"prot_evidence_source\": \"custom\", \"__current_case__\": 1, \"protein_evidence\": {\"__class__\": \"ConnectedValue\"}}, \"p2g_pident\": \"80\", \"p2g_prefilter\": \"diamond\"}, \"evm\": {\"repeats2evm\": false, \"evm_partitioning\": {\"evm_partition\": \"yes\", \"__current_case__\": 0, \"evm_partition_interval\": 
\"1500\"}, \"weights\": null}, \"filtering\": {\"min_intronlen\": \"10\", \"max_intronlen\": \"3000\", \"min_protlen\": \"50\", \"keep_no_stops\": false, \"repeat_filter\": \"overlap blast\"}, \"force\": true, \"genemark\": {\"genemark_license\": null, \"genemark_mode\": \"ES\", \"genemark_mod\": null, \"soft_mask\": \"2000\"}, \"header_length\": \"16\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"organism\": {\"species\": \"Mucor mucedo\", \"isolate\": \"\", \"strain\": \"muc1\", \"organism\": false, \"ploidy\": \"1\", \"SeqCenter\": \"CFMR\", \"SeqAccession\": \"12345\", \"name\": \"FUN_\", \"numbering\": \"1\"}, \"other_predictors\": {\"stringtie\": null, \"maker_gff\": null, \"pasa_gff\": null, \"pasa_gff_weight\": \"1\", \"other_gff\": null, \"other_gff_weight\": \"1\"}, \"outputs\": [\"gbk\", \"gff3\", \"proteins_fa\", \"mrna_transcripts_fa\", \"cds_transcripts_fa\", \"tbl2asn_report\", \"tbl2asn_error\", \"tbl2asn_validation\", \"stats\", \"tbl\"], \"parameters\": null, \"uglyTestingHack\": \"\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.8.15+galaxy5", + "type": "tool", + "uuid": "476ff4eb-6414-4cef-bb8c-d4472c3fa9e4", + "when": null, + "workflow_outputs": [ + { + "label": "Funannotate predict annotation fasta transcripts mrna", + "output_name": "fasta_transcripts_mrna", + "uuid": "7c72713c-db2f-45ce-bd7f-5345a2152529" + }, + { + "label": "Funannotate predict annotation fasta transcripts cds", + "output_name": "fasta_transcripts_cds", + "uuid": "9ab0ab15-8970-4ae7-99c2-cf7b69285e1b" + }, + { + "label": "Funannotate predict annotation tbl2asn report", + "output_name": "tbl2asn_report", + "uuid": "8ef1a827-bc8d-43d1-bd0b-b8f64045c08c" + }, + { + "label": "Funannotate predict annotation tbl2asn error", + "output_name": "tbl2asn_error", + "uuid": "22cfe8f7-f5a4-4092-9907-b717925936c6" + }, + { + "label": "Funannotate predict annotation tbl2asn validation", + "output_name": "tbl2asn_validation", + "uuid": 
"55b2a4e4-59f4-40f5-89a4-3c243a03e5aa" + }, + { + "label": "Funannotate predict annotation stats", + "output_name": "stats", + "uuid": "0309d4ba-5fc5-4271-b876-3386a8d4a861" + }, + { + "label": "Funannotate predict annotation annot tbl", + "output_name": "annot_tbl", + "uuid": "f5463ebe-4fa9-4fed-a890-bf56347fc7a3" + }, + { + "label": "Funannotate predict annotation annot gbk", + "output_name": "annot_gbk", + "uuid": "26dc459d-3fb2-4273-abff-9495636b3ca5" + }, + { + "label": "Funannotate predict annotation annot gff3", + "output_name": "annot_gff3", + "uuid": "8e94cc93-0054-4178-a87a-e155ce86c87f" + }, + { + "label": "Funannotate predict annotation fasta proteins", + "output_name": "fasta_proteins", + "uuid": "ea3cd362-9bf2-4d3d-b08f-4d33bdbc6e7e" + } + ] + }, + "8": { + "annotation": "Functional annotation with eggNOG Mapper ", + "content_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/eggnog_mapper/eggnog_mapper/2.1.8+galaxy4", + "errors": null, + "id": 8, + "input_connections": { + "ortho_method|input": { + "id": 7, + "output_name": "fasta_proteins" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool eggNOG Mapper", + "name": "ortho_method" + } + ], + "label": " eggNOG Mapper ", + "name": "eggNOG Mapper", + "outputs": [ + { + "name": "seed_orthologs", + "type": "tabular" + }, + { + "name": "annotations", + "type": "tabular" + } + ], + "position": { + "left": 990, + "top": 550 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/galaxyp/eggnog_mapper/eggnog_mapper/2.1.8+galaxy4", + "tool_shed_repository": { + "changeset_revision": "d9c3016f7283", + "name": "eggnog_mapper", + "owner": "galaxyp", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"annotation_options\": {\"no_annot\": \"\", \"__current_case__\": 0, \"seed_ortholog_evalue\": \"0.001\", \"seed_ortholog_score\": null, \"tax_scope\": null, \"target_orthologs\": \"all\", \"go_evidence\": \"non-electronic\"}, \"chromInfo\": 
\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"eggnog_data\": \"5.0.2\", \"ortho_method\": {\"m\": \"diamond\", \"__current_case__\": 0, \"input\": {\"__class__\": \"ConnectedValue\"}, \"input_trans\": {\"itype\": \"proteins\", \"__current_case__\": 0}, \"matrix_gapcosts\": {\"matrix\": \"BLOSUM62\", \"__current_case__\": 2, \"gap_costs\": \"--gapopen 11 --gapextend 1\"}, \"sensmode\": \"sensitive\", \"dmnd_iterate\": false, \"dmnd_ignore_warnings\": false, \"query_cover\": null, \"subject_cover\": null, \"pident\": null, \"evalue\": null, \"score\": \"0.001\"}, \"output_options\": {\"no_file_comments\": false, \"report_orthologs\": false, \"md5\": false}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.1.8+galaxy4", + "type": "tool", + "uuid": "dcccd97a-52ca-4c32-9ba9-0f234578b490", + "when": null, + "workflow_outputs": [ + { + "label": " eggNOG Mapper seed orthologs", + "output_name": "seed_orthologs", + "uuid": "3a6e9212-b746-4991-9c9a-e2fe6575d35a" + }, + { + "label": " eggNOG Mapper annotations", + "output_name": "annotations", + "uuid": "8d5fb69c-93e0-4b62-8c39-ea1c4915ddf9" + } + ] + }, + "9": { + "annotation": "Functional annotation with InterProScan", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/interproscan/interproscan/5.59-91.0+galaxy3", + "errors": null, + "id": 9, + "input_connections": { + "input": { + "id": 7, + "output_name": "fasta_proteins" + } + }, + "inputs": [], + "label": " InterProScan", + "name": "InterProScan", + "outputs": [ + { + "name": "outfile_tsv", + "type": "tabular" + }, + { + "name": "outfile_xml", + "type": "xml" + } + ], + "position": { + "left": 1010, + "top": 840 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/interproscan/interproscan/5.59-91.0+galaxy3", + "tool_shed_repository": { + "changeset_revision": "74810db257cc", + "name": "interproscan", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": 
"{\"__input_ext\": \"input\", \"applications\": [\"TIGRFAM\", \"FunFam\", \"SFLD\", \"SUPERFAMILY\", \"PANTHER\", \"Gene3D\", \"Hamap\", \"PrositeProfiles\", \"Coils\", \"SMART\", \"CDD\", \"PRINTS\", \"PIRSR\", \"PrositePatterns\", \"AntiFam\", \"Pfam\", \"MobiDBLite\", \"PIRSF\"], \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"database\": \"5.59-91.0\", \"goterms\": true, \"input\": {\"__class__\": \"ConnectedValue\"}, \"iprlookup\": false, \"licensed\": {\"use\": \"false\", \"__current_case__\": 0}, \"oformat\": [\"TSV\", \"XML\"], \"pathways\": true, \"seqtype\": \"p\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.59-91.0+galaxy3", + "type": "tool", + "uuid": "1bfd7e13-afa4-4947-ab83-8e0ae49a3080", + "when": null, + "workflow_outputs": [ + { + "label": " InterProScan xml", + "output_name": "outfile_xml", + "uuid": "735b5593-3a35-4419-97d0-c4847be6875b" + }, + { + "label": " InterProScan tsv", + "output_name": "outfile_tsv", + "uuid": "ad1c72a9-893b-49f7-80cd-c923489aa263" + } + ] + }, + "10": { + "annotation": "Integrating structural and functional annotation\n", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/funannotate_annotate/funannotate_annotate/1.8.15+galaxy5", + "errors": null, + "id": 10, + "input_connections": { + "eggnog": { + "id": 8, + "output_name": "annotations" + }, + "input|genbank": { + "id": 7, + "output_name": "annot_gbk" + }, + "iprscan": { + "id": 9, + "output_name": "outfile_xml" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Funannotate functional", + "name": "input" + } + ], + "label": " Funannotate functional annotation", + "name": "Funannotate functional", + "outputs": [ + { + "name": "sqn", + "type": "input" + }, + { + "name": "gbk", + "type": "genbank" + }, + { + "name": "annot", + "type": "tabular" + }, + { + "name": "contigs_fsa", + "type": "fasta" + }, + { + "name": "agp", + "type": "tabular" + }, + { + "name": "tbl", + "type": "txt" + }, + { + 
"name": "fa_scaffolds", + "type": "fasta" + }, + { + "name": "fa_proteins", + "type": "fasta" + }, + { + "name": "fa_transcripts_mrna", + "type": "fasta" + }, + { + "name": "fa_transcripts_cds", + "type": "fasta" + }, + { + "name": "gff3", + "type": "gff3" + }, + { + "name": "tbl2asn_report", + "type": "txt" + }, + { + "name": "stats", + "type": "json" + }, + { + "name": "must_fix", + "type": "json" + }, + { + "name": "need_curating", + "type": "json" + }, + { + "name": "new_names_passed", + "type": "json" + } + ], + "position": { + "left": 1370, + "top": 100 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/funannotate_annotate/funannotate_annotate/1.8.15+galaxy5", + "tool_shed_repository": { + "changeset_revision": "9c15ca7e764e", + "name": "funannotate_annotate", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"annotations\": null, \"antismash\": null, \"busco_db\": \"mucorales_odb10\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"database\": \"2023-09-28-003158\", \"eggnog\": {\"__class__\": \"ConnectedValue\"}, \"fix\": null, \"header_length\": \"16\", \"input\": {\"input_type\": \"gbk\", \"__current_case__\": 0, \"genbank\": {\"__class__\": \"ConnectedValue\"}}, \"iprscan\": {\"__class__\": \"ConnectedValue\"}, \"isolate\": null, \"outputs\": [\"gbk\", \"annotations\", \"contigs_fsa\", \"agp\", \"tbl\", \"sqn\", \"scaffolds_fa\", \"proteins_fa\", \"mrna_transcripts_fa\", \"cds_transcripts_fa\", \"gff3\", \"discrepency\", \"stats\", \"must_fix\", \"need_curating\", \"new_names_passed\"], \"phobius\": null, \"remove\": null, \"rename\": \"MMUCEDO_\", \"sbt\": null, \"strain\": \"muc1\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.8.15+galaxy5", + "type": "tool", + "uuid": "65c768b5-1df1-4dc2-beeb-89202f580176", + "when": null, + "workflow_outputs": [ + { + "label": " Funannotate functional annotation must fix", + 
"output_name": "must_fix", + "uuid": "32a18a41-50b2-46cf-9c63-169ddc9091ac" + }, + { + "label": " Funannotate functional annotation need curating", + "output_name": "need_curating", + "uuid": "591703d6-f8df-4db7-b1e4-daa97eecb8c8" + }, + { + "label": " Funannotate functional annotation new names passed", + "output_name": "new_names_passed", + "uuid": "147ba19e-8c75-422b-a151-e2284f8f2b49" + }, + { + "label": " Funannotate functional annotation sqn", + "output_name": "sqn", + "uuid": "aa55f332-a8f4-4822-b168-dd6094d02eea" + }, + { + "label": " Funannotate functional annotation gbk", + "output_name": "gbk", + "uuid": "005b523a-508e-4a93-bd0b-d404315d2b04" + }, + { + "label": " Funannotate functional annotation annot", + "output_name": "annot", + "uuid": "4fd64b9f-0045-47bc-a19f-0b9415106a44" + }, + { + "label": " Funannotate functional annotation contigs fsa", + "output_name": "contigs_fsa", + "uuid": "c747ac9d-4262-459e-a52e-073642790283" + }, + { + "label": " Funannotate functional annotation agp", + "output_name": "agp", + "uuid": "f307bfef-07e4-4afb-b646-f520b95c00c0" + }, + { + "label": " Funannotate functional annotation tbl", + "output_name": "tbl", + "uuid": "5d27a120-0bd7-4952-95e8-87d90ec8e0b4" + }, + { + "label": " Funannotate functional annotation fa scaffolds", + "output_name": "fa_scaffolds", + "uuid": "041887f9-d998-49c5-b54a-7640e0573dd2" + }, + { + "label": " Funannotate functional annotation fa proteins", + "output_name": "fa_proteins", + "uuid": "b0960890-d00d-433b-afb7-799230f3152e" + }, + { + "label": " Funannotate functional annotation fa transcripts mrna", + "output_name": "fa_transcripts_mrna", + "uuid": "a33e6d36-7206-4332-980f-9d2c7a3d4757" + }, + { + "label": " Funannotate functional annotation fa transcripts cds", + "output_name": "fa_transcripts_cds", + "uuid": "a53840d7-8d76-4668-ab83-241816fe6fe8" + }, + { + "label": " Funannotate functional annotation gff3", + "output_name": "gff3", + "uuid": "e81cfe02-9a88-4e34-87a3-85c46cbf0e83" + }, 
+ { + "label": " Funannotate functional annotation tbl2asn report", + "output_name": "tbl2asn_report", + "uuid": "91c0c710-4d3f-4ff5-8c1e-b3b862725423" + }, + { + "label": " Funannotate functional annotation stats", + "output_name": "stats", + "uuid": "c0810f5d-4e68-42d4-9e3d-ba7b7275081d" + } + ] + }, + "11": { + "annotation": "Visualisation with a genome browser", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy1", + "errors": null, + "id": 11, + "input_connections": { + "reference_genome|genome": { + "id": 0, + "output_name": "output" + }, + "track_groups_0|data_tracks_0|data_format|annotation": { + "id": 10, + "output_name": "gff3" + }, + "track_groups_1|data_tracks_0|data_format|annotation": { + "id": 6, + "output_name": "mapped_reads" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool JBrowse", + "name": "reference_genome" + } + ], + "label": " JBrowse", + "name": "JBrowse", + "outputs": [ + { + "name": "output", + "type": "html" + } + ], + "position": { + "left": 1720, + "top": 560 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/jbrowse/jbrowse/1.16.11+galaxy1", + "tool_shed_repository": { + "changeset_revision": "a6e57ff585c0", + "name": "jbrowse", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"fasta\", \"action\": {\"action_select\": \"create\", \"__current_case__\": 0}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"gencode\": \"1\", \"jbgen\": {\"defaultLocation\": \"\", \"trackPadding\": \"20\", \"shareLink\": true, \"aboutDescription\": \"\", \"show_tracklist\": true, \"show_nav\": true, \"show_overview\": true, \"show_menu\": true, \"hideGenomeOptions\": false}, \"plugins\": {\"BlastView\": true, \"ComboTrackSelector\": false, \"GCContent\": false}, \"reference_genome\": {\"genome_type_select\": \"history\", \"__current_case__\": 1, \"genome\": {\"__class__\": \"ConnectedValue\"}}, \"standalone\": 
\"minimal\", \"track_groups\": [{\"__index__\": 0, \"category\": \"Annotation\", \"data_tracks\": [{\"__index__\": 0, \"data_format\": {\"data_format_select\": \"gene_calls\", \"__current_case__\": 2, \"annotation\": {\"__class__\": \"ConnectedValue\"}, \"match_part\": {\"match_part_select\": false, \"__current_case__\": 1}, \"index\": false, \"track_config\": {\"track_class\": \"NeatHTMLFeatures/View/Track/NeatFeatures\", \"__current_case__\": 3, \"html_options\": {\"topLevelFeatures\": null}}, \"jbstyle\": {\"style_classname\": \"feature\", \"style_label\": \"product,name,id\", \"style_description\": \"note,description\", \"style_height\": \"10px\", \"max_height\": \"600\"}, \"jbcolor_scale\": {\"color_score\": {\"color_score_select\": \"none\", \"__current_case__\": 0, \"color\": {\"color_select\": \"automatic\", \"__current_case__\": 0}}}, \"jb_custom_config\": {\"option\": []}, \"jbmenu\": {\"track_menu\": []}, \"track_visibility\": \"default_off\", \"override_apollo_plugins\": \"False\", \"override_apollo_drag\": \"False\"}}]}, {\"__index__\": 1, \"category\": \"RNASeq\", \"data_tracks\": [{\"__index__\": 0, \"data_format\": {\"data_format_select\": \"pileup\", \"__current_case__\": 3, \"annotation\": {\"__class__\": \"ConnectedValue\"}, \"auto_snp\": true, \"chunkSizeLimit\": \"5000000\", \"jb_custom_config\": {\"option\": []}, \"track_visibility\": \"default_off\", \"override_apollo_plugins\": \"False\", \"override_apollo_drag\": \"False\"}}]}], \"uglyTestingHack\": \"\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.16.11+galaxy1", + "type": "tool", + "uuid": "9d0a123f-ae0b-4e6c-84bb-894eb6657097", + "when": null, + "workflow_outputs": [ + { + "label": " JBrowse html", + "output_name": "output", + "uuid": "5a21aa8e-5d07-4450-9c89-ed26dc72d62e" + } + ] + }, + "12": { + "annotation": "Comparing annotations", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/aegean_parseval/aegean_parseval/0.16.0+galaxy1", + "errors": null, + 
"id": 12, + "input_connections": { + "predictiongff3": { + "id": 5, + "output_name": "output" + }, + "referencegff3": { + "id": 10, + "output_name": "gff3" + } + }, + "inputs": [], + "label": " AEGeAn ParsEval ", + "name": "AEGeAn ParsEval", + "outputs": [ + { + "name": "output_html", + "type": "html" + } + ], + "position": { + "left": 2090, + "top": 920 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/aegean_parseval/aegean_parseval/0.16.0+galaxy1", + "tool_shed_repository": { + "changeset_revision": "d6c074a93c51", + "name": "aegean_parseval", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"delta\": \"0\", \"maxtrans\": \"32\", \"output_type\": \"html\", \"predictiongff3\": {\"__class__\": \"ConnectedValue\"}, \"predlabel\": \"\", \"referencegff3\": {\"__class__\": \"ConnectedValue\"}, \"refrlabel\": \"\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.16.0+galaxy1", + "type": "tool", + "uuid": "8ebd5ef3-886a-4a99-8cbd-0b38ac5677fc", + "when": null, + "workflow_outputs": [ + { + "label": " AEGeAn ParsEval html", + "output_name": "output_html", + "uuid": "592d0cba-e19a-4948-a3d1-e6ad6699f72c" + } + ] + }, + "13": { + "annotation": " BUSCO (Benchmarking Universal Single-Copy Orthologs) is a tool allowing to evaluate the quality of a genome assembly or of a genome annotation. 
", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/busco/busco/5.7.1+galaxy0", + "errors": null, + "id": 13, + "input_connections": { + "input": { + "id": 10, + "output_name": "fa_proteins" + } + }, + "inputs": [], + "label": " Busco", + "name": "Busco", + "outputs": [ + { + "name": "busco_sum", + "type": "txt" + }, + { + "name": "busco_table", + "type": "tabular" + }, + { + "name": "summary_image", + "type": "png" + } + ], + "position": { + "left": 2230, + "top": 1370 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/busco/busco/5.7.1+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2babe6d5c561", + "name": "busco", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"adv\": {\"evalue\": \"0.001\", \"limit\": \"3\", \"contig_break\": \"10\"}, \"busco_mode\": {\"mode\": \"prot\", \"__current_case__\": 2}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"lineage\": {\"lineage_mode\": \"select_lineage\", \"__current_case__\": 1, \"lineage_dataset\": \"mucorales_odb10\"}, \"lineage_conditional\": {\"selector\": \"cached\", \"__current_case__\": 0, \"cached_db\": \"all+2024-03-21-114020\"}, \"outputs\": [\"short_summary\", \"image\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.7.1+galaxy0", + "type": "tool", + "uuid": "ecaaeff3-abd2-425a-bfad-cd71533635fe", + "when": null, + "workflow_outputs": [ + { + "label": " Busco image", + "output_name": "summary_image", + "uuid": "3cc1aef6-7dc8-400c-a699-67008ddf8f43" + }, + { + "label": " Busco table", + "output_name": "busco_table", + "uuid": "d4e839c8-aa64-4304-ada7-0d276602106d" + }, + { + "label": " Busco sum", + "output_name": "busco_sum", + "uuid": "df3cc6ae-a871-4716-b17b-1e3aeb20cda8" + } + ] + } + }, + "tags": [], + "uuid": "fcb9deed-d58d-4da3-8fe5-a7c08234d1c5", + "version": 1 +} \ No newline at end of file diff --git 
a/workflows/genome_annotation/annotation-funannotate/Genome_annotation_with_Funannotate_tests.yml b/workflows/genome_annotation/annotation-funannotate/Genome_annotation_with_Funannotate_tests.yml new file mode 100644 index 000000000..7e651aa52 --- /dev/null +++ b/workflows/genome_annotation/annotation-funannotate/Genome_annotation_with_Funannotate_tests.yml @@ -0,0 +1,235 @@ +- doc: Test outline for Genome_annotation_with_Funannotate.ga + job: + Genome sequence: + class: File + location: https://zenodo.org/records/7867921/files/genome_masked.fasta?download=1 + filetype: fasta + RNASeq R1: + class: File + location: https://zenodo.org/records/7867921/files/rnaseq_R1.fq.gz?download=1 + RNASeq R2: + class: File + location: https://zenodo.org/records/7867921/files/rnaseq_R2.fq.gz?download=1 + SwissProt: + class: File + location: https://zenodo.org/records/7867921/files/SwissProt_subset.fasta?download=1 + filetype: fasta + alternate annotation (gbk): + class: File + location: https://zenodo.org/records/7867921/files/alternate_annotation.gbk?download=1 + alternate annotation (gff3): + class: File + location: https://zenodo.org/records/7867921/files/alternate_annotation.gff3?download=1 + filetype: gff3 + + outputs: + rna star log: + asserts: + - has_n_lines: + n: 36 + rna star slice junctions: + asserts: + - has_text: + text: "scaffold_1" + - has_n_columns: + n: 9 + rna star mapped reads: + asserts: + - has_text: + text: "@HD VN:1.4 SO:coordinate" + + Funannotate predict annotation fasta transcripts mrna: + asserts: + - has_text: + text: ">FUN_000001-T1 FUN_000001" + text: ">FUN_000043-T1 FUN_000043" + Funannotate predict annotation fasta transcripts cds: + asserts: + - has_text: + text: ">FUN_000002-T1 FUN_000002" + text: ">FUN_000042-T1 FUN_000042" + Funannotate predict annotation tbl2asn report: + asserts: + - has_n_lines: + n: 26299 + Funannotate predict annotation tbl2asn error: + asserts: + - has_n_lines: + n: 3 + Funannotate predict annotation tbl2asn validation: + 
asserts: + - has_n_lines: + n: 997 + Funannotate predict annotation stats: + asserts: + - has_text: + text: "annotation" + text: "14488" + Funannotate predict annotation annot tbl: + asserts: + - has_n_lines: + n: 251010 + Funannotate predict annotation annot gbk: + asserts: + - has_n_lines: + n: 1126359 + Funannotate predict annotation annot gff3: + asserts: + - has_n_columns: + n: 9 + - has_text: + text: "##gff-version 3" + text: "scaffold_1" + Funannotate predict annotation fasta proteins: + asserts: + - has_text: + text: ">FUN_000044-T1 FUN_000044" + text: ">FUN_000084-T1 FUN_000084" + + eggNOG Mapper seed orthologs: + asserts: + - has_n_lines: + n: 14042 + - has_n_columns: + n: 11 + eggNOG Mapper annotations: + asserts: + - has_n_lines: + n: 11440 + - has_n_columns: + n: 21 + + InterProScan xml: + asserts: + - has_text: + text: "5.59-91.0" + text: "Sig_transdc_resp-reg_receiver" + InterProScan tsv: + asserts: + - has_n_columns: + n: 15 + - has_text: + text: "FUN_000556-T1" + text: "cd17546" + + Funannotate functional annotation must fix: + asserts: + - has_text: + text: "tbl2asn" + text: "Product" + Funannotate functional annotation need curating: + asserts: + - has_text: + text: "Original" + text: "Description" + Funannotate functional annotation new names passed: + asserts: + - has_text: + text: "AHNAK2" + text: "RAVER1" + Funannotate functional annotation sqn: + asserts: + - has_n_lines: + n: 8199772 + - has_text: + text: "53726" + text: "Mucoromycetes" + Funannotate functional annotation gbk: + asserts: + - has_n_lines: + n: 1630770 + - has_text: + text: "Mucor mucedo" + text: "Annotated using Funannotate 1.8.15" + Funannotate functional annotation annot: + asserts: + - has_n_lines: + n: 14489 + - has_n_columns: + n: 26 + - has_text: + text: "MMUCEDO_000001" + text: "scaffold_1" + Funannotate functional annotation contigs fsa: + - has_text: + text: ">contig_1" + text: "TGATTCGAAGTATCTTACTGAGTCTATGGGGACCAACACCGTGTTGAAAAAGGGGTCGCA" + Funannotate functional 
annotation agp: + asserts: + - has_n_lines: + n: 1425 + - has_n_columns: + n: 9 + Funannotate functional annotation tbl: + asserts: + - has_n_lines: + n: 559417 + - has_text: + text: "Feature scaffold_1" + text: "InterPro:IPR020472" + Funannotate functional annotation fa scaffolds: + asserts: + - has_text: + text: ">scaffold_1" + text: "AGTCTGTCTGGTTTCTTGACAAGCTTGAATCATGGTATCTTCGAGACCTGAGTAAACCAAATCGGCTTCT" + Funannotate functional annotation fa proteins: + asserts: + - has_text: + text: ">MMUCEDO_000001-T1 MMUCEDO_000001" + text: ">MMUCEDO_000004-T1 MMUCEDO_000004" + Funannotate functional annotation fa transcripts mrna: + asserts: + - has_text: + text: ">MMUCEDO_000025-T1 MMUCEDO_000025" + text: ">MMUCEDO_000040-T1 MMUCEDO_000040" + Funannotate functional annotation fa transcripts cds: + asserts: + - has_text: + text: ">MMUCEDO_000001-T1 MMUCEDO_000001" + text: ">MMUCEDO_000024-T1 MMUCEDO_000024" + Funannotate functional annotation gff3: + asserts: + - has_text: + text: "##gff-version 3" + text: "scaffold_1" + - has_n_columns: + n: 9 + Funannotate functional annotation tbl2asn report: + asserts: + - has_n_lines: + n: 27003 + Funannotate functional annotation stats: + asserts: + - has_text: + text: "annotation" + text: "mucorales_odb10" + + JBrowse html: + asserts: + - has_size: + value: 0.002M + delta: 0.004M + + AEGeAn ParsEval html: + - has_size: + value: 0.1M + delta: 0.04M + + Busco image: + asserts: + - has_size: + value: 0.1M + delta: 0.04M + Busco table: + asserts: + - has_n_lines: + n: 2484 + - has_n_columns: + n: 7 + Busco sum: + asserts: + - has_n_lines: + n: 12 + - has_text: + text: "BUSCO version is: 5.7.1" + text: "mucorales_odb10"