Skip to content

Commit

Permalink
First pass at generating mermaid diagrams for all workflows
Browse files Browse the repository at this point in the history
  • Loading branch information
mvdbeek committed Nov 30, 2024
1 parent 7fa5211 commit cb06c58
Show file tree
Hide file tree
Showing 83 changed files with 3,807 additions and 0 deletions.
94 changes: 94 additions & 0 deletions scripts/create_mermaid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import argparse
import os
import json
import re
from typing import Literal

# Maps a workflow step type to the Mermaid ``@{ shape: ... }`` suffix that is
# appended to that step's node definition.
STEP_TYPE_TO_SHAPE = dict(
    data_input="@{ shape: doc }",
    data_collection_input="@{ shape: docs }",
    parameter_input="@{ shape: lean-l }",
    tool="@{ shape: process }",
    subworkflow="@{ shape: subprocess }",
)


def escape_mermaid_string(input_string: str) -> str:
    """Return *input_string* with Mermaid-significant characters backslash-escaped.

    Each character matched by the character class below gets one backslash
    inserted in front of it; every other character is copied through as-is.
    """
    # Characters to escape in Mermaid diagrams
    escapable = r'[\[\]\{\}\|\<\>\\"`\*_#\+\(\)\\]'
    # Replacement template: ``\\`` is a literal backslash, ``\g<0>`` the match.
    return re.sub(escapable, r"\\\g<0>", input_string)


def step_to_mermaid_item(
    step_type: Literal[
        "parameter_input", "data_input", "data_collection_input", "tool", "subworkflow"
    ],
    step_label: str,
):
    """Build the part of a Mermaid node definition that follows the node id.

    The result is the label wrapped in ``["..."]`` followed by the shape
    suffix registered for *step_type* in ``STEP_TYPE_TO_SHAPE``. A step type
    with no entry in that table gets no suffix.
    """
    shape_suffix = STEP_TYPE_TO_SHAPE.get(step_type, "")
    # ``format`` stringifies the label the same way an f-string would.
    return '["{}"]{}'.format(step_label, shape_suffix)


def _step_display_label(step, step_id):
    """Return the escaped text shown on a step's node.

    The first truthy value among the step's ``label``, ``name`` and
    ``content_id`` is used; otherwise the step's ``id`` (or, when the step
    carries none, the key it is stored under). Absent keys are treated like
    empty values instead of raising ``KeyError``.
    """
    step_ident = step.get("id")
    if step_ident is None:
        step_ident = step_id
    raw_label = (
        step.get("label") or step.get("name") or step.get("content_id") or step_ident
    )
    # The id fallback may be an integer (0 included, which is falsy); the
    # escaper is regex based and only accepts strings, so stringify first.
    return escape_mermaid_string(str(raw_label))


def workflow_to_mermaid(workflow_json):
    """
    Converts a Galaxy workflow JSON to a Mermaid flowchart diagram.

    Each step becomes one node line (``<step key><label and shape>``),
    immediately followed by one ``<source id> --> <step key>`` line per input
    connection of that step.

    Args:
        workflow_json: The JSON representation of the Galaxy workflow. Must
            contain a ``steps`` mapping whose values each have a ``type``.
    Returns:
        A string representing the Mermaid flowchart diagram. It starts with
        ``graph LR`` and every line, including the last, ends with a newline.
    """
    lines = ["graph LR"]

    for step_id, step in workflow_json["steps"].items():
        # The label is taken from the step being rendered, so a node can never
        # pick up a literal "None" or another step's label.
        node = step_to_mermaid_item(step["type"], _step_display_label(step, step_id))
        lines.append(f"{step_id}{node}")

        for input_connection in step.get("input_connections", {}).values():
            # A connection is either a single mapping or a list of them.
            if not isinstance(input_connection, list):
                input_connection = [input_connection]
            lines.extend(f"{ic['id']} --> {step_id}" for ic in input_connection)

    return "\n".join(lines) + "\n"


def walk_directory(directory):
    """
    Walk directory and call workflow_to_mermaid on each discovered .ga file.

    For every ``<name>.ga`` file found at any depth below *directory*, the
    diagram is written next to it as ``<name>_diagram.md``, wrapped in a
    mermaid fenced code block. The output file is opened in write mode, so an
    existing file of that name is replaced.

    Args:
        directory: Root directory to search.
    """
    for root, _, filenames in os.walk(directory):
        for filename in filenames:
            if not filename.endswith(".ga"):
                continue
            file_path = os.path.join(root, filename)
            # Be explicit about UTF-8 so reading workflow JSON (and writing
            # non-ASCII labels below) does not depend on the locale encoding.
            with open(file_path, "r", encoding="utf-8") as f:
                workflow_data = json.load(f)
            mermaid_diagram = workflow_to_mermaid(workflow_data)

            mmd_path = f"{os.path.splitext(file_path)[0]}_diagram.md"
            with open(mmd_path, "w", encoding="utf-8") as f:
                f.write(f"```mermaid\n{mermaid_diagram}\n```")


def parse_args():
    """Parse the command line and return the resulting namespace.

    The only argument defined is the positional ``directory``.
    """
    cli = argparse.ArgumentParser(description="Process files in a directory")
    cli.add_argument("directory", help="Path to the input directory", type=str)
    namespace = cli.parse_args()
    return namespace


if __name__ == "__main__":
    # Script entry point: process the directory named on the command line.
    walk_directory(parse_args().directory)
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
```mermaid
graph LR
0["Pacbio Reads Collection"]@{ shape: docs }
1["HiC forward reads"]@{ shape: doc }
2["HiC reverse reads"]@{ shape: doc }
3["Genomescope Summary"]@{ shape: doc }
4["Meryl Database"]@{ shape: doc }
5["Database for Busco Lineage"]@{ shape: lean-l }
6["Lineage"]@{ shape: lean-l }
7["Name for Haplotype 1"]@{ shape: lean-l }
8["Name for Haplotype 2"]@{ shape: lean-l }
9["Bits for bloom filter"]@{ shape: lean-l }
10["SAK input file"]@{ shape: doc }
11["Homozygous Read Coverage"]@{ shape: lean-l }
12["Genomescope Model Parameters"]@{ shape: doc }
13["Cutadapt"]@{ shape: process }
0 --> 13
14["Search in textfiles"]@{ shape: process }
3 --> 14
15["Compute"]@{ shape: process }
12 --> 15
16["MultiQC"]@{ shape: process }
13 --> 16
17["Replace Text"]@{ shape: process }
14 --> 17
18["Cut"]@{ shape: process }
15 --> 18
19["Convert"]@{ shape: process }
17 --> 19
20["Estimated homozygous read coverage"]@{ shape: process }
18 --> 20
21["Cut"]@{ shape: process }
19 --> 21
22["Homozygous read coverage for Hifiasm"]@{ shape: process }
11 --> 22
20 --> 22
23["Estimated genome size"]@{ shape: process }
21 --> 23
24["Hifiasm"]@{ shape: process }
22 --> 24
9 --> 24
1 --> 24
2 --> 24
13 --> 24
25["Raw Unitig Image"]@{ shape: process }
24 --> 25
26["gfastats"]@{ shape: process }
24 --> 26
23 --> 26
27["gfastats"]@{ shape: process }
24 --> 27
23 --> 27
28["gfastats"]@{ shape: process }
24 --> 28
29["gfastats"]@{ shape: process }
24 --> 29
30["gfastats"]@{ shape: process }
24 --> 30
31["gfastats"]@{ shape: process }
24 --> 31
32["gfastats"]@{ shape: process }
24 --> 32
10 --> 32
33["gfastats"]@{ shape: process }
24 --> 33
10 --> 33
34["Text reformatting"]@{ shape: process }
26 --> 34
35["Text reformatting"]@{ shape: process }
27 --> 35
36["Data Prep Hap2"]@{ shape: subprocess }
28 --> 36
37["Data Prep Hap1"]@{ shape: subprocess }
30 --> 37
38["Text transformation"]@{ shape: process }
32 --> 38
39["Text transformation"]@{ shape: process }
33 --> 39
40["Join two Datasets"]@{ shape: process }
35 --> 40
34 --> 40
41["Plot Data"]@{ shape: subprocess }
36 --> 41
8 --> 41
7 --> 41
37 --> 41
42["Busco"]@{ shape: process }
38 --> 42
5 --> 42
6 --> 42
43["Merqury"]@{ shape: process }
39 --> 43
38 --> 43
4 --> 43
44["Busco"]@{ shape: process }
39 --> 44
5 --> 44
6 --> 44
45["Advanced Cut"]@{ shape: process }
40 --> 45
46["output\_merqury.spectra-cn.fl"]@{ shape: process }
43 --> 46
47["output\_merqury.spectra-asm.fl"]@{ shape: process }
43 --> 47
48["merqury\_qv"]@{ shape: process }
43 --> 48
49["output\_merqury.assembly\_01.spectra-cn.fl"]@{ shape: process }
43 --> 49
50["merqury\_stats"]@{ shape: process }
43 --> 50
51["output\_merqury.assembly\_02.spectra-cn.fl"]@{ shape: process }
43 --> 51
52["Replace"]@{ shape: process }
45 --> 52
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
```mermaid
graph LR
0["Pacbio Reads Collection : child"]@{ shape: docs }
1["Paternal Illumina reads \(hap1\)"]@{ shape: docs }
2["Maternal Illumina reads \(hap2\)"]@{ shape: docs }
3["Meryl Database : Child"]@{ shape: doc }
4["Hapmer Database : Paternal"]@{ shape: doc }
5["Hapmer Database : Maternal"]@{ shape: doc }
6["Bits for bloom filter"]@{ shape: lean-l }
7["Database for Busco Lineage"]@{ shape: lean-l }
8["Lineage"]@{ shape: lean-l }
9["Homozygous Read Coverage"]@{ shape: lean-l }
10["Genomescope Model Parameters"]@{ shape: doc }
11["Genomescope Summary"]@{ shape: doc }
12["Utilize homology information to correct trio-phasing errors"]@{ shape: lean-l }
13["SAK input file \(Optional\)"]@{ shape: doc }
14["Name for Haplotype 1"]@{ shape: lean-l }
15["Name for Haplotype 2"]@{ shape: lean-l }
16["Cutadapt"]@{ shape: process }
0 --> 16
17["Compute"]@{ shape: process }
10 --> 17
18["Search in textfiles"]@{ shape: process }
11 --> 18
19["MultiQC"]@{ shape: process }
16 --> 19
20["Cut"]@{ shape: process }
17 --> 20
21["Replace Text"]@{ shape: process }
18 --> 21
22["Parse parameter value"]@{ shape: process }
20 --> 22
23["Convert"]@{ shape: process }
21 --> 23
24["Homozygous read coverage for Hifiasm"]@{ shape: process }
9 --> 24
22 --> 24
25["Cut"]@{ shape: process }
23 --> 25
26["Hifiasm"]@{ shape: process }
24 --> 26
6 --> 26
16 --> 26
12 --> 26
1 --> 26
2 --> 26
27["Estimated genome size"]@{ shape: process }
25 --> 27
28["gfastats"]@{ shape: process }
26 --> 28
13 --> 28
29["gfastats"]@{ shape: process }
26 --> 29
13 --> 29
30["Raw Unitig Image"]@{ shape: process }
26 --> 30
31["gfastats"]@{ shape: process }
26 --> 31
13 --> 31
32["gfastats"]@{ shape: process }
26 --> 32
13 --> 32
33["gfastats"]@{ shape: process }
26 --> 33
34["gfastats"]@{ shape: process }
26 --> 34
35["gfastats"]@{ shape: process }
26 --> 35
27 --> 35
36["gfastats"]@{ shape: process }
26 --> 36
27 --> 36
37["Busco"]@{ shape: process }
31 --> 37
7 --> 37
8 --> 37
38["Busco"]@{ shape: process }
32 --> 38
7 --> 38
8 --> 38
39["Merqury"]@{ shape: process }
31 --> 39
32 --> 39
3 --> 39
5 --> 39
4 --> 39
40["Data prep Hap1"]@{ shape: subprocess }
33 --> 40
41["Data Prep Hap2"]@{ shape: subprocess }
34 --> 41
42["Text reformatting"]@{ shape: process }
35 --> 42
43["Text reformatting"]@{ shape: process }
36 --> 43
44["merqury\_qv"]@{ shape: process }
39 --> 44
45["output\_merqury.spectra-cn.fl"]@{ shape: process }
39 --> 45
46["output\_merqury.spectra-asm.fl"]@{ shape: process }
39 --> 46
47["output\_merqury.assembly\_01.spectra-cn.fl"]@{ shape: process }
39 --> 47
48["output\_merqury.assembly\_02.spectra-cn.fl"]@{ shape: process }
39 --> 48
49["merqury\_stats"]@{ shape: process }
39 --> 49
50["Plots"]@{ shape: subprocess }
41 --> 50
15 --> 50
14 --> 50
40 --> 50
51["Join two Datasets"]@{ shape: process }
43 --> 51
42 --> 51
52["Advanced Cut"]@{ shape: process }
51 --> 52
53["Replace"]@{ shape: process }
52 --> 53
```
Loading

0 comments on commit cb06c58

Please sign in to comment.