# File added by this diff: scripts/create_mermaid.py
# (new file mode 100644, index 000000000..814f054de)
"""Generate Mermaid flowchart diagrams for Galaxy workflow (``.ga``) files.

Run as a script with a directory argument: every ``.ga`` file found below
that directory gets a sibling ``<name>_diagram.md`` file containing a fenced
``mermaid`` code block describing the workflow steps and their connections.
"""
import argparse
import json
import os
import re
from typing import Literal

# Mermaid node-shape suffix appended after a node label, keyed by the
# workflow step "type". Step types not listed here get no suffix.
STEP_TYPE_TO_SHAPE = {
    "data_input": "@{ shape: doc }",
    "data_collection_input": "@{ shape: docs }",
    "parameter_input": "@{ shape: lean-l }",
    "tool": "@{ shape: process }",
    "subworkflow": "@{ shape: subprocess }",
}

# Characters that get a backslash prefix in node labels. Compiled once at
# import time because it is applied to every step label.
_MERMAID_SPECIAL_CHARS = re.compile(r'[\[\]{}|<>\\"`*_#+()]')


def escape_mermaid_string(input_string: str) -> str:
    """Return *input_string* with Mermaid-special characters backslash-escaped.

    Each of ``[ ] { } | < > \\ " ` * _ # + ( )`` is prefixed with a single
    backslash; every other character is left untouched.

    NOTE(review): Mermaid's documented escape mechanism for labels is entity
    codes (e.g. ``#quot;``) -- confirm that backslash escapes render as
    intended in the target renderer before relying on labels that contain
    double quotes.
    """
    # ``\\`` in the template is a literal backslash, ``\g<0>`` the matched char.
    return _MERMAID_SPECIAL_CHARS.sub(r"\\\g<0>", input_string)


def step_to_mermaid_item(
    step_type: Literal[
        "parameter_input", "data_input", "data_collection_input", "tool", "subworkflow"
    ],
    step_label: str,
) -> str:
    """Return the Mermaid node body (quoted label plus optional shape suffix).

    Args:
        step_type: Workflow step type; looked up in ``STEP_TYPE_TO_SHAPE``.
            Unknown types simply get no shape suffix.
        step_label: Already-escaped text to display inside the node.

    Returns:
        A string such as ``["Cutadapt"]@{ shape: process }``. The caller is
        expected to prefix it with the node identifier.
    """
    shape = STEP_TYPE_TO_SHAPE.get(step_type, "")
    return f'["{step_label}"]{shape}'


def _step_label(step_key, step: dict) -> str:
    """Pick the display label for one step and escape it for Mermaid.

    Preference order is ``label``, ``name``, ``content_id``; the first
    non-empty value wins. If none is set, the step ``id`` (or, failing that,
    the key under which the step is stored) is used. Values are converted to
    ``str`` so a numeric id can never reach the regex as a non-string.
    """
    for field in ("label", "name", "content_id"):
        value = step.get(field)
        if value:
            return escape_mermaid_string(str(value))
    return escape_mermaid_string(str(step.get("id", step_key)))


def workflow_to_mermaid(workflow_json: dict) -> str:
    """Convert a Galaxy workflow JSON to a Mermaid flowchart diagram.

    Args:
        workflow_json: The parsed JSON of the Galaxy workflow. Must contain a
            ``"steps"`` mapping of step key -> step dictionary.

    Returns:
        A string holding the Mermaid flowchart: a ``graph LR`` header, then
        for every step one node line followed by one ``source --> target``
        line per input connection. The string always ends with a newline.

    Raises:
        KeyError: If ``workflow_json`` has no ``"steps"`` entry.
    """
    lines = ["graph LR"]

    for step_id, step in workflow_json["steps"].items():
        # Derive the label from the step itself rather than through a
        # separate id -> label table, so a key/id mismatch cannot produce a
        # "None" label.
        label = _step_label(step_id, step)
        lines.append(f"{step_id}{step_to_mermaid_item(step.get('type'), label)}")

        for connection in (step.get("input_connections") or {}).values():
            # A single input may be fed by one connection (dict) or by
            # several (list of dicts); normalise to a list.
            sources = connection if isinstance(connection, list) else [connection]
            lines.extend(f"{source['id']} --> {step_id}" for source in sources)

    return "\n".join(lines) + "\n"


def walk_directory(directory):
    """Write a Mermaid diagram next to every ``.ga`` file below *directory*.

    For each ``<name>.ga`` file found by ``os.walk``, the workflow is loaded
    as JSON, converted with :func:`workflow_to_mermaid` and written to
    ``<name>_diagram.md`` in the same folder, wrapped in a fenced ``mermaid``
    code block. Existing diagram files are overwritten.
    """
    for root, _, filenames in os.walk(directory):
        for filename in filenames:
            if not filename.endswith(".ga"):
                continue
            file_path = os.path.join(root, filename)
            # Explicit encoding: do not depend on the platform locale.
            with open(file_path, "r", encoding="utf-8") as workflow_file:
                workflow_data = json.load(workflow_file)
            mermaid_diagram = workflow_to_mermaid(workflow_data)

            mmd_path = f"{os.path.splitext(file_path)[0]}_diagram.md"
            with open(mmd_path, "w", encoding="utf-8") as diagram_file:
                # Terminate the file with a newline so the generated
                # Markdown is a well-formed text file.
                diagram_file.write(f"```mermaid\n{mermaid_diagram}\n```\n")


def parse_args():
    """Parse the command line; the only argument is the input directory."""
    parser = argparse.ArgumentParser(description="Process files in a directory")
    parser.add_argument("directory", type=str, help="Path to the input directory")
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    walk_directory(args.directory)

# --- End of scripts/create_mermaid.py. The next file added by this diff is:
# workflows/VGP-assembly-v2/Assembly-Hifi-HiC-phasing-VGP4/Assembly-Hifi-HiC-phasing-VGP4_diagram.md
# (new file mode 100644, index 000000000..ca202141b)
+```mermaid +graph LR +0["Pacbio Reads Collection"]@{ shape: docs } +1["HiC forward reads"]@{ shape: doc } +2["HiC reverse reads"]@{ shape: doc } +3["Genomescope Summary"]@{ shape: doc } +4["Meryl Database"]@{ shape: doc } +5["Database for Busco Lineage"]@{ shape: lean-l } +6["Lineage"]@{ shape: lean-l } +7["Name for Haplotype 1"]@{ shape: lean-l } +8["Name for Haplotype 2"]@{ shape: lean-l } +9["Bits for bloom filter"]@{ shape: lean-l } +10["SAK input file"]@{ shape: doc } +11["Homozygous Read Coverage"]@{ shape: lean-l } +12["Genomescope Model Parameters"]@{ shape: doc } +13["Cutadapt"]@{ shape: process } +0 --> 13 +14["Search in textfiles"]@{ shape: process } +3 --> 14 +15["Compute"]@{ shape: process } +12 --> 15 +16["MultiQC"]@{ shape: process } +13 --> 16 +17["Replace Text"]@{ shape: process } +14 --> 17 +18["Cut"]@{ shape: process } +15 --> 18 +19["Convert"]@{ shape: process } +17 --> 19 +20["Estimated homozygous read coverage"]@{ shape: process } +18 --> 20 +21["Cut"]@{ shape: process } +19 --> 21 +22["Homozygous read coverage for Hifiasm"]@{ shape: process } +11 --> 22 +20 --> 22 +23["Estimated genome size"]@{ shape: process } +21 --> 23 +24["Hifiasm"]@{ shape: process } +22 --> 24 +9 --> 24 +1 --> 24 +2 --> 24 +13 --> 24 +25["Raw Unitig Image"]@{ shape: process } +24 --> 25 +26["gfastats"]@{ shape: process } +24 --> 26 +23 --> 26 +27["gfastats"]@{ shape: process } +24 --> 27 +23 --> 27 +28["gfastats"]@{ shape: process } +24 --> 28 +29["gfastats"]@{ shape: process } +24 --> 29 +30["gfastats"]@{ shape: process } +24 --> 30 +31["gfastats"]@{ shape: process } +24 --> 31 +32["gfastats"]@{ shape: process } +24 --> 32 +10 --> 32 +33["gfastats"]@{ shape: process } +24 --> 33 +10 --> 33 +34["Text reformatting"]@{ shape: process } +26 --> 34 +35["Text reformatting"]@{ shape: process } +27 --> 35 +36["Data Prep Hap2"]@{ shape: subprocess } +28 --> 36 +37["Data Prep Hap1"]@{ shape: subprocess } +30 --> 37 +38["Text transformation"]@{ shape: process } +32 --> 38 
+39["Text transformation"]@{ shape: process } +33 --> 39 +40["Join two Datasets"]@{ shape: process } +35 --> 40 +34 --> 40 +41["Plot Data"]@{ shape: subprocess } +36 --> 41 +8 --> 41 +7 --> 41 +37 --> 41 +42["Busco"]@{ shape: process } +38 --> 42 +5 --> 42 +6 --> 42 +43["Merqury"]@{ shape: process } +39 --> 43 +38 --> 43 +4 --> 43 +44["Busco"]@{ shape: process } +39 --> 44 +5 --> 44 +6 --> 44 +45["Advanced Cut"]@{ shape: process } +40 --> 45 +46["output\_merqury.spectra-cn.fl"]@{ shape: process } +43 --> 46 +47["output\_merqury.spectra-asm.fl"]@{ shape: process } +43 --> 47 +48["merqury\_qv"]@{ shape: process } +43 --> 48 +49["output\_merqury.assembly\_01.spectra-cn.fl"]@{ shape: process } +43 --> 49 +50["merqury\_stats"]@{ shape: process } +43 --> 50 +51["output\_merqury.assembly\_02.spectra-cn.fl"]@{ shape: process } +43 --> 51 +52["Replace"]@{ shape: process } +45 --> 52 + +``` \ No newline at end of file diff --git a/workflows/VGP-assembly-v2/Assembly-Hifi-Trio-phasing-VGP5/Assembly-Hifi-Trio-phasing-VGP5_diagram.md b/workflows/VGP-assembly-v2/Assembly-Hifi-Trio-phasing-VGP5/Assembly-Hifi-Trio-phasing-VGP5_diagram.md new file mode 100644 index 000000000..8411c6594 --- /dev/null +++ b/workflows/VGP-assembly-v2/Assembly-Hifi-Trio-phasing-VGP5/Assembly-Hifi-Trio-phasing-VGP5_diagram.md @@ -0,0 +1,120 @@ +```mermaid +graph LR +0["Pacbio Reads Collection : child"]@{ shape: docs } +1["Paternal Illumina reads \(hap1\)"]@{ shape: docs } +2["Maternal Illumina reads \(hap2\)"]@{ shape: docs } +3["Meryl Database : Child"]@{ shape: doc } +4["Hapmer Database : Paternal"]@{ shape: doc } +5["Hapmer Database : Maternal"]@{ shape: doc } +6["Bits for bloom filter"]@{ shape: lean-l } +7["Database for Busco Lineage"]@{ shape: lean-l } +8["Lineage"]@{ shape: lean-l } +9["Homozygous Read Coverage"]@{ shape: lean-l } +10["Genomescope Model Parameters"]@{ shape: doc } +11["Genomescope Summary"]@{ shape: doc } +12["Utilize homology information to correct trio-phasing errors"]@{ shape: 
lean-l } +13["SAK input file \(Optional\)"]@{ shape: doc } +14["Name for Haplotype 1"]@{ shape: lean-l } +15["Name for Haplotype 2"]@{ shape: lean-l } +16["Cutadapt"]@{ shape: process } +0 --> 16 +17["Compute"]@{ shape: process } +10 --> 17 +18["Search in textfiles"]@{ shape: process } +11 --> 18 +19["MultiQC"]@{ shape: process } +16 --> 19 +20["Cut"]@{ shape: process } +17 --> 20 +21["Replace Text"]@{ shape: process } +18 --> 21 +22["Parse parameter value"]@{ shape: process } +20 --> 22 +23["Convert"]@{ shape: process } +21 --> 23 +24["Homozygous read coverage for Hifiasm"]@{ shape: process } +9 --> 24 +22 --> 24 +25["Cut"]@{ shape: process } +23 --> 25 +26["Hifiasm"]@{ shape: process } +24 --> 26 +6 --> 26 +16 --> 26 +12 --> 26 +1 --> 26 +2 --> 26 +27["Estimated genome size"]@{ shape: process } +25 --> 27 +28["gfastats"]@{ shape: process } +26 --> 28 +13 --> 28 +29["gfastats"]@{ shape: process } +26 --> 29 +13 --> 29 +30["Raw Unitig Image"]@{ shape: process } +26 --> 30 +31["gfastats"]@{ shape: process } +26 --> 31 +13 --> 31 +32["gfastats"]@{ shape: process } +26 --> 32 +13 --> 32 +33["gfastats"]@{ shape: process } +26 --> 33 +34["gfastats"]@{ shape: process } +26 --> 34 +35["gfastats"]@{ shape: process } +26 --> 35 +27 --> 35 +36["gfastats"]@{ shape: process } +26 --> 36 +27 --> 36 +37["Busco"]@{ shape: process } +31 --> 37 +7 --> 37 +8 --> 37 +38["Busco"]@{ shape: process } +32 --> 38 +7 --> 38 +8 --> 38 +39["Merqury"]@{ shape: process } +31 --> 39 +32 --> 39 +3 --> 39 +5 --> 39 +4 --> 39 +40["Data prep Hap1"]@{ shape: subprocess } +33 --> 40 +41["Data Prep Hap2"]@{ shape: subprocess } +34 --> 41 +42["Text reformatting"]@{ shape: process } +35 --> 42 +43["Text reformatting"]@{ shape: process } +36 --> 43 +44["merqury\_qv"]@{ shape: process } +39 --> 44 +45["output\_merqury.spectra-cn.fl"]@{ shape: process } +39 --> 45 +46["output\_merqury.spectra-asm.fl"]@{ shape: process } +39 --> 46 +47["output\_merqury.assembly\_01.spectra-cn.fl"]@{ shape: process } +39 --> 
47 +48["output\_merqury.assembly\_02.spectra-cn.fl"]@{ shape: process } +39 --> 48 +49["merqury\_stats"]@{ shape: process } +39 --> 49 +50["Plots"]@{ shape: subprocess } +41 --> 50 +15 --> 50 +14 --> 50 +40 --> 50 +51["Join two Datasets"]@{ shape: process } +43 --> 51 +42 --> 51 +52["Advanced Cut"]@{ shape: process } +51 --> 52 +53["Replace"]@{ shape: process } +52 --> 53 + +``` \ No newline at end of file diff --git a/workflows/VGP-assembly-v2/Assembly-Hifi-only-VGP3/Assembly-Hifi-only-VGP3_diagram.md b/workflows/VGP-assembly-v2/Assembly-Hifi-only-VGP3/Assembly-Hifi-only-VGP3_diagram.md new file mode 100644 index 000000000..ab36e8c7e --- /dev/null +++ b/workflows/VGP-assembly-v2/Assembly-Hifi-only-VGP3/Assembly-Hifi-only-VGP3_diagram.md @@ -0,0 +1,106 @@ +```mermaid +graph LR +0["Pacbio Reads Collection"]@{ shape: docs } +1["Meryl Database"]@{ shape: doc } +2["Genomescope Summary"]@{ shape: doc } +3["Bits for Hifiasm bloom filter"]@{ shape: lean-l } +4["Homozygous Read Coverage"]@{ shape: lean-l } +5["Genomescope Model Parameters"]@{ shape: doc } +6["Database for Busco Lineage"]@{ shape: lean-l } +7["Lineage"]@{ shape: lean-l } +8["SAK input file \(Optional\)"]@{ shape: doc } +9["Name of primary assembly"]@{ shape: lean-l } +10["Name of alternate assembly"]@{ shape: lean-l } +11["Cutadapt"]@{ shape: process } +0 --> 11 +12["Search in textfiles"]@{ shape: process } +2 --> 12 +13["Pick parameter value"]@{ shape: process } +3 --> 13 +14["Compute"]@{ shape: process } +5 --> 14 +15["MultiQC"]@{ shape: process } +11 --> 15 +16["Replace Text"]@{ shape: process } +12 --> 16 +17["Cut"]@{ shape: process } +14 --> 17 +18["Convert"]@{ shape: process } +16 --> 18 +19["Parse parameter value"]@{ shape: process } +17 --> 19 +20["Cut"]@{ shape: process } +18 --> 20 +21["Homozygous read coverage for Hifiasm"]@{ shape: process } +4 --> 21 +19 --> 21 +22["Estimated genome size"]@{ shape: process } +20 --> 22 +23["Hifiasm"]@{ shape: process } +21 --> 23 +13 --> 23 +11 --> 23 +24["Raw 
Unitig Image"]@{ shape: process } +23 --> 24 +25["gfastats"]@{ shape: process } +23 --> 25 +8 --> 25 +26["gfastats"]@{ shape: process } +23 --> 26 +8 --> 26 +27["gfastats"]@{ shape: process } +23 --> 27 +8 --> 27 +28["gfastats"]@{ shape: process } +23 --> 28 +8 --> 28 +29["gfastats"]@{ shape: process } +23 --> 29 +22 --> 29 +30["gfastats"]@{ shape: process } +23 --> 30 +22 --> 30 +31["gfastats"]@{ shape: process } +23 --> 31 +32["gfastats"]@{ shape: process } +23 --> 32 +33["Busco"]@{ shape: process } +27 --> 33 +6 --> 33 +7 --> 33 +34["Merqury"]@{ shape: process } +27 --> 34 +28 --> 34 +1 --> 34 +35["Text reformatting"]@{ shape: process } +29 --> 35 +36["Text reformatting"]@{ shape: process } +30 --> 36 +37["Data Prep Primary"]@{ shape: subprocess } +31 --> 37 +38["Data Prep Alternate"]@{ shape: subprocess } +32 --> 38 +39["merqury\_qv"]@{ shape: process } +34 --> 39 +40["output\_merqury.spectra-cn.fl"]@{ shape: process } +34 --> 40 +41["output\_merqury.spectra-asm.fl"]@{ shape: process } +34 --> 41 +42["output\_merqury.assembly\_01.spectra-cn.fl"]@{ shape: process } +34 --> 42 +43["merqury\_stats"]@{ shape: process } +34 --> 43 +44["Join two Datasets"]@{ shape: process } +35 --> 44 +36 --> 44 +45["Plotting Nx and Sizes"]@{ shape: subprocess } +38 --> 45 +10 --> 45 +9 --> 45 +37 --> 45 +46["Advanced Cut"]@{ shape: process } +44 --> 46 +47["Replace"]@{ shape: process } +46 --> 47 + +``` \ No newline at end of file diff --git a/workflows/VGP-assembly-v2/Assembly-decontamination-VGP9/Assembly-decontamination-VGP9_diagram.md b/workflows/VGP-assembly-v2/Assembly-decontamination-VGP9/Assembly-decontamination-VGP9_diagram.md new file mode 100644 index 000000000..8e97dc6e6 --- /dev/null +++ b/workflows/VGP-assembly-v2/Assembly-decontamination-VGP9/Assembly-decontamination-VGP9_diagram.md @@ -0,0 +1,27 @@ +```mermaid +graph LR +0["Scaffolded assembly \(fasta\)"]@{ shape: doc } +1["Database for Kraken2"]@{ shape: lean-l } +2["soft-masking "]@{ shape: process } +0 --> 2 
+3["hard-masking"]@{ shape: process } +2 --> 3 +4["ID non-target contaminants"]@{ shape: process } +1 --> 4 +3 --> 4 +5["blast mitochondria DB"]@{ shape: process } +3 --> 5 +6["Cut"]@{ shape: process } +4 --> 6 +7["parsing blast output"]@{ shape: process } +5 --> 7 +8["Filter"]@{ shape: process } +6 --> 8 +9["Cut"]@{ shape: process } +8 --> 9 +10["concatenate scaffold lists"]@{ shape: process } +11["removing scaffolds "]@{ shape: process } +0 --> 11 +10 --> 11 + +``` \ No newline at end of file diff --git a/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0_diagram.md b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0_diagram.md new file mode 100644 index 000000000..c9f1808e8 --- /dev/null +++ b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0_diagram.md @@ -0,0 +1,16 @@ +```mermaid +graph LR +0["Collection of Pacbio Data"]@{ shape: docs } +1["Species name \(latin name\)"]@{ shape: lean-l } +2["Email adress"]@{ shape: lean-l } +3["MitoHiFi"]@{ shape: process } +2 --> 3 +1 --> 3 +4["MitoHiFi"]@{ shape: process } +0 --> 4 +3 --> 4 +3 --> 4 +5["Compress file\(s\)"]@{ shape: process } +4 --> 5 + +``` \ No newline at end of file diff --git a/workflows/VGP-assembly-v2/Plot-Nx-Size/Generate-Nx-and-Size-plots-for-multiple-assemblies_diagram.md b/workflows/VGP-assembly-v2/Plot-Nx-Size/Generate-Nx-and-Size-plots-for-multiple-assemblies_diagram.md new file mode 100644 index 000000000..a807b0907 --- /dev/null +++ b/workflows/VGP-assembly-v2/Plot-Nx-Size/Generate-Nx-and-Size-plots-for-multiple-assemblies_diagram.md @@ -0,0 +1,34 @@ +```mermaid +graph LR +0["Collection of genomes to plot"]@{ shape: docs } +1["gfastats"]@{ shape: process } +0 --> 1 +2["Sort"]@{ shape: process } +1 --> 2 +3["Text reformatting"]@{ shape: process } +2 --> 3 +4["Datamash"]@{ shape: process } +3 --> 4 +5["Add column"]@{ shape: process } +3 --> 5 +6["Parse parameter value"]@{ shape: process } +4 --> 6 +7["Compose text 
parameter value"]@{ shape: process } +6 --> 7 +8["Compute"]@{ shape: process } +5 --> 8 +7 --> 8 +9["Add input name as column"]@{ shape: process } +8 --> 9 +10["Collapse Collection"]@{ shape: process } +9 --> 10 +11["Cut"]@{ shape: process } +10 --> 11 +12["Cut"]@{ shape: process } +10 --> 12 +13["Nx Plot"]@{ shape: process } +11 --> 13 +14["Size Plot"]@{ shape: process } +12 --> 14 + +``` \ No newline at end of file diff --git a/workflows/VGP-assembly-v2/Purge-duplicate-contigs-VGP6/Purge-duplicate-contigs-VGP6_diagram.md b/workflows/VGP-assembly-v2/Purge-duplicate-contigs-VGP6/Purge-duplicate-contigs-VGP6_diagram.md new file mode 100644 index 000000000..c4d9fafa1 --- /dev/null +++ b/workflows/VGP-assembly-v2/Purge-duplicate-contigs-VGP6/Purge-duplicate-contigs-VGP6_diagram.md @@ -0,0 +1,130 @@ +```mermaid +graph LR +0["Pacbio Reads Collection - Trimmed"]@{ shape: docs } +1["Hifiasm Primary assembly"]@{ shape: doc } +2["Hifiasm Alternate assembly"]@{ shape: doc } +3["Meryl Database"]@{ shape: doc } +4["Genomescope model parameters"]@{ shape: doc } +5["Estimated genome size - Parameter File"]@{ shape: doc } +6["Database for Busco Lineage"]@{ shape: lean-l } +7["Lineage"]@{ shape: lean-l } +8["SAK input file"]@{ shape: doc } +9["Name of primary assembly"]@{ shape: lean-l } +10["Name of alternate assembly"]@{ shape: lean-l } +11["Map with minimap2"]@{ shape: process } +0 --> 11 +1 --> 11 +12["Purge overlaps"]@{ shape: process } +1 --> 12 +13["Compute"]@{ shape: process } +4 --> 13 +14["Estimated genome size"]@{ shape: process } +5 --> 14 +15["Awk command for primary assembly"]@{ shape: process } +9 --> 15 +16["Awk command for alternate assembly 2"]@{ shape: process } +10 --> 16 +17["Map with minimap2"]@{ shape: process } +12 --> 17 +12 --> 17 +18["Cut"]@{ shape: process } +13 --> 18 +19["Cut"]@{ shape: process } +13 --> 19 +20["Parse parameter value"]@{ shape: process } +18 --> 20 +21["Parse parameter value"]@{ shape: process } +19 --> 21 +22["Purge overlaps"]@{ 
shape: process } +11 --> 22 +21 --> 22 +20 --> 22 +23["Purge overlaps"]@{ shape: process } +22 --> 23 +22 --> 23 +17 --> 23 +24["Purge overlaps"]@{ shape: process } +23 --> 24 +1 --> 24 +25["Concatenate datasets"]@{ shape: process } +24 --> 25 +2 --> 25 +26["Busco"]@{ shape: process } +24 --> 26 +6 --> 26 +7 --> 26 +27["gfastats"]@{ shape: process } +24 --> 27 +8 --> 27 +28["gfastats"]@{ shape: process } +24 --> 28 +14 --> 28 +29["gfastats"]@{ shape: process } +24 --> 29 +30["Map with minimap2"]@{ shape: process } +0 --> 30 +25 --> 30 +31["Purge overlaps"]@{ shape: process } +25 --> 31 +32["Text reformatting"]@{ shape: process } +15 --> 32 +28 --> 32 +33["gfastats\_data\_prep"]@{ shape: subprocess } +29 --> 33 +34["Purge overlaps"]@{ shape: process } +30 --> 34 +21 --> 34 +20 --> 34 +35["Map with minimap2"]@{ shape: process } +31 --> 35 +31 --> 35 +36["Purge overlaps"]@{ shape: process } +34 --> 36 +34 --> 36 +35 --> 36 +37["Purge overlaps"]@{ shape: process } +36 --> 37 +25 --> 37 +38["gfastats"]@{ shape: process } +37 --> 38 +14 --> 38 +39["gfastats"]@{ shape: process } +37 --> 39 +40["Merqury"]@{ shape: process } +24 --> 40 +37 --> 40 +3 --> 40 +41["gfastats"]@{ shape: process } +37 --> 41 +8 --> 41 +42["Text reformatting"]@{ shape: process } +16 --> 42 +38 --> 42 +43["gfastats\_data\_prep"]@{ shape: subprocess } +39 --> 43 +44["merqury\_QV"]@{ shape: process } +40 --> 44 +45["output\_merqury.spectra-cn.fl"]@{ shape: process } +40 --> 45 +46["output\_merqury.spectra-asm.fl"]@{ shape: process } +40 --> 46 +47["output\_merqury.assembly\_01.spectra-cn.fl"]@{ shape: process } +40 --> 47 +48["merqury\_stats"]@{ shape: process } +40 --> 48 +49["output\_merqury.assembly\_02.spectra-cn.fl"]@{ shape: process } +40 --> 49 +50["Join two Datasets"]@{ shape: process } +32 --> 50 +42 --> 50 +51["gfastats\_plot"]@{ shape: subprocess } +43 --> 51 +10 --> 51 +9 --> 51 +33 --> 51 +52["Advanced Cut"]@{ shape: process } +50 --> 52 +53["Replace"]@{ shape: process } +52 --> 53 + +``` 
\ No newline at end of file diff --git a/workflows/VGP-assembly-v2/Purge-duplicates-one-haplotype-VGP6b/Purging-duplicates-one-haplotype-VGP6b_diagram.md b/workflows/VGP-assembly-v2/Purge-duplicates-one-haplotype-VGP6b/Purging-duplicates-one-haplotype-VGP6b_diagram.md new file mode 100644 index 000000000..5a78936ea --- /dev/null +++ b/workflows/VGP-assembly-v2/Purge-duplicates-one-haplotype-VGP6b/Purging-duplicates-one-haplotype-VGP6b_diagram.md @@ -0,0 +1,99 @@ +```mermaid +graph LR +0["Genomescope model parameters"]@{ shape: doc } +1["Pacbio Reads Collection - Trimmed"]@{ shape: docs } +2["Assembly to purge"]@{ shape: doc } +3["Meryl Database"]@{ shape: doc } +4["Assembly to leave alone \(For Merqury comparison\)"]@{ shape: doc } +5["Estimated genome size - Parameter File"]@{ shape: doc } +6["Database for Busco Lineage"]@{ shape: lean-l } +7["Lineage"]@{ shape: lean-l } +8["Name of purged assembly"]@{ shape: lean-l } +9["Name of un-altered assembly"]@{ shape: lean-l } +10["Compute"]@{ shape: process } +0 --> 10 +11["Map with minimap2"]@{ shape: process } +1 --> 11 +2 --> 11 +12["Purge overlaps"]@{ shape: process } +2 --> 12 +13["gfastats"]@{ shape: process } +4 --> 13 +14["Estimated genome size"]@{ shape: process } +5 --> 14 +15["Cut"]@{ shape: process } +10 --> 15 +16["Cut"]@{ shape: process } +10 --> 16 +17["Map with minimap2"]@{ shape: process } +12 --> 17 +12 --> 17 +18["gfastats\_data\_prep"]@{ shape: subprocess } +13 --> 18 +19["gfastats"]@{ shape: process } +4 --> 19 +14 --> 19 +20["Parse parameter value"]@{ shape: process } +15 --> 20 +21["Parse parameter value"]@{ shape: process } +16 --> 21 +22["Text reformatting"]@{ shape: process } +19 --> 22 +23["Purge overlaps"]@{ shape: process } +11 --> 23 +21 --> 23 +20 --> 23 +24["Purge overlaps"]@{ shape: process } +23 --> 24 +23 --> 24 +17 --> 24 +25["Remove REPEATs from BED"]@{ shape: process } +24 --> 25 +26["Purge overlaps"]@{ shape: process } +25 --> 26 +2 --> 26 +27["Merqury"]@{ shape: process } +26 --> 
27 +4 --> 27 +3 --> 27 +28["gfastats"]@{ shape: process } +26 --> 28 +29["Busco"]@{ shape: process } +26 --> 29 +6 --> 29 +7 --> 29 +30["Convert purged fasta to gfa"]@{ shape: process } +26 --> 30 +31["gfastats"]@{ shape: process } +26 --> 31 +14 --> 31 +32["merqury\_QV"]@{ shape: process } +27 --> 32 +33["output\_merqury.spectra-cn.fl"]@{ shape: process } +27 --> 33 +34["output\_merqury.spectra-asm.fl"]@{ shape: process } +27 --> 34 +35["output\_merqury.assembly\_01.spectra-cn.fl"]@{ shape: process } +27 --> 35 +36["merqury\_stats"]@{ shape: process } +27 --> 36 +37["output\_merqury.assembly\_02.spectra-cn.fl"]@{ shape: process } +27 --> 37 +38["gfastats\_data\_prep"]@{ shape: subprocess } +28 --> 38 +39["Text reformatting"]@{ shape: process } +31 --> 39 +40["gfastats\_plot"]@{ shape: subprocess } +18 --> 40 +9 --> 40 +8 --> 40 +38 --> 40 +41["Join two Datasets"]@{ shape: process } +39 --> 41 +22 --> 41 +42["Advanced Cut"]@{ shape: process } +41 --> 42 +43["Replace"]@{ shape: process } +42 --> 43 + +``` \ No newline at end of file diff --git a/workflows/VGP-assembly-v2/Scaffolding-Bionano-VGP7/Scaffolding-BioNano-VGP7_diagram.md b/workflows/VGP-assembly-v2/Scaffolding-Bionano-VGP7/Scaffolding-BioNano-VGP7_diagram.md new file mode 100644 index 000000000..77e8346b6 --- /dev/null +++ b/workflows/VGP-assembly-v2/Scaffolding-Bionano-VGP7/Scaffolding-BioNano-VGP7_diagram.md @@ -0,0 +1,38 @@ +```mermaid +graph LR +0["Bionano Data"]@{ shape: doc } +1["Estimated genome size - Parameter File"]@{ shape: doc } +2["Input GFA"]@{ shape: doc } +3["Conflict resolution files"]@{ shape: doc } +4["Parse parameter value"]@{ shape: process } +1 --> 4 +5["gfastats"]@{ shape: process } +2 --> 5 +6["Bionano Hybrid Scaffold"]@{ shape: process } +0 --> 6 +3 --> 6 +5 --> 6 +7["gfastats"]@{ shape: process } +2 --> 7 +6 --> 7 +8["gfastats"]@{ shape: process } +7 --> 8 +9["gfastats"]@{ shape: process } +7 --> 9 +4 --> 9 +10["gfastats"]@{ shape: process } +7 --> 10 +11["Replace"]@{ shape: 
process } +9 --> 11 +12["gfastats\_data\_prep"]@{ shape: subprocess } +10 --> 12 +13["Cut"]@{ shape: process } +12 --> 13 +14["Cut"]@{ shape: process } +12 --> 14 +15["Scatterplot with ggplot2"]@{ shape: process } +13 --> 15 +16["Scatterplot with ggplot2"]@{ shape: process } +14 --> 16 + +``` \ No newline at end of file diff --git a/workflows/VGP-assembly-v2/Scaffolding-HiC-VGP8/Scaffolding-HiC-VGP8_diagram.md b/workflows/VGP-assembly-v2/Scaffolding-HiC-VGP8/Scaffolding-HiC-VGP8_diagram.md new file mode 100644 index 000000000..b03082b8c --- /dev/null +++ b/workflows/VGP-assembly-v2/Scaffolding-HiC-VGP8/Scaffolding-HiC-VGP8_diagram.md @@ -0,0 +1,90 @@ +```mermaid +graph LR +0["Input GFA"]@{ shape: doc } +1["Haplotype"]@{ shape: lean-l } +2["Sequence graph"]@{ shape: doc } +3["Database for Busco Lineage"]@{ shape: lean-l } +4["Lineage"]@{ shape: lean-l } +5["HiC Forward reads"]@{ shape: doc } +6["HiC reverse reads"]@{ shape: doc } +7["Restriction enzymes"]@{ shape: lean-l } +8["Estimated genome size - Parameter File"]@{ shape: doc } +9["SAK input file"]@{ shape: doc } +10["Compose text parameter value"]@{ shape: process } +1 --> 10 +11["Parse parameter value"]@{ shape: process } +8 --> 11 +12["gfastats"]@{ shape: process } +0 --> 12 +9 --> 12 +13["BWA-MEM2"]@{ shape: process } +5 --> 13 +12 --> 13 +14["BWA-MEM2"]@{ shape: process } +6 --> 14 +12 --> 14 +15["Filter and merge"]@{ shape: process } +13 --> 15 +14 --> 15 +16["PretextMap"]@{ shape: process } +15 --> 16 +17["YAHS"]@{ shape: process } +2 --> 17 +15 --> 17 +7 --> 17 +12 --> 17 +18["Pretext Snapshot"]@{ shape: process } +16 --> 18 +19["Replace"]@{ shape: process } +10 --> 19 +17 --> 19 +20["Extract dataset"]@{ shape: process } +18 --> 20 +21["gfastats"]@{ shape: process } +12 --> 21 +19 --> 21 +22["gfastats"]@{ shape: process } +21 --> 22 +9 --> 22 +23["gfastats"]@{ shape: process } +21 --> 23 +11 --> 23 +24["gfastats"]@{ shape: process } +21 --> 24 +25["BWA-MEM2"]@{ shape: process } +5 --> 25 +22 --> 25 
+26["BWA-MEM2"]@{ shape: process } +6 --> 26 +22 --> 26 +27["Busco"]@{ shape: process } +22 --> 27 +3 --> 27 +4 --> 27 +28["Replace"]@{ shape: process } +23 --> 28 +29["gfastats\_data\_prep"]@{ shape: subprocess } +24 --> 29 +30["Filter and merge"]@{ shape: process } +25 --> 30 +26 --> 30 +31["Cut"]@{ shape: process } +29 --> 31 +32["Cut"]@{ shape: process } +29 --> 32 +33["PretextMap"]@{ shape: process } +30 --> 33 +34["bedtools BAM to BED"]@{ shape: process } +30 --> 34 +35["Nx Plot"]@{ shape: process } +31 --> 35 +36["Size Plot"]@{ shape: process } +32 --> 36 +37["Pretext Snapshot"]@{ shape: process } +33 --> 37 +38["Sort"]@{ shape: process } +34 --> 38 +39["Extract dataset"]@{ shape: process } +37 --> 39 + +``` \ No newline at end of file diff --git a/workflows/VGP-assembly-v2/kmer-profiling-hifi-VGP1/kmer-profiling-hifi-VGP1_diagram.md b/workflows/VGP-assembly-v2/kmer-profiling-hifi-VGP1/kmer-profiling-hifi-VGP1_diagram.md new file mode 100644 index 000000000..611692fae --- /dev/null +++ b/workflows/VGP-assembly-v2/kmer-profiling-hifi-VGP1/kmer-profiling-hifi-VGP1_diagram.md @@ -0,0 +1,18 @@ +```mermaid +graph LR +0["Collection of Pacbio Data"]@{ shape: docs } +1["K-mer length "]@{ shape: lean-l } +2["Ploidy"]@{ shape: lean-l } +3["Meryl"]@{ shape: process } +0 --> 3 +1 --> 3 +4["Meryl"]@{ shape: process } +3 --> 4 +5["Meryl"]@{ shape: process } +4 --> 5 +6["GenomeScope"]@{ shape: process } +5 --> 6 +1 --> 6 +2 --> 6 + +``` \ No newline at end of file diff --git a/workflows/VGP-assembly-v2/kmer-profiling-hifi-trio-VGP2/kmer-profiling-hifi-trio-VGP2_diagram.md b/workflows/VGP-assembly-v2/kmer-profiling-hifi-trio-VGP2/kmer-profiling-hifi-trio-VGP2_diagram.md new file mode 100644 index 000000000..d756b2cb4 --- /dev/null +++ b/workflows/VGP-assembly-v2/kmer-profiling-hifi-trio-VGP2/kmer-profiling-hifi-trio-VGP2_diagram.md @@ -0,0 +1,40 @@ +```mermaid +graph LR +0["Pacbio Hifi reads"]@{ shape: docs } +1["Paternal reads"]@{ shape: docs } +2["Maternal reads"]@{ 
shape: docs } +3["K-mer length"]@{ shape: lean-l } +4["Ploidy"]@{ shape: lean-l } +5["Meryl"]@{ shape: process } +1 --> 5 +3 --> 5 +6["Meryl"]@{ shape: process } +0 --> 6 +2 --> 6 +3 --> 6 +1 --> 6 +7["Meryl"]@{ shape: process } +2 --> 7 +3 --> 7 +8["Meryl"]@{ shape: process } +5 --> 8 +9["GenomeScope"]@{ shape: process } +6 --> 9 +3 --> 9 +4 --> 9 +10["Meryl"]@{ shape: process } +7 --> 10 +11["Meryl"]@{ shape: process } +8 --> 11 +12["Meryl"]@{ shape: process } +10 --> 12 +13["Genomescope on paternal haplotype"]@{ shape: process } +11 --> 13 +3 --> 13 +4 --> 13 +14["Genomescope on maternal haplotype"]@{ shape: process } +12 --> 14 +3 --> 14 +4 --> 14 + +``` \ No newline at end of file diff --git a/workflows/amplicon/dada2/dada2_paired_diagram.md b/workflows/amplicon/dada2/dada2_paired_diagram.md new file mode 100644 index 000000000..66e89a9bd --- /dev/null +++ b/workflows/amplicon/dada2/dada2_paired_diagram.md @@ -0,0 +1,52 @@ +```mermaid +graph LR +0["Paired input data"]@{ shape: docs } +1["Read length forward read"]@{ shape: lean-l } +2["Read length reverse read"]@{ shape: lean-l } +3["Pool samples"]@{ shape: lean-l } +4["Cached reference database"]@{ shape: lean-l } +5["Sort samples"]@{ shape: process } +0 --> 5 +6["QualityProfile before filterAndTrim"]@{ shape: process } +5 --> 6 +7["dada2: filterAndTrim"]@{ shape: process } +5 --> 7 +2 --> 7 +1 --> 7 +8["QualityProfile after filterAndTrim"]@{ shape: process } +7 --> 8 +9["Unzip collection"]@{ shape: process } +7 --> 9 +10["dada2: learnErrors"]@{ shape: process } +9 --> 10 +11["dada2: learnErrors"]@{ shape: process } +9 --> 11 +12["dada2: dada"]@{ shape: process } +9 --> 12 +3 --> 12 +10 --> 12 +13["dada2: dada"]@{ shape: process } +9 --> 13 +3 --> 13 +11 --> 13 +14["dada2: mergePairs"]@{ shape: process } +12 --> 14 +13 --> 14 +9 --> 14 +9 --> 14 +15["dada2: makeSequenceTable"]@{ shape: process } +14 --> 15 +16["dada2: removeBimeraDenovo"]@{ shape: process } +15 --> 16 +17["dada2: sequence counts"]@{ shape: 
process } +7 --> 17 +12 --> 17 +13 --> 17 +14 --> 17 +15 --> 17 +16 --> 17 +18["dada2: assignTaxonomy"]@{ shape: process } +4 --> 18 +16 --> 18 + +``` \ No newline at end of file diff --git a/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Ia-multiplexed-data-single-end_diagram.md b/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Ia-multiplexed-data-single-end_diagram.md new file mode 100644 index 000000000..887d1e0f3 --- /dev/null +++ b/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Ia-multiplexed-data-single-end_diagram.md @@ -0,0 +1,21 @@ +```mermaid +graph LR +0["Sequences"]@{ shape: doc } +1["Barcodes"]@{ shape: doc } +2["Metadata"]@{ shape: doc } +3["Metadata parameter"]@{ shape: lean-l } +4["Reverse complement barcodes"]@{ shape: lean-l } +5["Input files"]@{ shape: process } +1 --> 5 +0 --> 5 +6["Metadata as artifact"]@{ shape: process } +2 --> 6 +7["Demultiplex single-end data"]@{ shape: process } +4 --> 7 +3 --> 7 +6 --> 7 +5 --> 7 +8["Summarize demultiplexed output"]@{ shape: process } +7 --> 8 + +``` \ No newline at end of file diff --git a/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Ib-multiplexed-data-paired-end_diagram.md b/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Ib-multiplexed-data-paired-end_diagram.md new file mode 100644 index 000000000..eb8eb0622 --- /dev/null +++ b/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Ib-multiplexed-data-paired-end_diagram.md @@ -0,0 +1,23 @@ +```mermaid +graph LR +0["Forward sequences"]@{ shape: doc } +1["Reverse sequences"]@{ shape: doc } +2["Barcodes"]@{ shape: doc } +3["Metadata"]@{ shape: doc } +4["Metadata parameter"]@{ shape: lean-l } +5["Reverse complement of barcodes needed?"]@{ shape: lean-l } +6["Import data into the pipeline"]@{ shape: process } +2 --> 6 +0 --> 6 +1 --> 6 +7["Metadata as artifact"]@{ shape: process } +3 --> 7 +8["Demultiplex paired-end sequences"]@{ shape: process } +5 --> 8 +4 --> 8 +7 --> 8 +6 --> 8 +9["Summarising the demultiplexed output"]@{ shape: process } +8 --> 9 
+ +``` \ No newline at end of file diff --git a/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Ic-demultiplexed-data-single-end_diagram.md b/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Ic-demultiplexed-data-single-end_diagram.md new file mode 100644 index 000000000..186e2e242 --- /dev/null +++ b/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Ic-demultiplexed-data-single-end_diagram.md @@ -0,0 +1,16 @@ +```mermaid +graph LR +0["Sequence collection"]@{ shape: docs } +1["Extract element identifiers"]@{ shape: process } +0 --> 1 +2["Screening laneless and single-lane"]@{ shape: process } +1 --> 2 +3["Relabel sequence files"]@{ shape: process } +2 --> 3 +0 --> 3 +4["Import data into the pipeline"]@{ shape: process } +3 --> 4 +5["Summarising the demultiplexed output"]@{ shape: process } +4 --> 5 + +``` \ No newline at end of file diff --git a/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Id-demultiplexed-data-paired-end_diagram.md b/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Id-demultiplexed-data-paired-end_diagram.md new file mode 100644 index 000000000..186e2e242 --- /dev/null +++ b/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Id-demultiplexed-data-paired-end_diagram.md @@ -0,0 +1,16 @@ +```mermaid +graph LR +0["Sequence collection"]@{ shape: docs } +1["Extract element identifiers"]@{ shape: process } +0 --> 1 +2["Screening laneless and single-lane"]@{ shape: process } +1 --> 2 +3["Relabel sequence files"]@{ shape: process } +2 --> 3 +0 --> 3 +4["Import data into the pipeline"]@{ shape: process } +3 --> 4 +5["Summarising the demultiplexed output"]@{ shape: process } +4 --> 5 + +``` \ No newline at end of file diff --git a/workflows/amplicon/qiime2/qiime2-II-denoising/QIIME2-IIa-denoising-and-feature-table-creation-single-end_diagram.md b/workflows/amplicon/qiime2/qiime2-II-denoising/QIIME2-IIa-denoising-and-feature-table-creation-single-end_diagram.md new file mode 100644 index 000000000..a3822b710 --- /dev/null +++ 
b/workflows/amplicon/qiime2/qiime2-II-denoising/QIIME2-IIa-denoising-and-feature-table-creation-single-end_diagram.md @@ -0,0 +1,19 @@ +```mermaid +graph LR +0["Metadata"]@{ shape: doc } +1["Demultiplexed sequences"]@{ shape: doc } +2["Truncation length"]@{ shape: lean-l } +3["Trimming length"]@{ shape: lean-l } +4["Denoising the datasets"]@{ shape: process } +3 --> 4 +1 --> 4 +2 --> 4 +5["Tabulate DADA2 denoised representative sequences"]@{ shape: process } +4 --> 5 +6["Tabulate DADA2 statistical metadata "]@{ shape: process } +4 --> 6 +7["Summing up the dada2 output table"]@{ shape: process } +0 --> 7 +4 --> 7 + +``` \ No newline at end of file diff --git a/workflows/amplicon/qiime2/qiime2-II-denoising/QIIME2-IIb-denoising-and-feature-table-creation-paired-end_diagram.md b/workflows/amplicon/qiime2/qiime2-II-denoising/QIIME2-IIb-denoising-and-feature-table-creation-paired-end_diagram.md new file mode 100644 index 000000000..e83728c1a --- /dev/null +++ b/workflows/amplicon/qiime2/qiime2-II-denoising/QIIME2-IIb-denoising-and-feature-table-creation-paired-end_diagram.md @@ -0,0 +1,23 @@ +```mermaid +graph LR +0["Metadata"]@{ shape: doc } +1["Demultiplexed sequences"]@{ shape: doc } +2["Truncation length \(forward\)"]@{ shape: lean-l } +3["Truncation length \(reverse\)"]@{ shape: lean-l } +4["Trimming length \(forward\)"]@{ shape: lean-l } +5["Trimming length \(reverse\)"]@{ shape: lean-l } +6["Denoising the datasets"]@{ shape: process } +4 --> 6 +5 --> 6 +1 --> 6 +2 --> 6 +3 --> 6 +7["Tabulate DADA2 denoised representative sequences"]@{ shape: process } +6 --> 7 +8["Tabulate DADA2 statistical metadata "]@{ shape: process } +6 --> 8 +9["Summing up the dada2 output table"]@{ shape: process } +0 --> 9 +6 --> 9 + +``` \ No newline at end of file diff --git a/workflows/amplicon/qiime2/qiime2-III-VI-downsteam/QIIME2-III-V-Phylogeny-Rarefaction-Taxonomic-Analysis_diagram.md 
b/workflows/amplicon/qiime2/qiime2-III-VI-downsteam/QIIME2-III-V-Phylogeny-Rarefaction-Taxonomic-Analysis_diagram.md new file mode 100644 index 000000000..517f00d00 --- /dev/null +++ b/workflows/amplicon/qiime2/qiime2-III-VI-downsteam/QIIME2-III-V-Phylogeny-Rarefaction-Taxonomic-Analysis_diagram.md @@ -0,0 +1,25 @@ +```mermaid +graph LR +0["Representative sequences"]@{ shape: doc } +1["Feature table"]@{ shape: doc } +2["Metadata"]@{ shape: doc } +3["Minimum depth"]@{ shape: lean-l } +4["Maximum depth"]@{ shape: lean-l } +5["SEPP fragment insertion reference"]@{ shape: doc } +6["Taxonomic classifier"]@{ shape: doc } +7["Phylogenetic tree for diversity analysis"]@{ shape: subprocess } +0 --> 7 +5 --> 7 +8["Taxonomic analysis"]@{ shape: subprocess } +1 --> 8 +0 --> 8 +2 --> 8 +6 --> 8 +9["Rarefaction"]@{ shape: subprocess } +1 --> 9 +4 --> 9 +2 --> 9 +3 --> 9 +7 --> 9 + +``` \ No newline at end of file diff --git a/workflows/amplicon/qiime2/qiime2-III-VI-downsteam/QIIME2-VI-diversity-metrics-and-estimations_diagram.md b/workflows/amplicon/qiime2/qiime2-III-VI-downsteam/QIIME2-VI-diversity-metrics-and-estimations_diagram.md new file mode 100644 index 000000000..e939d57dd --- /dev/null +++ b/workflows/amplicon/qiime2/qiime2-III-VI-downsteam/QIIME2-VI-diversity-metrics-and-estimations_diagram.md @@ -0,0 +1,58 @@ +```mermaid +graph LR +0["Sampling depth"]@{ shape: lean-l } +1["Metadata"]@{ shape: doc } +2["Feature table"]@{ shape: doc } +3["Rooted tree"]@{ shape: doc } +4["Target metadata parameter \(for beta diversity\)"]@{ shape: lean-l } +5["qiime2 tools import"]@{ shape: process } +1 --> 5 +6["Diversity metrics"]@{ shape: process } +1 --> 6 +3 --> 6 +0 --> 6 +2 --> 6 +7["Alpha diversity metrics - Pielou's evenness"]@{ shape: process } +6 --> 7 +1 --> 7 +8["Alpha diversity metrics - Observed features"]@{ shape: process } +6 --> 8 +1 --> 8 +9["Alpha diversity metrics - Shannon's diversity index"]@{ shape: process } +6 --> 9 +1 --> 9 +10["Beta diversity - Jaccard 
distance matrix"]@{ shape: process } +6 --> 10 +4 --> 10 +5 --> 10 +11["Beta diversity - Bray-Curtis distance matrix"]@{ shape: process } +6 --> 11 +4 --> 11 +5 --> 11 +12["Emperor plot collection"]@{ shape: process } +6 --> 12 +6 --> 12 +6 --> 12 +6 --> 12 +13["Beta diversity - weighted UniFrac distance matrix"]@{ shape: process } +6 --> 13 +4 --> 13 +5 --> 13 +14["PCoA collection"]@{ shape: process } +6 --> 14 +6 --> 14 +6 --> 14 +6 --> 14 +15["Distance matrix collection"]@{ shape: process } +6 --> 15 +6 --> 15 +6 --> 15 +6 --> 15 +16["Richness and evenness collection"]@{ shape: process } +6 --> 16 +6 --> 16 +6 --> 16 +6 --> 16 +6 --> 16 + +``` \ No newline at end of file diff --git a/workflows/bacterial_genomics/amr_gene_detection/amr_gene_detection_diagram.md b/workflows/bacterial_genomics/amr_gene_detection/amr_gene_detection_diagram.md new file mode 100644 index 000000000..0a45d4787 --- /dev/null +++ b/workflows/bacterial_genomics/amr_gene_detection/amr_gene_detection_diagram.md @@ -0,0 +1,29 @@ +```mermaid +graph LR +0["Input sequence fasta"]@{ shape: doc } +1["Select a taxonomy group point mutation"]@{ shape: lean-l } +2["Select a AMR genes detection database"]@{ shape: lean-l } +3["Select a virulence genes detection database"]@{ shape: lean-l } +4["staramr\_amr\_genes"]@{ shape: process } +0 --> 4 +5["amrfinderplus\_point\_mutation"]@{ shape: process } +2 --> 5 +0 --> 5 +1 --> 5 +6["abricate\_virulence"]@{ shape: process } +3 --> 6 +0 --> 6 +7["ToolDistillator"]@{ shape: process } +6 --> 7 +3 --> 7 +4 --> 7 +4 --> 7 +4 --> 7 +5 --> 7 +5 --> 7 +5 --> 7 +2 --> 7 +8["ToolDistillator summarize"]@{ shape: process } +7 --> 8 + +``` \ No newline at end of file diff --git a/workflows/bacterial_genomics/bacterial_genome_annotation/bacterial_genome_annotation_diagram.md b/workflows/bacterial_genomics/bacterial_genome_annotation/bacterial_genome_annotation_diagram.md new file mode 100644 index 000000000..8c67c7091 --- /dev/null +++ 
b/workflows/bacterial_genomics/bacterial_genome_annotation/bacterial_genome_annotation_diagram.md @@ -0,0 +1,48 @@ +```mermaid +graph LR +0["Input sequence fasta"]@{ shape: doc } +1["Select a plasmid detection database"]@{ shape: lean-l } +2["Select a bacterial genome annotation database"]@{ shape: lean-l } +3["Select a AMRFinderPlus database"]@{ shape: lean-l } +4["genomic\_annotation\_insertionelement\_isescan"]@{ shape: process } +0 --> 4 +5["genomic\_annotation\_integron"]@{ shape: process } +0 --> 5 +6["genomic\_annotation\_plasmid\_plasmidfinder"]@{ shape: process } +1 --> 6 +0 --> 6 +7["Bakta"]@{ shape: process } +3 --> 7 +2 --> 7 +0 --> 7 +8["ToolDistillator"]@{ shape: process } +6 --> 8 +6 --> 8 +6 --> 8 +6 --> 8 +1 --> 8 +4 --> 8 +4 --> 8 +4 --> 8 +4 --> 8 +4 --> 8 +4 --> 8 +5 --> 8 +5 --> 8 +7 --> 8 +7 --> 8 +7 --> 8 +7 --> 8 +7 --> 8 +7 --> 8 +7 --> 8 +7 --> 8 +7 --> 8 +7 --> 8 +7 --> 8 +2 --> 8 +7 --> 8 +9["ToolDistillator summarize"]@{ shape: process } +8 --> 9 + +``` \ No newline at end of file diff --git a/workflows/computational-chemistry/fragment-based-docking-scoring/fragment-based-docking-scoring_diagram.md b/workflows/computational-chemistry/fragment-based-docking-scoring/fragment-based-docking-scoring_diagram.md new file mode 100644 index 000000000..62a04bf69 --- /dev/null +++ b/workflows/computational-chemistry/fragment-based-docking-scoring/fragment-based-docking-scoring_diagram.md @@ -0,0 +1,40 @@ +```mermaid +graph LR +0["Number of poses"]@{ shape: lean-l } +1["Receptor \(PDB\)"]@{ shape: doc } +2["All fragments \(SDF\)"]@{ shape: doc } +3["Collection size for docking"]@{ shape: lean-l } +4["SuCOS threshold"]@{ shape: lean-l } +5["Fragment for SuCOS scoring \(SDF/MOL\)"]@{ shape: doc } +6["Candidate compounds \(SMILES\)"]@{ shape: doc } +7["Compound conversion"]@{ shape: process } +1 --> 7 +8["Create Frankenstein ligand"]@{ shape: process } +2 --> 8 +9["Compose text parameter value"]@{ shape: process } +4 --> 9 +10["Enumerate changes"]@{ 
shape: process } +6 --> 10 +11["rDock cavity definition"]@{ shape: process } +8 --> 11 +7 --> 11 +12["Compound conversion"]@{ shape: process } +10 --> 12 +13["Split file"]@{ shape: process } +12 --> 13 +3 --> 13 +14["rDock docking"]@{ shape: process } +11 --> 14 +13 --> 14 +0 --> 14 +7 --> 14 +15["Collapse Collection"]@{ shape: process } +14 --> 15 +16["Score docked poses using SuCOS"]@{ shape: process } +15 --> 16 +5 --> 16 +17["rDock docking"]@{ shape: process } +9 --> 17 +16 --> 17 + +``` \ No newline at end of file diff --git a/workflows/computational-chemistry/gromacs-dctmd/gromacs-dctmd_diagram.md b/workflows/computational-chemistry/gromacs-dctmd/gromacs-dctmd_diagram.md new file mode 100644 index 000000000..3912aa871 --- /dev/null +++ b/workflows/computational-chemistry/gromacs-dctmd/gromacs-dctmd_diagram.md @@ -0,0 +1,84 @@ +```mermaid +graph LR +0["Ligand SDF"]@{ shape: doc } +1["pH to protonate ligand"]@{ shape: lean-l } +2["Protein PDB"]@{ shape: doc } +3["Salt concentration"]@{ shape: lean-l } +4["Water model"]@{ shape: lean-l } +5["Force field"]@{ shape: lean-l } +6["Number of simulations"]@{ shape: lean-l } +7["Temperature"]@{ shape: lean-l } +8["Number of equilibration steps"]@{ shape: lean-l } +9["Online data"]@{ shape: process } +10["Pulling rate"]@{ shape: lean-l } +11["Step length \(ps\)"]@{ shape: lean-l } +12["Protein pull group"]@{ shape: lean-l } +13["Number of steps"]@{ shape: lean-l } +14["Pull group pbcatom"]@{ shape: lean-l } +15["Create GRO and TOP complex files"]@{ shape: subprocess } +2 --> 15 +5 --> 15 +0 --> 15 +4 --> 15 +1 --> 15 +16["Create text file"]@{ shape: process } +7 --> 16 +6 --> 16 +17["Compose text parameter value"]@{ shape: process } +10 --> 17 +18["Compose text parameter value"]@{ shape: process } +11 --> 18 +19["Compose text parameter value"]@{ shape: process } +13 --> 19 +20["Compose text parameter value"]@{ shape: process } +14 --> 20 +21["GROMACS solvation and adding ions"]@{ shape: process } +3 --> 21 +15 --> 21 
+15 --> 21 +22["Split file"]@{ shape: process } +16 --> 22 +23["Add line to file"]@{ shape: process } +9 --> 23 +17 --> 23 +24["GROMACS energy minimization"]@{ shape: process } +21 --> 24 +21 --> 24 +25["Parse parameter value"]@{ shape: process } +22 --> 25 +26["Add line to file"]@{ shape: process } +23 --> 26 +19 --> 26 +27["Create GROMACS index files"]@{ shape: process } +24 --> 27 +28["GROMACS simulation"]@{ shape: process } +24 --> 28 +15 --> 28 +8 --> 28 +11 --> 28 +25 --> 28 +21 --> 28 +29["Add line to file"]@{ shape: process } +26 --> 29 +18 --> 29 +30["Text transformation"]@{ shape: process } +27 --> 30 +31["Add line to file"]@{ shape: process } +29 --> 31 +20 --> 31 +32["Add line to file"]@{ shape: process } +30 --> 32 +12 --> 32 +33["Concatenate datasets"]@{ shape: process } +27 --> 33 +32 --> 33 +34["GROMACS simulation"]@{ shape: process } +28 --> 34 +28 --> 34 +33 --> 34 +31 --> 34 +21 --> 34 +35["dcTMD friction correction"]@{ shape: process } +34 --> 35 + +``` \ No newline at end of file diff --git a/workflows/computational-chemistry/gromacs-mmgbsa/gromacs-mmgbsa_diagram.md b/workflows/computational-chemistry/gromacs-mmgbsa/gromacs-mmgbsa_diagram.md new file mode 100644 index 000000000..82f8e7bff --- /dev/null +++ b/workflows/computational-chemistry/gromacs-mmgbsa/gromacs-mmgbsa_diagram.md @@ -0,0 +1,73 @@ +```mermaid +graph LR +0["Salt concentration"]@{ shape: lean-l } +1["Number of simulations"]@{ shape: lean-l } +2["Apoprotein PDB"]@{ shape: doc } +3["Water model"]@{ shape: lean-l } +4["pH"]@{ shape: lean-l } +5["Force field"]@{ shape: lean-l } +6["Ligand SDF"]@{ shape: doc } +7["NVT equilibration steps"]@{ shape: lean-l } +8["NPT equilibration steps"]@{ shape: lean-l } +9["Production steps"]@{ shape: lean-l } +10["Compose text parameter value"]@{ shape: process } +0 --> 10 +11["Create GRO and TOP complex files"]@{ shape: subprocess } +2 --> 11 +5 --> 11 +6 --> 11 +3 --> 11 +4 --> 11 +12["Create text file"]@{ shape: process } +10 --> 12 +1 --> 12 
+13["GROMACS structure configuration"]@{ shape: process } +11 --> 13 +14["Split file"]@{ shape: process } +12 --> 14 +15["Parse parameter value"]@{ shape: process } +14 --> 15 +16["GROMACS solvation and adding ions"]@{ shape: process } +15 --> 16 +13 --> 16 +11 --> 16 +17["GROMACS energy minimization"]@{ shape: process } +16 --> 17 +16 --> 17 +18["Convert Parameters"]@{ shape: process } +16 --> 18 +16 --> 18 +19["GROMACS simulation"]@{ shape: process } +17 --> 19 +11 --> 19 +7 --> 19 +16 --> 19 +20["GROMACS simulation"]@{ shape: process } +19 --> 20 +19 --> 20 +11 --> 20 +8 --> 20 +16 --> 20 +21["GROMACS simulation"]@{ shape: process } +20 --> 21 +20 --> 21 +9 --> 21 +16 --> 21 +22["MDTraj file converter"]@{ shape: process } +21 --> 22 +23["MMPBSA/MMGBSA"]@{ shape: process } +18 --> 23 +18 --> 23 +18 --> 23 +18 --> 23 +22 --> 23 +24["Search in textfiles"]@{ shape: process } +23 --> 24 +25["Collapse Collection"]@{ shape: process } +24 --> 25 +26["Cut"]@{ shape: process } +25 --> 26 +27["Summary Statistics"]@{ shape: process } +26 --> 27 + +``` \ No newline at end of file diff --git a/workflows/computational-chemistry/protein-ligand-complex-parameterization/protein-ligand-complex-parameterization_diagram.md b/workflows/computational-chemistry/protein-ligand-complex-parameterization/protein-ligand-complex-parameterization_diagram.md new file mode 100644 index 000000000..4e3f76996 --- /dev/null +++ b/workflows/computational-chemistry/protein-ligand-complex-parameterization/protein-ligand-complex-parameterization_diagram.md @@ -0,0 +1,35 @@ +```mermaid +graph LR +0["pH"]@{ shape: lean-l } +1["Ligand SDF"]@{ shape: doc } +2["Apoprotein PDB"]@{ shape: doc } +3["Water model"]@{ shape: lean-l } +4["Force field"]@{ shape: lean-l } +5["Compound conversion"]@{ shape: process } +1 --> 5 +0 --> 5 +6["Descriptors"]@{ shape: process } +1 --> 6 +7["GROMACS initial setup"]@{ shape: process } +4 --> 7 +2 --> 7 +3 --> 7 +8["Search in textfiles"]@{ shape: process } +5 --> 8 +9["Cut"]@{ 
shape: process } +6 --> 9 +10["Parse parameter value"]@{ shape: process } +9 --> 10 +11["AnteChamber"]@{ shape: process } +10 --> 11 +8 --> 11 +12["Generate MD topologies for small molecules"]@{ shape: process } +10 --> 12 +11 --> 12 +13["Merge GROMACS topologies"]@{ shape: process } +12 --> 13 +12 --> 13 +7 --> 13 +7 --> 13 + +``` \ No newline at end of file diff --git a/workflows/data-fetching/parallel-accession-download/parallel-accession-download_diagram.md b/workflows/data-fetching/parallel-accession-download/parallel-accession-download_diagram.md new file mode 100644 index 000000000..e24a22e13 --- /dev/null +++ b/workflows/data-fetching/parallel-accession-download/parallel-accession-download_diagram.md @@ -0,0 +1,13 @@ +```mermaid +graph LR +0["Run accessions"]@{ shape: doc } +1["Split accessions to collection"]@{ shape: process } +0 --> 1 +2["fasterq-dump"]@{ shape: process } +1 --> 2 +3["flatten paired output"]@{ shape: process } +2 --> 3 +4["flatten single end output"]@{ shape: process } +2 --> 4 + +``` \ No newline at end of file diff --git a/workflows/data-fetching/sra-manifest-to-concatenated-fastqs/sra-manifest-to-concatenated-fastqs_diagram.md b/workflows/data-fetching/sra-manifest-to-concatenated-fastqs/sra-manifest-to-concatenated-fastqs_diagram.md new file mode 100644 index 000000000..a2d8754c8 --- /dev/null +++ b/workflows/data-fetching/sra-manifest-to-concatenated-fastqs/sra-manifest-to-concatenated-fastqs_diagram.md @@ -0,0 +1,37 @@ +```mermaid +graph LR +0["SRA\_manifest"]@{ shape: doc } +1["Column number with SRA ID"]@{ shape: lean-l } +2["Column number with final identifier"]@{ shape: lean-l } +3["Set SRA column to 1 if at 0"]@{ shape: process } +1 --> 3 +4["Compute column expression"]@{ shape: process } +3 --> 4 +2 --> 4 +5["Cut columns of interest"]@{ shape: process } +4 --> 5 +0 --> 5 +6["generate table for relabelling"]@{ shape: process } +5 --> 6 +7["Cut to get only SRA"]@{ shape: process } +5 --> 7 +8["split file to get one SRA per file 
\+ header"]@{ shape: process } +7 --> 8 +9["get Fastqs from SRA IDs"]@{ shape: process } +8 --> 9 +10["relabel pair collec to get SRA\+sample"]@{ shape: process } +6 --> 10 +9 --> 10 +11["relabel single collec to get SRA\+sample"]@{ shape: process } +6 --> 11 +9 --> 11 +12["Apply rules"]@{ shape: process } +10 --> 12 +13["Apply rules"]@{ shape: process } +11 --> 13 +14["Concatenate multiple datasets"]@{ shape: process } +12 --> 14 +15["Concatenate multiple datasets"]@{ shape: process } +13 --> 15 + +``` \ No newline at end of file diff --git a/workflows/epigenetics/atacseq/atacseq_diagram.md b/workflows/epigenetics/atacseq/atacseq_diagram.md new file mode 100644 index 000000000..48d225d06 --- /dev/null +++ b/workflows/epigenetics/atacseq/atacseq_diagram.md @@ -0,0 +1,77 @@ +```mermaid +graph LR +0["PE fastq input"]@{ shape: docs } +1["reference\_genome"]@{ shape: lean-l } +2["effective\_genome\_size"]@{ shape: lean-l } +3["bin\_size"]@{ shape: lean-l } +4["Cutadapt \(remove adapter \+ bad quality bases\)"]@{ shape: process } +0 --> 4 +5["Bowtie2 map on reference"]@{ shape: process } +4 --> 5 +1 --> 5 +6["filter MAPQ30 concordant pairs and not mitochondrial pairs"]@{ shape: process } +5 --> 6 +7["Get number of reads per chromosome"]@{ shape: process } +5 --> 7 +8["remove PCR duplicates"]@{ shape: process } +6 --> 8 +9["reads in chrM/MT for multiQC"]@{ shape: process } +7 --> 9 +10["convert BAM to BED to improve peak calling"]@{ shape: process } +8 --> 10 +11["Compute fragment length histogram"]@{ shape: process } +8 --> 11 +12["number of reads"]@{ shape: process } +8 --> 12 +13["Call Peak with MACS2"]@{ shape: process } +2 --> 13 +10 --> 13 +14["remove comments lines"]@{ shape: process } +11 --> 14 +15["compute 1/million reads"]@{ shape: process } +12 --> 15 +16["Bigwig from MACS2 \(no norm\)"]@{ shape: process } +13 --> 16 +17["get summits \+/-500kb"]@{ shape: process } +1 --> 17 +13 --> 17 +18["summary of MACS2"]@{ shape: process } +13 --> 18 +19["Convert 
1/million reads to parameter"]@{ shape: process } +15 --> 19 +20["Isolate each bigwig do normalize not average"]@{ shape: process } +16 --> 20 +21["Merge summits \+/-500kb"]@{ shape: process } +17 --> 21 +22["normalize by million reads"]@{ shape: process } +3 --> 22 +19 --> 22 +20 --> 22 +23["Compute coverage on summits \+/-500kb"]@{ shape: process } +21 --> 23 +8 --> 23 +24["number of reads in peaks"]@{ shape: process } +23 --> 24 +25["compute 1/million reads in peaks"]@{ shape: process } +24 --> 25 +26["Combine number of reads in peaks with total number of reads"]@{ shape: process } +24 --> 26 +12 --> 26 +27["Convert 1/million reads in peaks to parameter"]@{ shape: process } +25 --> 27 +28["reads in peaks multiQC"]@{ shape: process } +26 --> 28 +29["normalize by million reads in peaks"]@{ shape: process } +3 --> 29 +27 --> 29 +20 --> 29 +30["MultiQC"]@{ shape: process } +4 --> 30 +5 --> 30 +9 --> 30 +8 --> 30 +14 --> 30 +13 --> 30 +28 --> 30 + +``` \ No newline at end of file diff --git a/workflows/epigenetics/average-bigwig-between-replicates/average-bigwig-between-replicates_diagram.md b/workflows/epigenetics/average-bigwig-between-replicates/average-bigwig-between-replicates_diagram.md new file mode 100644 index 000000000..77652e098 --- /dev/null +++ b/workflows/epigenetics/average-bigwig-between-replicates/average-bigwig-between-replicates_diagram.md @@ -0,0 +1,11 @@ +```mermaid +graph LR +0["Bigwig to average"]@{ shape: docs } +1["bin\_size"]@{ shape: lean-l } +2["Apply rules"]@{ shape: process } +0 --> 2 +3["average bigwigs from different replicates"]@{ shape: process } +1 --> 3 +2 --> 3 + +``` \ No newline at end of file diff --git a/workflows/epigenetics/chipseq-pe/chipseq-pe_diagram.md b/workflows/epigenetics/chipseq-pe/chipseq-pe_diagram.md new file mode 100644 index 000000000..a28747d62 --- /dev/null +++ b/workflows/epigenetics/chipseq-pe/chipseq-pe_diagram.md @@ -0,0 +1,31 @@ +```mermaid +graph LR +0["PE fastq input"]@{ shape: docs } 
+1["adapter\_forward"]@{ shape: lean-l } +2["adapter\_reverse"]@{ shape: lean-l } +3["reference\_genome"]@{ shape: lean-l } +4["effective\_genome\_size"]@{ shape: lean-l } +5["normalize\_profile"]@{ shape: lean-l } +6["Cutadapt \(remove adapter \+ bad quality bases\)"]@{ shape: process } +0 --> 6 +1 --> 6 +2 --> 6 +7["Bowtie2 map on reference"]@{ shape: process } +6 --> 7 +3 --> 7 +8["filter MAPQ30 concordent pairs"]@{ shape: process } +7 --> 8 +9["Call Peaks with MACS2"]@{ shape: process } +5 --> 9 +4 --> 9 +8 --> 9 +10["summary of MACS2"]@{ shape: process } +9 --> 10 +11["Bigwig from MACS2"]@{ shape: process } +9 --> 11 +12["MultiQC"]@{ shape: process } +6 --> 12 +7 --> 12 +9 --> 12 + +``` \ No newline at end of file diff --git a/workflows/epigenetics/chipseq-sr/chipseq-sr_diagram.md b/workflows/epigenetics/chipseq-sr/chipseq-sr_diagram.md new file mode 100644 index 000000000..01a993084 --- /dev/null +++ b/workflows/epigenetics/chipseq-sr/chipseq-sr_diagram.md @@ -0,0 +1,29 @@ +```mermaid +graph LR +0["SR fastq input"]@{ shape: docs } +1["adapter\_forward"]@{ shape: lean-l } +2["reference\_genome"]@{ shape: lean-l } +3["effective\_genome\_size"]@{ shape: lean-l } +4["normalize\_profile"]@{ shape: lean-l } +5["Cutadapt \(remove adapter \+ bad quality bases\)"]@{ shape: process } +0 --> 5 +1 --> 5 +6["Bowtie2 map on reference"]@{ shape: process } +5 --> 6 +2 --> 6 +7["filter MAPQ30"]@{ shape: process } +6 --> 7 +8["Call Peaks with MACS2"]@{ shape: process } +4 --> 8 +3 --> 8 +7 --> 8 +9["summary of MACS2"]@{ shape: process } +8 --> 9 +10["Bigwig from MACS2"]@{ shape: process } +8 --> 10 +11["MultiQC"]@{ shape: process } +5 --> 11 +6 --> 11 +8 --> 11 + +``` \ No newline at end of file diff --git a/workflows/epigenetics/consensus-peaks/consensus-peaks-atac-cutandrun_diagram.md b/workflows/epigenetics/consensus-peaks/consensus-peaks-atac-cutandrun_diagram.md new file mode 100644 index 000000000..64812ad7f --- /dev/null +++ 
b/workflows/epigenetics/consensus-peaks/consensus-peaks-atac-cutandrun_diagram.md @@ -0,0 +1,64 @@ +```mermaid +graph LR +0["n rmDup BAM"]@{ shape: docs } +1["Minimum number of overlap"]@{ shape: lean-l } +2["effective\_genome\_size"]@{ shape: lean-l } +3["bin\_size"]@{ shape: lean-l } +4["convert BAM to BED"]@{ shape: process } +0 --> 4 +5["count number of reads"]@{ shape: process } +0 --> 5 +6["generate filter rule"]@{ shape: process } +1 --> 6 +7["call peaks individually"]@{ shape: process } +2 --> 7 +4 --> 7 +8["put all nb of reads into single dataset"]@{ shape: process } +5 --> 8 +9["compute multi intersect"]@{ shape: process } +7 --> 9 +10["individual normalized bigwig"]@{ shape: process } +7 --> 10 +11["get min value"]@{ shape: process } +8 --> 11 +12["get nb of replicates"]@{ shape: process } +8 --> 12 +13["filter multi intersect"]@{ shape: process } +6 --> 13 +9 --> 13 +14["average coverage from replicates"]@{ shape: process } +3 --> 14 +10 --> 14 +15["convert min value to text"]@{ shape: process } +11 --> 15 +16["Parse parameter value"]@{ shape: process } +12 --> 16 +17["create a dataset with the min value as many times as there are replicates"]@{ shape: process } +15 --> 17 +16 --> 17 +18["split min value"]@{ shape: process } +17 --> 18 +19["convert min nb of reads to parameter"]@{ shape: process } +18 --> 19 +20["select random reads"]@{ shape: process } +0 --> 20 +19 --> 20 +21["convert subsampled bam to bed"]@{ shape: process } +20 --> 21 +22["call peaks on merge"]@{ shape: process } +2 --> 22 +21 --> 22 +23["get merged peaks overlapping at least x replicates"]@{ shape: process } +22 --> 23 +13 --> 23 +24["multiQC"]@{ shape: process } +7 --> 24 +22 --> 24 +25["only keep peaks with summits overlapping intersection of at least x replicates"]@{ shape: process } +23 --> 25 +26["keep only columns of narrowPeak"]@{ shape: process } +25 --> 26 +27["discard duplicated lines"]@{ shape: process } +26 --> 27 + +``` \ No newline at end of file diff --git 
a/workflows/epigenetics/consensus-peaks/consensus-peaks-chip-pe_diagram.md b/workflows/epigenetics/consensus-peaks/consensus-peaks-chip-pe_diagram.md new file mode 100644 index 000000000..471dbdbc1 --- /dev/null +++ b/workflows/epigenetics/consensus-peaks/consensus-peaks-chip-pe_diagram.md @@ -0,0 +1,60 @@ +```mermaid +graph LR +0["n rmDup BAMPE"]@{ shape: docs } +1["Minimum number of overlap"]@{ shape: lean-l } +2["effective\_genome\_size"]@{ shape: lean-l } +3["bin\_size"]@{ shape: lean-l } +4["count number of reads"]@{ shape: process } +0 --> 4 +5["generate filter rule"]@{ shape: process } +1 --> 5 +6["call peaks individually"]@{ shape: process } +2 --> 6 +0 --> 6 +7["put all nb of reads into single dataset"]@{ shape: process } +4 --> 7 +8["compute multi intersect"]@{ shape: process } +6 --> 8 +9["individual normalized bigwig"]@{ shape: process } +6 --> 9 +10["get min value"]@{ shape: process } +7 --> 10 +11["get nb of replicates"]@{ shape: process } +7 --> 11 +12["filter multi intersect"]@{ shape: process } +5 --> 12 +8 --> 12 +13["average coverage from replicates"]@{ shape: process } +3 --> 13 +9 --> 13 +14["convert min value to text"]@{ shape: process } +10 --> 14 +15["Parse parameter value"]@{ shape: process } +11 --> 15 +16["create a dataset with the min value as many times as there are replicates"]@{ shape: process } +14 --> 16 +15 --> 16 +17["split min value"]@{ shape: process } +16 --> 17 +18["convert min nb of reads to parameter"]@{ shape: process } +17 --> 18 +19["downsample BAM"]@{ shape: process } +0 --> 19 +18 --> 19 +20["call peaks on merge"]@{ shape: process } +2 --> 20 +19 --> 20 +21["get merged peaks overlapping at least x replicates"]@{ shape: process } +20 --> 21 +12 --> 21 +22["multiQC"]@{ shape: process } +6 --> 22 +20 --> 22 +23["only keep peaks with summits overlapping intersection of at least x replicates"]@{ shape: process } +21 --> 23 +24["keep only columns of narrowPeak"]@{ shape: process } +23 --> 24 +25["discard duplicated lines"]@{ 
shape: process } +24 --> 25 + +``` \ No newline at end of file diff --git a/workflows/epigenetics/consensus-peaks/consensus-peaks-chip-sr_diagram.md b/workflows/epigenetics/consensus-peaks/consensus-peaks-chip-sr_diagram.md new file mode 100644 index 000000000..bcf46ccc4 --- /dev/null +++ b/workflows/epigenetics/consensus-peaks/consensus-peaks-chip-sr_diagram.md @@ -0,0 +1,60 @@ +```mermaid +graph LR +0["n rmDup BAMSR"]@{ shape: docs } +1["Minimum number of overlap"]@{ shape: lean-l } +2["effective\_genome\_size"]@{ shape: lean-l } +3["bin\_size"]@{ shape: lean-l } +4["count number of reads"]@{ shape: process } +0 --> 4 +5["generate filter rule"]@{ shape: process } +1 --> 5 +6["call peaks individually"]@{ shape: process } +2 --> 6 +0 --> 6 +7["put all nb of reads into single dataset"]@{ shape: process } +4 --> 7 +8["compute multi intersect"]@{ shape: process } +6 --> 8 +9["individual normalized bigwig"]@{ shape: process } +6 --> 9 +10["get min value"]@{ shape: process } +7 --> 10 +11["get nb of replicates"]@{ shape: process } +7 --> 11 +12["filter multi intersect"]@{ shape: process } +5 --> 12 +8 --> 12 +13["average coverage from replicates"]@{ shape: process } +3 --> 13 +9 --> 13 +14["convert min value to text"]@{ shape: process } +10 --> 14 +15["Parse parameter value"]@{ shape: process } +11 --> 15 +16["create a dataset with the min value as many times as there are replicates"]@{ shape: process } +14 --> 16 +15 --> 16 +17["split min value"]@{ shape: process } +16 --> 17 +18["convert min nb of reads to parameter"]@{ shape: process } +17 --> 18 +19["downsample BAM"]@{ shape: process } +0 --> 19 +18 --> 19 +20["call peaks on merge"]@{ shape: process } +2 --> 20 +19 --> 20 +21["get merged peaks overlapping at least x replicates"]@{ shape: process } +20 --> 21 +12 --> 21 +22["multiQC"]@{ shape: process } +6 --> 22 +20 --> 22 +23["only keep peaks with summits overlapping intersection of at least x replicates"]@{ shape: process } +21 --> 23 +24["keep only columns of 
narrowPeak"]@{ shape: process } +23 --> 24 +25["discard duplicated lines"]@{ shape: process } +24 --> 25 + +``` \ No newline at end of file diff --git a/workflows/epigenetics/cutandrun/cutandrun_diagram.md b/workflows/epigenetics/cutandrun/cutandrun_diagram.md new file mode 100644 index 000000000..4ed96f1ef --- /dev/null +++ b/workflows/epigenetics/cutandrun/cutandrun_diagram.md @@ -0,0 +1,36 @@ +```mermaid +graph LR +0["PE fastq input"]@{ shape: docs } +1["adapter\_forward"]@{ shape: lean-l } +2["adapter\_reverse"]@{ shape: lean-l } +3["reference\_genome"]@{ shape: lean-l } +4["effective\_genome\_size"]@{ shape: lean-l } +5["normalize\_profile"]@{ shape: lean-l } +6["Cutadapt \(remove adapter \+ bad quality bases\)"]@{ shape: process } +0 --> 6 +1 --> 6 +2 --> 6 +7["Bowtie2 map on reference"]@{ shape: process } +6 --> 7 +3 --> 7 +8["filter MAPQ30 concordant pairs"]@{ shape: process } +7 --> 8 +9["remove PCR duplicates"]@{ shape: process } +8 --> 9 +10["convert BAM to BED to improve peak calling"]@{ shape: process } +9 --> 10 +11["Call Peaks with MACS2"]@{ shape: process } +5 --> 11 +4 --> 11 +10 --> 11 +12["summary of MACS2"]@{ shape: process } +11 --> 12 +13["Bigwig from MACS2"]@{ shape: process } +11 --> 13 +14["MultiQC"]@{ shape: process } +6 --> 14 +7 --> 14 +9 --> 14 +11 --> 14 + +``` \ No newline at end of file diff --git a/workflows/epigenetics/hic-hicup-cooler/chic-fastq-to-cool-hicup-cooler_diagram.md b/workflows/epigenetics/hic-hicup-cooler/chic-fastq-to-cool-hicup-cooler_diagram.md new file mode 100644 index 000000000..634214d9f --- /dev/null +++ b/workflows/epigenetics/hic-hicup-cooler/chic-fastq-to-cool-hicup-cooler_diagram.md @@ -0,0 +1,45 @@ +```mermaid +graph LR +0["PE fastq input"]@{ shape: docs } +1["genome name"]@{ shape: lean-l } +2["Restriction enzyme"]@{ shape: lean-l } +3["No fill-in"]@{ shape: lean-l } +4["minimum MAPQ"]@{ shape: lean-l } +5["Bin size in bp"]@{ shape: lean-l } +6["Interactions to consider to calculate weights in 
normalization step"]@{ shape: lean-l } +7["capture region \(chromosome\)"]@{ shape: lean-l } +8["capture region \(start\)"]@{ shape: lean-l } +9["capture region \(end\)"]@{ shape: lean-l } +10["Hi-C\_fastqToPairs\_hicup"]@{ shape: subprocess } +3 --> 10 +0 --> 10 +2 --> 10 +1 --> 10 +4 --> 10 +11["write filtering for capture region"]@{ shape: process } +8 --> 11 +7 --> 11 +9 --> 11 +8 --> 11 +7 --> 11 +9 --> 11 +12["write region for pyGenomeTracks"]@{ shape: process } +7 --> 12 +8 --> 12 +9 --> 12 +13["Filter for capture region"]@{ shape: process } +11 --> 13 +10 --> 13 +14["Sort filtered pairs and index"]@{ shape: process } +13 --> 14 +1 --> 14 +15["Hi-C\_juicermediumtabixToCool\_cooler"]@{ shape: subprocess } +5 --> 15 +6 --> 15 +14 --> 15 +1 --> 15 +16["final\_plot"]@{ shape: process } +12 --> 16 +15 --> 16 + +``` \ No newline at end of file diff --git a/workflows/epigenetics/hic-hicup-cooler/hic-fastq-to-cool-hicup-cooler_diagram.md b/workflows/epigenetics/hic-hicup-cooler/hic-fastq-to-cool-hicup-cooler_diagram.md new file mode 100644 index 000000000..f9afe7a6f --- /dev/null +++ b/workflows/epigenetics/hic-hicup-cooler/hic-fastq-to-cool-hicup-cooler_diagram.md @@ -0,0 +1,29 @@ +```mermaid +graph LR +0["PE fastq input"]@{ shape: docs } +1["genome name"]@{ shape: lean-l } +2["Restriction enzyme"]@{ shape: lean-l } +3["No fill-in"]@{ shape: lean-l } +4["minimum MAPQ"]@{ shape: lean-l } +5["Bin size in bp"]@{ shape: lean-l } +6["Interactions to consider to calculate weights in normalization step"]@{ shape: lean-l } +7["region for matrix plotting"]@{ shape: lean-l } +8["Hi-C\_fastqToPairs\_hicup"]@{ shape: subprocess } +3 --> 8 +0 --> 8 +2 --> 8 +1 --> 8 +4 --> 8 +9["Sort pairs and index"]@{ shape: process } +8 --> 9 +1 --> 9 +10["Hi-C\_juicermediumtabixToCool\_cooler"]@{ shape: subprocess } +5 --> 10 +6 --> 10 +9 --> 10 +1 --> 10 +11["final plot"]@{ shape: process } +7 --> 11 +10 --> 11 + +``` \ No newline at end of file diff --git 
a/workflows/epigenetics/hic-hicup-cooler/hic-fastq-to-pairs-hicup_diagram.md b/workflows/epigenetics/hic-hicup-cooler/hic-fastq-to-pairs-hicup_diagram.md new file mode 100644 index 000000000..3878c2df0 --- /dev/null +++ b/workflows/epigenetics/hic-hicup-cooler/hic-fastq-to-pairs-hicup_diagram.md @@ -0,0 +1,24 @@ +```mermaid +graph LR +0["PE fastq input"]@{ shape: docs } +1["genome name"]@{ shape: lean-l } +2["Restriction enzyme"]@{ shape: lean-l } +3["No fill-in"]@{ shape: lean-l } +4["minimum MAPQ"]@{ shape: lean-l } +5["HiCUP"]@{ shape: process } +3 --> 5 +1 --> 5 +0 --> 5 +2 --> 5 +1 --> 5 +6["build filtering rule for MAPQ"]@{ shape: process } +4 --> 6 +4 --> 6 +7["valid pairs in juicebox format"]@{ shape: process } +5 --> 7 +5 --> 7 +8["valid pairs in juicebox format MAPQ filtered"]@{ shape: process } +6 --> 8 +7 --> 8 + +``` \ No newline at end of file diff --git a/workflows/epigenetics/hic-hicup-cooler/hic-juicermediumtabix-to-cool-cooler_diagram.md b/workflows/epigenetics/hic-hicup-cooler/hic-juicermediumtabix-to-cool-cooler_diagram.md new file mode 100644 index 000000000..aea9c8873 --- /dev/null +++ b/workflows/epigenetics/hic-hicup-cooler/hic-juicermediumtabix-to-cool-cooler_diagram.md @@ -0,0 +1,18 @@ +```mermaid +graph LR +0["Bin size in bp"]@{ shape: lean-l } +1["genome name"]@{ shape: lean-l } +2["Juicer Medium Tabix with validPairs"]@{ shape: docs } +3["Interactions to consider to calculate weights in normalization step"]@{ shape: lean-l } +4["make bed with bins"]@{ shape: process } +0 --> 4 +1 --> 4 +5["Load pairs in matrix"]@{ shape: process } +1 --> 5 +2 --> 5 +4 --> 5 +6["ICE normalization"]@{ shape: process } +3 --> 6 +5 --> 6 + +``` \ No newline at end of file diff --git a/workflows/genome-assembly/assembly-with-flye/Genome-assembly-with-Flye_diagram.md b/workflows/genome-assembly/assembly-with-flye/Genome-assembly-with-Flye_diagram.md new file mode 100644 index 000000000..b335727e2 --- /dev/null +++ 
b/workflows/genome-assembly/assembly-with-flye/Genome-assembly-with-Flye_diagram.md @@ -0,0 +1,13 @@ +```mermaid +graph LR +0["Input sequence reads"]@{ shape: doc } +1["Flye: assembly"]@{ shape: process } +0 --> 1 +2["Quast genome report"]@{ shape: process } +1 --> 2 +3["Fasta statistics"]@{ shape: process } +1 --> 3 +4["Bandage image: Flye assembly"]@{ shape: process } +1 --> 4 + +``` \ No newline at end of file diff --git a/workflows/genome-assembly/bacterial-genome-assembly/bacterial_genome_assembly_diagram.md b/workflows/genome-assembly/bacterial-genome-assembly/bacterial_genome_assembly_diagram.md new file mode 100644 index 000000000..c0ac860e3 --- /dev/null +++ b/workflows/genome-assembly/bacterial-genome-assembly/bacterial_genome_assembly_diagram.md @@ -0,0 +1,30 @@ +```mermaid +graph LR +0["Input adapter trimmed sequence reads \(forward\)"]@{ shape: doc } +1["Input adapter trimmed sequence reads \(reverse\)"]@{ shape: doc } +2["shovill\_genome\_assembly"]@{ shape: process } +0 --> 2 +1 --> 2 +3["quast\_quality"]@{ shape: process } +2 --> 3 +0 --> 3 +1 --> 3 +4["refseqmasher\_genome"]@{ shape: process } +2 --> 4 +5["bandage\_contig\_graph\_stats"]@{ shape: process } +2 --> 5 +6["bandage\_contig\_graph\_plot"]@{ shape: process } +2 --> 6 +7["ToolDistillator"]@{ shape: process } +2 --> 7 +2 --> 7 +2 --> 7 +3 --> 7 +3 --> 7 +4 --> 7 +6 --> 7 +5 --> 7 +8["ToolDistillator summarize"]@{ shape: process } +7 --> 8 + +``` \ No newline at end of file diff --git a/workflows/genome-assembly/polish-with-long-reads/Assembly-polishing-with-long-reads_diagram.md b/workflows/genome-assembly/polish-with-long-reads/Assembly-polishing-with-long-reads_diagram.md new file mode 100644 index 000000000..fed543c70 --- /dev/null +++ b/workflows/genome-assembly/polish-with-long-reads/Assembly-polishing-with-long-reads_diagram.md @@ -0,0 +1,39 @@ +```mermaid +graph LR +0["Assembly to be polished"]@{ shape: doc } +1["long reads"]@{ shape: doc } +2["minimap setting \(for long reads\) "]@{ 
shape: lean-l } +3["Minimap2: map long reads to assembly"]@{ shape: process } +2 --> 3 +1 --> 3 +0 --> 3 +4["Racon: polish 1"]@{ shape: process } +0 --> 4 +3 --> 4 +1 --> 4 +5["Minimap2: map long reads to polished assembly 1"]@{ shape: process } +2 --> 5 +1 --> 5 +4 --> 5 +6["Racon: polish 2"]@{ shape: process } +4 --> 6 +5 --> 6 +1 --> 6 +7["Minimap2: map long reads to polished assembly 2"]@{ shape: process } +2 --> 7 +1 --> 7 +6 --> 7 +8["Racon: polish 3"]@{ shape: process } +6 --> 8 +7 --> 8 +1 --> 8 +9["Minimap2: map long reads to polished assembly 3"]@{ shape: process } +2 --> 9 +1 --> 9 +8 --> 9 +10["Racon: polish 4"]@{ shape: process } +8 --> 10 +9 --> 10 +1 --> 10 + +``` \ No newline at end of file diff --git a/workflows/genome-assembly/quality-and-contamination-control/quality_and_contamination_control_diagram.md b/workflows/genome-assembly/quality-and-contamination-control/quality_and_contamination_control_diagram.md new file mode 100644 index 000000000..063b7b977 --- /dev/null +++ b/workflows/genome-assembly/quality-and-contamination-control/quality_and_contamination_control_diagram.md @@ -0,0 +1,38 @@ +```mermaid +graph LR +0["Input sequence reads \(forward\)"]@{ shape: doc } +1["Input sequence reads \(reverse\)"]@{ shape: doc } +2["Select a taxonomy database"]@{ shape: lean-l } +3["Select a NCBI taxonomy database"]@{ shape: lean-l } +4["fastp\_trimming\_step"]@{ shape: process } +0 --> 4 +1 --> 4 +5["kraken\_taxonomy\_assignation"]@{ shape: process } +2 --> 5 +4 --> 5 +4 --> 5 +6["bracken\_abundance\_estimation"]@{ shape: process } +5 --> 6 +2 --> 6 +7["recentrifuge\_taxonomy\_visualization"]@{ shape: process } +3 --> 7 +5 --> 7 +8["ToolDistillator"]@{ shape: process } +4 --> 8 +4 --> 8 +4 --> 8 +4 --> 8 +5 --> 8 +2 --> 8 +5 --> 8 +6 --> 8 +6 --> 8 +2 --> 8 +7 --> 8 +7 --> 8 +7 --> 8 +3 --> 8 +9["ToolDistillator summarize"]@{ shape: process } +8 --> 9 + +``` \ No newline at end of file diff --git 
a/workflows/imaging/fluorescence-nuclei-segmentation-and-counting/segmentation-and-counting_diagram.md b/workflows/imaging/fluorescence-nuclei-segmentation-and-counting/segmentation-and-counting_diagram.md new file mode 100644 index 000000000..e033b639e --- /dev/null +++ b/workflows/imaging/fluorescence-nuclei-segmentation-and-counting/segmentation-and-counting_diagram.md @@ -0,0 +1,20 @@ +```mermaid +graph LR +0["input\_image"]@{ shape: doc } +1["Filter 2-D image"]@{ shape: process } +0 --> 1 +2["Perform histogram equalization"]@{ shape: process } +0 --> 2 +3["Threshold image"]@{ shape: process } +1 --> 3 +4["Convert image format"]@{ shape: process } +2 --> 4 +5["Convert binary image to label map"]@{ shape: process } +3 --> 5 +6["Overlay images"]@{ shape: process } +4 --> 6 +5 --> 6 +7["Count objects in label map"]@{ shape: process } +5 --> 7 + +``` \ No newline at end of file diff --git a/workflows/metabomics/gcms-metams/Mass-spectrometry__GCMS-with-metaMS_diagram.md b/workflows/metabomics/gcms-metams/Mass-spectrometry__GCMS-with-metaMS_diagram.md new file mode 100644 index 000000000..00f3aff13 --- /dev/null +++ b/workflows/metabomics/gcms-metams/Mass-spectrometry__GCMS-with-metaMS_diagram.md @@ -0,0 +1,26 @@ +```mermaid +graph LR +0["Mass-spectrometry Dataset Collection"]@{ shape: docs } +1["sampleMetadata"]@{ shape: doc } +2["MSnbase readMSData"]@{ shape: process } +0 --> 2 +3["xcms findChromPeaks \(xcmsSet\)"]@{ shape: process } +2 --> 3 +4["xcms plot chromatogram"]@{ shape: process } +3 --> 4 +1 --> 4 +5["xcms findChromPeaks Merger"]@{ shape: process } +3 --> 5 +1 --> 5 +6["metaMS.runGC"]@{ shape: process } +5 --> 6 +7["Check Format"]@{ shape: process } +6 --> 7 +1 --> 7 +6 --> 7 +8["Multivariate"]@{ shape: process } +7 --> 8 +7 --> 8 +7 --> 8 + +``` \ No newline at end of file diff --git a/workflows/metabomics/lcms-preprocessing/Mass_spectrometry__LC-MS_preprocessing_with_XCMS_diagram.md 
b/workflows/metabomics/lcms-preprocessing/Mass_spectrometry__LC-MS_preprocessing_with_XCMS_diagram.md new file mode 100644 index 000000000..27648c256 --- /dev/null +++ b/workflows/metabomics/lcms-preprocessing/Mass_spectrometry__LC-MS_preprocessing_with_XCMS_diagram.md @@ -0,0 +1,33 @@ +```mermaid +graph LR +0["SampleMetadata"]@{ shape: doc } +1["Mass-spectrometry Dataset Collection"]@{ shape: docs } +2["MSnbase readMSData"]@{ shape: process } +1 --> 2 +3["xcms plot chromatogram"]@{ shape: process } +2 --> 3 +0 --> 3 +4["xcms findChromPeaks \(xcmsSet\)"]@{ shape: process } +2 --> 4 +5["xcms findChromPeaks Merger"]@{ shape: process } +4 --> 5 +0 --> 5 +6["xcms groupChromPeaks \(group\)"]@{ shape: process } +5 --> 6 +7["xcms adjustRtime \(retcor\)"]@{ shape: process } +6 --> 7 +8["Intensity Check"]@{ shape: process } +6 --> 8 +0 --> 8 +6 --> 8 +9["xcms plot chromatogram"]@{ shape: process } +7 --> 9 +0 --> 9 +10["xcms groupChromPeaks \(group\)"]@{ shape: process } +7 --> 10 +11["xcms fillChromPeaks \(fillPeaks\)"]@{ shape: process } +10 --> 11 +12["CAMERA.annotate"]@{ shape: process } +11 --> 12 + +``` \ No newline at end of file diff --git a/workflows/microbiome/allele-based-pathogen-identification/Allele-based-Pathogen-Identification_diagram.md b/workflows/microbiome/allele-based-pathogen-identification/Allele-based-Pathogen-Identification_diagram.md new file mode 100644 index 000000000..d9889b667 --- /dev/null +++ b/workflows/microbiome/allele-based-pathogen-identification/Allele-based-Pathogen-Identification_diagram.md @@ -0,0 +1,59 @@ +```mermaid +graph LR +0["collection\_of\_preprocessed\_samples"]@{ shape: docs } +1["samples\_profile"]@{ shape: lean-l } +2["reference\_genome\_of\_tested\_strain"]@{ shape: doc } +3["Convert compressed file to uncompressed."]@{ shape: process } +2 --> 3 +4["Map with minimap2"]@{ shape: process } +1 --> 4 +0 --> 4 +3 --> 4 +5["Clair3"]@{ shape: process } +4 --> 5 +3 --> 5 +6["Samtools depth"]@{ shape: process } +4 --> 6 
+7["Samtools coverage"]@{ shape: process } +4 --> 7 +8["bcftools norm"]@{ shape: process } +5 --> 8 +3 --> 8 +9["Advanced Cut"]@{ shape: process } +6 --> 9 +10["Remove beginning"]@{ shape: process } +7 --> 10 +11["SnpSift Filter"]@{ shape: process } +8 --> 11 +12["Table Compute"]@{ shape: process } +9 --> 12 +13["Cut"]@{ shape: process } +10 --> 13 +14["SnpSift Extract Fields"]@{ shape: process } +11 --> 14 +15["bcftools consensus"]@{ shape: process } +11 --> 15 +3 --> 15 +16["Select first"]@{ shape: process } +13 --> 16 +17["Remove beginning"]@{ shape: process } +14 --> 17 +18["Collapse Collection"]@{ shape: process } +16 --> 18 +19["Count"]@{ shape: process } +17 --> 19 +20["Advanced Cut"]@{ shape: process } +18 --> 20 +21["Cut"]@{ shape: process } +19 --> 21 +22["Paste"]@{ shape: process } +20 --> 22 +12 --> 22 +23["Select first"]@{ shape: process } +21 --> 23 +24["Collapse Collection"]@{ shape: process } +23 --> 24 +25["Column Regex Find And Replace"]@{ shape: process } +24 --> 25 + +``` \ No newline at end of file diff --git a/workflows/microbiome/gene-based-pathogen-identification/Gene-based-Pathogen-Identification_diagram.md b/workflows/microbiome/gene-based-pathogen-identification/Gene-based-Pathogen-Identification_diagram.md new file mode 100644 index 000000000..58aeae95d --- /dev/null +++ b/workflows/microbiome/gene-based-pathogen-identification/Gene-based-Pathogen-Identification_diagram.md @@ -0,0 +1,39 @@ +```mermaid +graph LR +0["collection\_of\_preprocessed\_samples"]@{ shape: docs } +1["Extract element identifiers"]@{ shape: process } +0 --> 1 +2["Build list"]@{ shape: process } +0 --> 2 +3["Split file"]@{ shape: process } +1 --> 3 +4["Flye"]@{ shape: process } +2 --> 4 +5["Parse parameter value"]@{ shape: process } +3 --> 5 +6["medaka consensus pipeline"]@{ shape: process } +4 --> 6 +0 --> 6 +7["Bandage Image"]@{ shape: process } +4 --> 7 +8["Compose text parameter value"]@{ shape: process } +5 --> 8 +9["FASTA-to-Tabular"]@{ shape: process } +6 --> 
9 +10["ABRicate"]@{ shape: process } +6 --> 10 +11["ABRicate"]@{ shape: process } +6 --> 11 +12["Replace"]@{ shape: process } +8 --> 12 +9 --> 12 +13["Replace"]@{ shape: process } +8 --> 13 +10 --> 13 +14["Replace"]@{ shape: process } +8 --> 14 +11 --> 14 +15["Tabular-to-FASTA"]@{ shape: process } +12 --> 15 + +``` \ No newline at end of file diff --git a/workflows/microbiome/nanopore-pre-processing/Nanopore-Pre-Processing_diagram.md b/workflows/microbiome/nanopore-pre-processing/Nanopore-Pre-Processing_diagram.md new file mode 100644 index 000000000..179238f42 --- /dev/null +++ b/workflows/microbiome/nanopore-pre-processing/Nanopore-Pre-Processing_diagram.md @@ -0,0 +1,61 @@ +```mermaid +graph LR +0["samples\_profile"]@{ shape: lean-l } +1["host\_reference\_genome"]@{ shape: lean-l } +2["collection\_of\_all\_samples"]@{ shape: docs } +3["Porechop"]@{ shape: process } +2 --> 3 +4["NanoPlot"]@{ shape: process } +2 --> 4 +5["FastQC"]@{ shape: process } +2 --> 5 +6["fastp"]@{ shape: process } +3 --> 6 +7["MultiQC"]@{ shape: process } +5 --> 7 +8["Map with minimap2"]@{ shape: process } +0 --> 8 +6 --> 8 +1 --> 8 +9["NanoPlot"]@{ shape: process } +6 --> 9 +10["FastQC"]@{ shape: process } +6 --> 10 +11["Split BAM by reads mapping status"]@{ shape: process } +8 --> 11 +12["Select"]@{ shape: process } +10 --> 12 +13["Samtools fastx"]@{ shape: process } +11 --> 13 +14["Samtools fastx"]@{ shape: process } +11 --> 14 +15["Collapse Collection"]@{ shape: process } +12 --> 15 +16["Filter failed datasets"]@{ shape: process } +13 --> 16 +17["Kraken2"]@{ shape: process } +14 --> 17 +18["Cut"]@{ shape: process } +15 --> 18 +19["FastQC"]@{ shape: process } +16 --> 19 +20["Krakentools: Extract Kraken Reads By ID"]@{ shape: process } +6 --> 20 +17 --> 20 +17 --> 20 +21["Select"]@{ shape: process } +19 --> 21 +22["Collapse Collection"]@{ shape: process } +21 --> 22 +23["Cut"]@{ shape: process } +22 --> 23 +24["Column join"]@{ shape: process } +25["Compute"]@{ shape: process } +24 --> 25 
+26["Column Regex Find And Replace"]@{ shape: process } +25 --> 26 +27["MultiQC"]@{ shape: process } +10 --> 27 +26 --> 27 + +``` \ No newline at end of file diff --git a/workflows/microbiome/pathogen-detection-pathogfair-samples-aggregation-and-visualisation/Pathogen-Detection-PathoGFAIR-Samples-Aggregation-and-Visualisation_diagram.md b/workflows/microbiome/pathogen-detection-pathogfair-samples-aggregation-and-visualisation/Pathogen-Detection-PathoGFAIR-Samples-Aggregation-and-Visualisation_diagram.md new file mode 100644 index 000000000..33e0835f8 --- /dev/null +++ b/workflows/microbiome/pathogen-detection-pathogfair-samples-aggregation-and-visualisation/Pathogen-Detection-PathoGFAIR-Samples-Aggregation-and-Visualisation_diagram.md @@ -0,0 +1,138 @@ +```mermaid +graph LR +0["amr\_identified\_by\_ncbi"]@{ shape: docs } +1["vfs\_of\_genes\_identified\_by\_vfdb"]@{ shape: docs } +2["amrs"]@{ shape: docs } +3["contigs"]@{ shape: docs } +4["vfs"]@{ shape: docs } +5["removed\_hosts\_percentage\_tabular"]@{ shape: doc } +6["mapping\_mean\_depth\_per\_sample"]@{ shape: doc } +7["mapping\_coverage\_percentage\_per\_sample"]@{ shape: doc } +8["number\_of\_variants\_per\_sample"]@{ shape: doc } +9["metadata"]@{ shape: doc } +10["Filter failed datasets"]@{ shape: process } +0 --> 10 +11["Filter failed datasets"]@{ shape: process } +1 --> 11 +12["Filter failed datasets"]@{ shape: process } +2 --> 12 +13["Filter failed datasets"]@{ shape: process } +3 --> 13 +14["Filter failed datasets"]@{ shape: process } +4 --> 14 +15["Remove beginning"]@{ shape: process } +10 --> 15 +16["Remove beginning"]@{ shape: process } +11 --> 16 +17["Remove beginning"]@{ shape: process } +12 --> 17 +18["Collapse Collection"]@{ shape: process } +13 --> 18 +19["Collapse Collection"]@{ shape: process } +14 --> 19 +20["Remove beginning"]@{ shape: process } +14 --> 20 +21["Count"]@{ shape: process } +15 --> 21 +22["Count"]@{ shape: process } +16 --> 22 +23["Group"]@{ shape: process } +16 --> 23 
+24["Unique"]@{ shape: process } +17 --> 24 +25["Split by group"]@{ shape: process } +19 --> 25 +26["Unique"]@{ shape: process } +20 --> 26 +27["Cut"]@{ shape: process } +21 --> 27 +28["Cut"]@{ shape: process } +22 --> 28 +29["Filter empty datasets"]@{ shape: process } +23 --> 29 +30["Cut"]@{ shape: process } +24 --> 30 +31["Cut"]@{ shape: process } +25 --> 31 +32["Cut"]@{ shape: process } +26 --> 32 +33["Collapse Collection"]@{ shape: process } +27 --> 33 +34["Collapse Collection"]@{ shape: process } +28 --> 34 +35["Column join"]@{ shape: process } +29 --> 35 +36["bedtools getfasta"]@{ shape: process } +18 --> 36 +30 --> 36 +37["Remove beginning"]@{ shape: process } +31 --> 37 +38["bedtools getfasta"]@{ shape: process } +18 --> 38 +32 --> 38 +39["Column Regex Find And Replace"]@{ shape: process } +33 --> 39 +40["Column Regex Find And Replace"]@{ shape: process } +34 --> 40 +41["Column Regex Find And Replace"]@{ shape: process } +35 --> 41 +42["Regex Find And Replace"]@{ shape: process } +36 --> 42 +43["bedtools getfasta"]@{ shape: process } +18 --> 43 +37 --> 43 +44["Regex Find And Replace"]@{ shape: process } +38 --> 44 +45["Multi-Join"]@{ shape: process } +40 --> 45 +39 --> 45 +46["Heatmap w ggplot"]@{ shape: process } +41 --> 46 +47["Filter empty datasets"]@{ shape: process } +42 --> 47 +48["ClustalW"]@{ shape: process } +43 --> 48 +49["Filter empty datasets"]@{ shape: process } +44 --> 49 +50["Replace Text"]@{ shape: process } +45 --> 50 +51["FASTA-to-Tabular"]@{ shape: process } +47 --> 51 +52["Filter empty datasets"]@{ shape: process } +48 --> 52 +53["FASTA-to-Tabular"]@{ shape: process } +49 --> 53 +54["Cut"]@{ shape: process } +51 --> 54 +55["FASTTREE"]@{ shape: process } +52 --> 55 +56["Cut"]@{ shape: process } +53 --> 56 +57["Group"]@{ shape: process } +54 --> 57 +58["Newick Display"]@{ shape: process } +55 --> 58 +59["Group"]@{ shape: process } +56 --> 59 +60["Tabular-to-FASTA"]@{ shape: process } +57 --> 60 +61["Tabular-to-FASTA"]@{ shape: process } 
+59 --> 61 +62["FASTA Merge Files and Filter Unique Sequences"]@{ shape: process } +60 --> 62 +63["FASTA Merge Files and Filter Unique Sequences"]@{ shape: process } +61 --> 63 +64["ClustalW"]@{ shape: process } +62 --> 64 +65["ClustalW"]@{ shape: process } +63 --> 65 +66["FASTTREE"]@{ shape: process } +64 --> 66 +67["FASTTREE"]@{ shape: process } +65 --> 67 +68["Newick Display"]@{ shape: process } +66 --> 68 +69["Newick Display"]@{ shape: process } +67 --> 69 + +``` \ No newline at end of file diff --git a/workflows/microbiome/taxonomy-profiling-and-visualization-with-krona/Taxonomy-Profiling-and-Visualization-with-Krona_diagram.md b/workflows/microbiome/taxonomy-profiling-and-visualization-with-krona/Taxonomy-Profiling-and-Visualization-with-Krona_diagram.md new file mode 100644 index 000000000..8559407ae --- /dev/null +++ b/workflows/microbiome/taxonomy-profiling-and-visualization-with-krona/Taxonomy-Profiling-and-Visualization-with-Krona_diagram.md @@ -0,0 +1,13 @@ +```mermaid +graph LR +0["collection\_of\_preprocessed\_samples"]@{ shape: docs } +1["kraken\_database"]@{ shape: lean-l } +2["Kraken2"]@{ shape: process } +1 --> 2 +0 --> 2 +3["Krakentools: Convert kraken report file"]@{ shape: process } +2 --> 3 +4["Krona pie chart"]@{ shape: process } +3 --> 4 + +``` \ No newline at end of file diff --git a/workflows/proteomics/clinicalmp/clinicalmp-data-interpretation/WF5_Data_Interpretation_Worklow_diagram.md b/workflows/proteomics/clinicalmp/clinicalmp-data-interpretation/WF5_Data_Interpretation_Worklow_diagram.md new file mode 100644 index 000000000..85569e1d3 --- /dev/null +++ b/workflows/proteomics/clinicalmp/clinicalmp-data-interpretation/WF5_Data_Interpretation_Worklow_diagram.md @@ -0,0 +1,27 @@ +```mermaid +graph LR +0["Quantified Peptides"]@{ shape: doc } +1["MaxQuant Protein Groups"]@{ shape: doc } +2["MaxQuant Evidence"]@{ shape: doc } +3["Annotation"]@{ shape: doc } +4["Comparison Matrix"]@{ shape: doc } +5["Unipept"]@{ shape: process } +0 --> 5 
+6["Microbial Proteins"]@{ shape: process } +1 --> 6 +7["Select"]@{ shape: process } +1 --> 7 +8["MSstatsTMT\_for\_microbial\_proteins"]@{ shape: process } +4 --> 8 +3 --> 8 +2 --> 8 +6 --> 8 +9["Human Proteins"]@{ shape: process } +7 --> 9 +10["MSstatsTMT\_for\_human\_proteins"]@{ shape: process } +4 --> 10 +3 --> 10 +2 --> 10 +9 --> 10 + +``` \ No newline at end of file diff --git a/workflows/proteomics/clinicalmp/clinicalmp-database-generation/iwc-clinicalmp-database-generation_diagram.md b/workflows/proteomics/clinicalmp/clinicalmp-database-generation/iwc-clinicalmp-database-generation_diagram.md new file mode 100644 index 000000000..9feca0730 --- /dev/null +++ b/workflows/proteomics/clinicalmp/clinicalmp-database-generation/iwc-clinicalmp-database-generation_diagram.md @@ -0,0 +1,19 @@ +```mermaid +graph LR +0["Human SwissProt Protein Database"]@{ shape: doc } +1["Tandem Mass Spectrometry \(MS/MS\) datasets"]@{ shape: docs } +2["Species UniProt Protein Database"]@{ shape: doc } +3["Contaminants cRAP Protein Database"]@{ shape: doc } +4["Human UniProt Microbial Proteins cRAP for MetaNovo"]@{ shape: process } +0 --> 4 +2 --> 4 +3 --> 4 +5["Metanovo"]@{ shape: process } +4 --> 5 +1 --> 5 +6["Merge all FASTA"]@{ shape: process } +0 --> 6 +5 --> 6 +3 --> 6 + +``` \ No newline at end of file diff --git a/workflows/proteomics/clinicalmp/clinicalmp-quantitation/iwc-clinicalmp-quantitation_diagram.md b/workflows/proteomics/clinicalmp/clinicalmp-quantitation/iwc-clinicalmp-quantitation_diagram.md new file mode 100644 index 000000000..e0978b9ce --- /dev/null +++ b/workflows/proteomics/clinicalmp/clinicalmp-quantitation/iwc-clinicalmp-quantitation_diagram.md @@ -0,0 +1,23 @@ +```mermaid +graph LR +0["Quantitation\_Database-For-MaxQuant"]@{ shape: doc } +1["Experimental-Design Discovery MaxQuant"]@{ shape: doc } +2["Input Raw-files"]@{ shape: docs } +3["MaxQuant"]@{ shape: process } +0 --> 3 +2 --> 3 +1 --> 3 +4["extracting microbial Proteins"]@{ shape: process } +3 --> 4 
+5["extracting microbial Peptides"]@{ shape: process } +3 --> 5 +6["extract proteins"]@{ shape: process } +4 --> 6 +7["extract peptides"]@{ shape: process } +5 --> 7 +8["Quantified-Proteins"]@{ shape: process } +6 --> 8 +9["Quantified-Peptides"]@{ shape: process } +7 --> 9 + +``` \ No newline at end of file diff --git a/workflows/proteomics/clinicalmp/clinicalmp-verification/clinicalmp-verification_diagram.md b/workflows/proteomics/clinicalmp/clinicalmp-verification/clinicalmp-verification_diagram.md new file mode 100644 index 000000000..ad26a36eb --- /dev/null +++ b/workflows/proteomics/clinicalmp/clinicalmp-verification/clinicalmp-verification_diagram.md @@ -0,0 +1,49 @@ +```mermaid +graph LR +0["Human UniProt Isoforms FASTA"]@{ shape: process } +1["cRAP"]@{ shape: process } +2["Tandem Mass Spectrometry \(MS/MS\) datasets"]@{ shape: docs } +3["SGPS peptide report"]@{ shape: doc } +4["Distinct Peptides for PepQuery"]@{ shape: doc } +5["MaxQuant peptide report"]@{ shape: doc } +6["Human UniProt Isoforms cRAP FASTA"]@{ shape: process } +0 --> 6 +1 --> 6 +7["SGPS Cut"]@{ shape: process } +3 --> 7 +8["MQ Cut"]@{ shape: process } +5 --> 8 +9["PepQuery2"]@{ shape: process } +6 --> 9 +4 --> 9 +2 --> 9 +10["SGPS Remove Beginner"]@{ shape: process } +7 --> 10 +11["MQ Remove Beginner"]@{ shape: process } +8 --> 11 +12["Collapse Collection"]@{ shape: process } +9 --> 12 +13["Concatenate datasets"]@{ shape: process } +14["Filter"]@{ shape: process } +12 --> 14 +15["Filter Remove beginning"]@{ shape: process } +14 --> 15 +16["FRB Cut"]@{ shape: process } +15 --> 16 +17["Peptide and Protein from Peptide Reports"]@{ shape: process } +16 --> 17 +13 --> 17 +18["PPPR Remove beginning"]@{ shape: process } +17 --> 18 +19["Group"]@{ shape: process } +18 --> 19 +20["Uniprot ID from verified Peptides"]@{ shape: process } +19 --> 20 +21["UniProt"]@{ shape: process } +20 --> 21 +22["Quantitation Database for MaxQuant"]@{ shape: process } +0 --> 22 +1 --> 22 +21 --> 22 + +``` \ No newline 
at end of file diff --git a/workflows/proteomics/openms-metaprosip/metaprosip_diagram.md b/workflows/proteomics/openms-metaprosip/metaprosip_diagram.md new file mode 100644 index 000000000..e45844a02 --- /dev/null +++ b/workflows/proteomics/openms-metaprosip/metaprosip_diagram.md @@ -0,0 +1,36 @@ +```mermaid +graph LR +0["Centroided LC-MS datasets"]@{ shape: docs } +1["Fasta Database"]@{ shape: doc } +2["Precursor monoisotopic mass tolerance \(ppm\)"]@{ shape: lean-l } +3["Fixed modifications"]@{ shape: lean-l } +4["Variable modifications"]@{ shape: lean-l } +5["Labeled element"]@{ shape: lean-l } +6["Sort collection"]@{ shape: process } +0 --> 6 +7["DecoyDatabase"]@{ shape: process } +1 --> 7 +8["FeatureFinderMultiplex"]@{ shape: process } +6 --> 8 +9["MSGFPlusAdapter"]@{ shape: process } +7 --> 9 +3 --> 9 +6 --> 9 +2 --> 9 +4 --> 9 +10["PeptideIndexer"]@{ shape: process } +7 --> 10 +9 --> 10 +11["FalseDiscoveryRate"]@{ shape: process } +10 --> 11 +12["IDMapper"]@{ shape: process } +11 --> 12 +8 --> 12 +13["MetaProSIP"]@{ shape: process } +7 --> 13 +12 --> 13 +6 --> 13 +5 --> 13 +2 --> 13 + +``` \ No newline at end of file diff --git a/workflows/repeatmasking/RepeatMasking-Workflow_diagram.md b/workflows/repeatmasking/RepeatMasking-Workflow_diagram.md new file mode 100644 index 000000000..f3729cd70 --- /dev/null +++ b/workflows/repeatmasking/RepeatMasking-Workflow_diagram.md @@ -0,0 +1,9 @@ +```mermaid +graph LR +0["input"]@{ shape: doc } +1["RepeatModeler"]@{ shape: process } +0 --> 1 +2["RepeatMasker"]@{ shape: process } +1 --> 2 + +``` \ No newline at end of file diff --git a/workflows/sars-cov-2-variant-calling/sars-cov-2-consensus-from-variation/consensus-from-variation_diagram.md b/workflows/sars-cov-2-variant-calling/sars-cov-2-consensus-from-variation/consensus-from-variation_diagram.md new file mode 100644 index 000000000..ccdebbc1a --- /dev/null +++ 
b/workflows/sars-cov-2-variant-calling/sars-cov-2-consensus-from-variation/consensus-from-variation_diagram.md @@ -0,0 +1,52 @@ +```mermaid +graph LR +0["Variant calls"]@{ shape: docs } +1["min-AF for consensus variant"]@{ shape: lean-l } +2["min-AF for failed variants"]@{ shape: lean-l } +3["aligned reads data for depth calculation"]@{ shape: docs } +4["Depth-threshold for masking"]@{ shape: lean-l } +5["Reference genome"]@{ shape: doc } +6["Compose text parameter value"]@{ shape: process } +1 --> 6 +7["Compose text parameter value"]@{ shape: process } +2 --> 7 +1 --> 7 +8["bedtools Genome Coverage"]@{ shape: process } +3 --> 8 +9["Compose text parameter value"]@{ shape: process } +4 --> 9 +10["SnpSift Filter"]@{ shape: process } +6 --> 10 +0 --> 10 +11["SnpSift Filter"]@{ shape: process } +7 --> 11 +0 --> 11 +12["Filter"]@{ shape: process } +9 --> 12 +8 --> 12 +13["SnpSift Extract Fields"]@{ shape: process } +10 --> 13 +14["SnpSift Extract Fields"]@{ shape: process } +11 --> 14 +15["Compute"]@{ shape: process } +13 --> 15 +16["Compute"]@{ shape: process } +14 --> 16 +17["Concatenate"]@{ shape: process } +12 --> 17 +16 --> 17 +18["Merge"]@{ shape: process } +17 --> 18 +19["Subtract"]@{ shape: process } +18 --> 19 +15 --> 19 +20["Compute"]@{ shape: process } +19 --> 20 +21["bcftools consensus"]@{ shape: process } +10 --> 21 +5 --> 21 +20 --> 21 +22["Collapse Collection"]@{ shape: process } +21 --> 22 + +``` \ No newline at end of file diff --git a/workflows/sars-cov-2-variant-calling/sars-cov-2-ont-artic-variant-calling/ont-artic-variation_diagram.md b/workflows/sars-cov-2-variant-calling/sars-cov-2-ont-artic-variant-calling/ont-artic-variation_diagram.md new file mode 100644 index 000000000..932241f65 --- /dev/null +++ b/workflows/sars-cov-2-variant-calling/sars-cov-2-ont-artic-variant-calling/ont-artic-variation_diagram.md @@ -0,0 +1,66 @@ +```mermaid +graph LR +0["ONT-sequenced reads"]@{ shape: docs } +1["Minimum read length"]@{ shape: lean-l } +2["Maximum read 
length"]@{ shape: lean-l } +3["NC\_045512.2 FASTA sequence of SARS-CoV-2"]@{ shape: doc } +4["Primer binding sites info in BED format"]@{ shape: doc } +5["fastp"]@{ shape: process } +2 --> 5 +1 --> 5 +0 --> 5 +6["Compute"]@{ shape: process } +4 --> 6 +7["Replace Text"]@{ shape: process } +4 --> 7 +8["Map with minimap2"]@{ shape: process } +5 --> 8 +3 --> 8 +9["Datamash"]@{ shape: process } +6 --> 9 +10["Samtools view"]@{ shape: process } +8 --> 10 +11["Parse parameter value"]@{ shape: process } +9 --> 11 +12["Samtools stats"]@{ shape: process } +10 --> 12 +13["BamLeftAlign"]@{ shape: process } +10 --> 13 +3 --> 13 +14["ivar trim"]@{ shape: process } +13 --> 14 +4 --> 14 +15["QualiMap BamQC"]@{ shape: process } +14 --> 15 +16["medaka consensus tool"]@{ shape: process } +14 --> 16 +17["Filter failed"]@{ shape: process } +15 --> 17 +18["medaka variant tool"]@{ shape: process } +13 --> 18 +16 --> 18 +3 --> 18 +19["medaka variant tool"]@{ shape: process } +13 --> 19 +11 --> 19 +16 --> 19 +3 --> 19 +20["Flatten Collection"]@{ shape: process } +17 --> 20 +21["bedtools Intersect intervals"]@{ shape: process } +19 --> 21 +7 --> 21 +22["MultiQC"]@{ shape: process } +12 --> 22 +20 --> 22 +23["bcftools annotate"]@{ shape: process } +18 --> 23 +21 --> 23 +24["SnpEff eff covid19 version"]@{ shape: process } +23 --> 24 +25["Lofreq filter"]@{ shape: process } +24 --> 25 +26["Replace"]@{ shape: process } +25 --> 26 + +``` \ No newline at end of file diff --git a/workflows/sars-cov-2-variant-calling/sars-cov-2-pe-illumina-artic-ivar-analysis/pe-wgs-ivar-analysis_diagram.md b/workflows/sars-cov-2-variant-calling/sars-cov-2-pe-illumina-artic-ivar-analysis/pe-wgs-ivar-analysis_diagram.md new file mode 100644 index 000000000..3384e4789 --- /dev/null +++ b/workflows/sars-cov-2-variant-calling/sars-cov-2-pe-illumina-artic-ivar-analysis/pe-wgs-ivar-analysis_diagram.md @@ -0,0 +1,50 @@ +```mermaid +graph LR +0["Paired read collection for samples"]@{ shape: docs } +1["Reference FASTA"]@{ 
shape: doc } +2["Primer BED"]@{ shape: doc } +3["Read fraction to call variant"]@{ shape: lean-l } +4["Minimum quality score to call base"]@{ shape: lean-l } +5["fastp: Trimmed Illumina Reads"]@{ shape: process } +0 --> 5 +6["Rename reference to NC\_045512.2"]@{ shape: process } +1 --> 6 +7["Map with BWA-MEM"]@{ shape: process } +5 --> 7 +6 --> 7 +8["Samtools stats"]@{ shape: process } +7 --> 8 +9["Samtools view"]@{ shape: process } +7 --> 9 +10["QualiMap BamQC"]@{ shape: process } +9 --> 10 +11["ivar trim"]@{ shape: process } +9 --> 11 +2 --> 11 +12["Flatten collection"]@{ shape: process } +10 --> 12 +13["ivar variants"]@{ shape: process } +11 --> 13 +3 --> 13 +4 --> 13 +1 --> 13 +14["ivar consensus"]@{ shape: process } +11 --> 14 +3 --> 14 +4 --> 14 +15["Quality Control Report"]@{ shape: process } +5 --> 15 +8 --> 15 +12 --> 15 +16["Annotated variants"]@{ shape: process } +13 --> 16 +17["Consensus genome \(masked for depth\)"]@{ shape: process } +14 --> 17 +18["Concatenate datasets"]@{ shape: process } +17 --> 18 +19["Pangolin"]@{ shape: process } +18 --> 19 +20["Nextclade"]@{ shape: process } +18 --> 20 + +``` \ No newline at end of file diff --git a/workflows/sars-cov-2-variant-calling/sars-cov-2-pe-illumina-artic-variant-calling/pe-artic-variation_diagram.md b/workflows/sars-cov-2-variant-calling/sars-cov-2-pe-illumina-artic-variant-calling/pe-artic-variation_diagram.md new file mode 100644 index 000000000..fcd762ed9 --- /dev/null +++ b/workflows/sars-cov-2-variant-calling/sars-cov-2-pe-illumina-artic-variant-calling/pe-artic-variation_diagram.md @@ -0,0 +1,83 @@ +```mermaid +graph LR +0["Paired Collection"]@{ shape: docs } +1["NC\_045512.2 FASTA sequence of SARS-CoV-2"]@{ shape: doc } +2["ARTIC primer BED"]@{ shape: doc } +3["ARTIC primers to amplicon assignments"]@{ shape: doc } +4["Read removal minimum AF"]@{ shape: lean-l } +5["Read removal maximum AF"]@{ shape: lean-l } +6["Minimum DP required after amplicon bias correction"]@{ shape: lean-l } +7["Minimum 
DP\_ALT required after amplicon bias correction"]@{ shape: lean-l } +8["fastp"]@{ shape: process } +0 --> 8 +9["Compose text parameter value"]@{ shape: process } +4 --> 9 +5 --> 9 +10["Compose text parameter value"]@{ shape: process } +6 --> 10 +7 --> 10 +11["Map with BWA-MEM"]@{ shape: process } +8 --> 11 +1 --> 11 +12["Samtools view"]@{ shape: process } +11 --> 12 +13["Realign reads"]@{ shape: process } +12 --> 13 +1 --> 13 +14["Samtools stats"]@{ shape: process } +12 --> 14 +15["Insert indel qualities"]@{ shape: process } +13 --> 15 +1 --> 15 +16["ivar trim"]@{ shape: process } +3 --> 16 +15 --> 16 +2 --> 16 +17["Call variants"]@{ shape: process } +16 --> 17 +1 --> 17 +18["QualiMap BamQC"]@{ shape: process } +16 --> 18 +19["SnpSift Filter"]@{ shape: process } +9 --> 19 +17 --> 19 +20["SnpSift Filter"]@{ shape: process } +10 --> 20 +17 --> 20 +21["Filter failed datasets"]@{ shape: process } +18 --> 21 +22["ivar removereads"]@{ shape: process } +3 --> 22 +16 --> 22 +2 --> 22 +19 --> 22 +23["Flatten collection"]@{ shape: process } +21 --> 23 +24["Call variants"]@{ shape: process } +22 --> 24 +1 --> 24 +25["MultiQC"]@{ shape: process } +8 --> 25 +14 --> 25 +23 --> 25 +26["bcftools annotate"]@{ shape: process } +17 --> 26 +24 --> 26 +27["SnpSift Filter"]@{ shape: process } +10 --> 27 +24 --> 27 +28["VCF-VCFintersect:"]@{ shape: process } +1 --> 28 +27 --> 28 +20 --> 28 +29["bcftools annotate"]@{ shape: process } +26 --> 29 +28 --> 29 +30["Replace Text"]@{ shape: process } +29 --> 30 +31["SnpEff eff covid19 version"]@{ shape: process } +30 --> 31 +32["Lofreq filter"]@{ shape: process } +31 --> 32 + +``` \ No newline at end of file diff --git a/workflows/sars-cov-2-variant-calling/sars-cov-2-pe-illumina-wgs-variant-calling/pe-wgs-variation_diagram.md b/workflows/sars-cov-2-variant-calling/sars-cov-2-pe-illumina-wgs-variant-calling/pe-wgs-variation_diagram.md new file mode 100644 index 000000000..06030d0a0 --- /dev/null +++ 
b/workflows/sars-cov-2-variant-calling/sars-cov-2-pe-illumina-wgs-variant-calling/pe-wgs-variation_diagram.md @@ -0,0 +1,34 @@ +```mermaid +graph LR +0["Paired Collection"]@{ shape: docs } +1["NC\_045512.2 FASTA sequence of SARS-CoV-2"]@{ shape: doc } +2["fastp"]@{ shape: process } +0 --> 2 +3["Map with BWA-MEM"]@{ shape: process } +2 --> 3 +1 --> 3 +4["Samtools view"]@{ shape: process } +3 --> 4 +5["Samtools stats"]@{ shape: process } +4 --> 5 +6["MarkDuplicates"]@{ shape: process } +4 --> 6 +7["Realign reads"]@{ shape: process } +6 --> 7 +1 --> 7 +8["MultiQC"]@{ shape: process } +2 --> 8 +5 --> 8 +6 --> 8 +9["Insert indel qualities"]@{ shape: process } +7 --> 9 +1 --> 9 +10["Call variants"]@{ shape: process } +9 --> 10 +1 --> 10 +11["Lofreq filter"]@{ shape: process } +10 --> 11 +12["SnpEff eff covid19 version"]@{ shape: process } +11 --> 12 + +``` \ No newline at end of file diff --git a/workflows/sars-cov-2-variant-calling/sars-cov-2-se-illumina-wgs-variant-calling/se-wgs-variation_diagram.md b/workflows/sars-cov-2-variant-calling/sars-cov-2-se-illumina-wgs-variant-calling/se-wgs-variation_diagram.md new file mode 100644 index 000000000..8a79ff8ee --- /dev/null +++ b/workflows/sars-cov-2-variant-calling/sars-cov-2-se-illumina-wgs-variant-calling/se-wgs-variation_diagram.md @@ -0,0 +1,30 @@ +```mermaid +graph LR +0["Single End Collection"]@{ shape: docs } +1["NC\_045512.2 FASTA sequence of SARS-CoV-2"]@{ shape: doc } +2["fastp"]@{ shape: process } +0 --> 2 +3["Bowtie2"]@{ shape: process } +2 --> 3 +1 --> 3 +4["MarkDuplicates"]@{ shape: process } +3 --> 4 +5["MultiQC"]@{ shape: process } +2 --> 5 +3 --> 5 +4 --> 5 +6["Realign reads"]@{ shape: process } +4 --> 6 +1 --> 6 +7["Insert indel qualities"]@{ shape: process } +6 --> 7 +1 --> 7 +8["Call variants"]@{ shape: process } +7 --> 8 +1 --> 8 +9["Lofreq filter"]@{ shape: process } +8 --> 9 +10["SnpEff eff covid19 version"]@{ shape: process } +9 --> 10 + +``` \ No newline at end of file diff --git 
a/workflows/sars-cov-2-variant-calling/sars-cov-2-variation-reporting/variation-reporting_diagram.md b/workflows/sars-cov-2-variant-calling/sars-cov-2-variation-reporting/variation-reporting_diagram.md new file mode 100644 index 000000000..5053a1ce4 --- /dev/null +++ b/workflows/sars-cov-2-variant-calling/sars-cov-2-variation-reporting/variation-reporting_diagram.md @@ -0,0 +1,82 @@ +```mermaid +graph LR +0["Variation data to report"]@{ shape: docs } +1["AF Filter"]@{ shape: lean-l } +2["DP Filter"]@{ shape: lean-l } +3["DP\_ALT Filter"]@{ shape: lean-l } +4["gene products translations"]@{ shape: doc } +5["Number of Clusters"]@{ shape: lean-l } +6["SnpSift Filter"]@{ shape: process } +0 --> 6 +7["Compose text parameter value"]@{ shape: process } +1 --> 7 +2 --> 7 +3 --> 7 +8["Compose text parameter value"]@{ shape: process } +1 --> 8 +2 --> 8 +3 --> 8 +9["SnpSift Filter"]@{ shape: process } +7 --> 9 +8 --> 9 +6 --> 9 +10["SnpSift Extract Fields"]@{ shape: process } +9 --> 10 +11["Replace column"]@{ shape: process } +10 --> 11 +4 --> 11 +12["Compute"]@{ shape: process } +11 --> 12 +13["Datamash"]@{ shape: process } +12 --> 13 +14["Replace"]@{ shape: process } +13 --> 14 +15["Collapse Collection"]@{ shape: process } +14 --> 15 +16["Compute"]@{ shape: process } +15 --> 16 +17["Replace"]@{ shape: process } +16 --> 17 +18["Datamash"]@{ shape: process } +17 --> 18 +19["Filter"]@{ shape: process } +17 --> 19 +20["Datamash"]@{ shape: process } +17 --> 20 +21["Join"]@{ shape: process } +19 --> 21 +18 --> 21 +22["Datamash"]@{ shape: process } +19 --> 22 +23["Datamash"]@{ shape: process } +19 --> 23 +24["Datamash"]@{ shape: process } +21 --> 24 +25["Join"]@{ shape: process } +17 --> 25 +22 --> 25 +26["Join"]@{ shape: process } +16 --> 26 +23 --> 26 +27["Cut"]@{ shape: process } +24 --> 27 +28["Join"]@{ shape: process } +25 --> 28 +20 --> 28 +29["Cut"]@{ shape: process } +26 --> 29 +30["Replace"]@{ shape: process } +27 --> 30 +31["Cut"]@{ shape: process } +28 --> 31 +32["Split 
file"]@{ shape: process } +29 --> 32 +33["Sort"]@{ shape: process } +30 --> 33 +34["Sort"]@{ shape: process } +31 --> 34 +35["Variant Frequency Plot"]@{ shape: process } +5 --> 35 +32 --> 35 + +``` \ No newline at end of file diff --git a/workflows/scRNAseq/baredsc/baredSC-1d-logNorm_diagram.md b/workflows/scRNAseq/baredsc/baredSC-1d-logNorm_diagram.md new file mode 100644 index 000000000..04a163fa5 --- /dev/null +++ b/workflows/scRNAseq/baredsc/baredSC-1d-logNorm_diagram.md @@ -0,0 +1,20 @@ +```mermaid +graph LR +0["Tabular with raw expression values"]@{ shape: doc } +1["Gene name"]@{ shape: lean-l } +2["Maximum value in logNorm"]@{ shape: lean-l } +3["Maximum number of Gaussians to study"]@{ shape: lean-l } +4["generate\_param\_list\_one\_to\_number"]@{ shape: subprocess } +3 --> 4 +5["baredSC"]@{ shape: process } +4 --> 5 +2 --> 5 +1 --> 5 +0 --> 5 +6["combine baredSC 1d"]@{ shape: process } +5 --> 6 +2 --> 6 +1 --> 6 +0 --> 6 + +``` \ No newline at end of file diff --git a/workflows/scRNAseq/baredsc/baredSC-2d-logNorm_diagram.md b/workflows/scRNAseq/baredsc/baredSC-2d-logNorm_diagram.md new file mode 100644 index 000000000..9b0d764b7 --- /dev/null +++ b/workflows/scRNAseq/baredsc/baredSC-2d-logNorm_diagram.md @@ -0,0 +1,28 @@ +```mermaid +graph LR +0["Tabular with raw expression values"]@{ shape: doc } +1["Gene name for x axis"]@{ shape: lean-l } +2["maximum value in logNorm for x-axis"]@{ shape: lean-l } +3["Gene name for y axis"]@{ shape: lean-l } +4["maximum value in logNorm for y-axis"]@{ shape: lean-l } +5["Maximum number of Gaussians to study"]@{ shape: lean-l } +6["compute p-value"]@{ shape: lean-l } +7["generate\_param\_list\_one\_to\_number"]@{ shape: subprocess } +5 --> 7 +8["baredSC 2d"]@{ shape: process } +7 --> 8 +2 --> 8 +4 --> 8 +1 --> 8 +3 --> 8 +0 --> 8 +9["Combine multiple 2D Models"]@{ shape: process } +8 --> 9 +2 --> 9 +4 --> 9 +6 --> 9 +1 --> 9 +3 --> 9 +0 --> 9 + +``` \ No newline at end of file diff --git 
a/workflows/scRNAseq/fastq-to-matrix-10x/scrna-seq-fastq-to-matrix-10x-cellplex_diagram.md b/workflows/scRNAseq/fastq-to-matrix-10x/scrna-seq-fastq-to-matrix-10x-cellplex_diagram.md new file mode 100644 index 000000000..df289a6b0 --- /dev/null +++ b/workflows/scRNAseq/fastq-to-matrix-10x/scrna-seq-fastq-to-matrix-10x-cellplex_diagram.md @@ -0,0 +1,33 @@ +```mermaid +graph LR +0["fastq PE collection GEX"]@{ shape: docs } +1["reference genome"]@{ shape: lean-l } +2["gtf"]@{ shape: doc } +3["cellranger\_barcodes\_3M-february-2018.txt"]@{ shape: doc } +4["Barcode Size is same size of the Read"]@{ shape: lean-l } +5["fastq PE collection CMO"]@{ shape: docs } +6["sample name and CMO sequence collection"]@{ shape: docs } +7["Number of expected cells"]@{ shape: lean-l } +8["process GEX reads"]@{ shape: subprocess } +4 --> 8 +3 --> 8 +0 --> 8 +2 --> 8 +1 --> 8 +9["CITE-seq-Count"]@{ shape: process } +7 --> 9 +5 --> 9 +6 --> 9 +3 --> 9 +10["Rename STAR-solo output"]@{ shape: process } +8 --> 10 +11["translate 10x barcode v2 to 10x barcode v1"]@{ shape: process } +9 --> 11 +12["Re-organize CITE-seq-Count output"]@{ shape: subprocess } +11 --> 12 +9 --> 12 +9 --> 12 +13["rename CITE-seq output"]@{ shape: process } +12 --> 13 + +``` \ No newline at end of file diff --git a/workflows/scRNAseq/fastq-to-matrix-10x/scrna-seq-fastq-to-matrix-10x-v3_diagram.md b/workflows/scRNAseq/fastq-to-matrix-10x/scrna-seq-fastq-to-matrix-10x-v3_diagram.md new file mode 100644 index 000000000..7a3183647 --- /dev/null +++ b/workflows/scRNAseq/fastq-to-matrix-10x/scrna-seq-fastq-to-matrix-10x-v3_diagram.md @@ -0,0 +1,26 @@ +```mermaid +graph LR +0["fastq PE collection"]@{ shape: docs } +1["reference genome"]@{ shape: lean-l } +2["gtf"]@{ shape: doc } +3["cellranger\_barcodes\_3M-february-2018.txt"]@{ shape: doc } +4["Barcode Size is same size of the Read"]@{ shape: lean-l } +5["RNA STARSolo"]@{ shape: process } +1 --> 5 +2 --> 5 +0 --> 5 +4 --> 5 +3 --> 5 +6["multiQC"]@{ shape: process } +5 --> 6 
+5 --> 6 +7["filter cells"]@{ shape: process } +5 --> 7 +5 --> 7 +5 --> 7 +8["Re-organize STAR-solo output"]@{ shape: subprocess } +7 --> 8 +7 --> 8 +7 --> 8 + +``` \ No newline at end of file diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR_diagram.md b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR_diagram.md new file mode 100644 index 000000000..6f398ce13 --- /dev/null +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR_diagram.md @@ -0,0 +1,49 @@ +```mermaid +graph LR +0["Source AnnData file"]@{ shape: doc } +1["Pseudo-bulk: Fields to merge"]@{ shape: lean-l } +2["Group by column"]@{ shape: lean-l } +3["Sample key column"]@{ shape: lean-l } +4["Name Your Raw Counts Layer"]@{ shape: lean-l } +5["Factor fields"]@{ shape: lean-l } +6["Formula"]@{ shape: lean-l } +7["Gene symbol column"]@{ shape: lean-l } +8["Decoupler pseudo-bulk"]@{ shape: process } +1 --> 8 +5 --> 8 +2 --> 8 +0 --> 8 +4 --> 8 +3 --> 8 +9["Sanitize matrix"]@{ shape: process } +8 --> 9 +10["Sanitize factors"]@{ shape: process } +8 --> 10 +11["Remove start, end, width"]@{ shape: process } +8 --> 11 +12["Sanitize first factor for leading digits"]@{ shape: process } +10 --> 12 +13["Text reformatting"]@{ shape: process } +12 --> 13 +14["edgeR"]@{ shape: process } +11 --> 14 +13 --> 14 +6 --> 14 +9 --> 14 +12 --> 14 +15["Get contrast labels"]@{ shape: process } +14 --> 15 +16["Select gene symbols, logFC, PValue and FDR"]@{ shape: process } +7 --> 16 +14 --> 16 +17["Replace Text"]@{ shape: process } +15 --> 17 +18["Split contrasts"]@{ shape: process } +17 --> 18 +19["Contrast as parameters"]@{ shape: process } +18 --> 19 +20["Volcano Plot"]@{ shape: process } +16 --> 20 +19 --> 20 + +``` \ No newline at end of file diff --git a/workflows/scRNAseq/velocyto/Velocyto-on10X-filtered-barcodes_diagram.md b/workflows/scRNAseq/velocyto/Velocyto-on10X-filtered-barcodes_diagram.md new file mode 100644 index 000000000..beacb4819 
--- /dev/null +++ b/workflows/scRNAseq/velocyto/Velocyto-on10X-filtered-barcodes_diagram.md @@ -0,0 +1,11 @@ +```mermaid +graph LR +0["BAM files with CB and UB"]@{ shape: docs } +1["filtered barcodes"]@{ shape: docs } +2["gtf file"]@{ shape: doc } +3["velocyto"]@{ shape: process } +0 --> 3 +1 --> 3 +2 --> 3 + +``` \ No newline at end of file diff --git a/workflows/scRNAseq/velocyto/Velocyto-on10X-from-bundled_diagram.md b/workflows/scRNAseq/velocyto/Velocyto-on10X-from-bundled_diagram.md new file mode 100644 index 000000000..67ed64d26 --- /dev/null +++ b/workflows/scRNAseq/velocyto/Velocyto-on10X-from-bundled_diagram.md @@ -0,0 +1,13 @@ +```mermaid +graph LR +0["BAM files with CB and UB"]@{ shape: docs } +1["filtered matrices in bundle"]@{ shape: docs } +2["gtf file"]@{ shape: doc } +3["extract barcodes from bundle"]@{ shape: process } +1 --> 3 +4["Velocyto\_on10X\_filtered\_barcodes"]@{ shape: subprocess } +0 --> 4 +3 --> 4 +2 --> 4 + +``` \ No newline at end of file diff --git a/workflows/transcriptomics/brew3r/BREW3R_diagram.md b/workflows/transcriptomics/brew3r/BREW3R_diagram.md new file mode 100644 index 000000000..386f497fd --- /dev/null +++ b/workflows/transcriptomics/brew3r/BREW3R_diagram.md @@ -0,0 +1,25 @@ +```mermaid +graph LR +0["Input gtf"]@{ shape: doc } +1["BAM collection"]@{ shape: docs } +2["strandedness"]@{ shape: lean-l } +3["minimum coverage"]@{ shape: lean-l } +4["minimum FPKM for merge"]@{ shape: lean-l } +5["Map parameter value"]@{ shape: process } +2 --> 5 +6["Unstranded"]@{ shape: process } +2 --> 6 +7["assembl with StringTie"]@{ shape: process } +3 --> 7 +3 --> 7 +1 --> 7 +5 --> 7 +8["merge assembled transcripts"]@{ shape: process } +7 --> 8 +4 --> 8 +9["BREW3R.r"]@{ shape: process } +6 --> 9 +0 --> 9 +8 --> 9 + +``` \ No newline at end of file diff --git a/workflows/transcriptomics/goseq/goseq-go-kegg-enrichment-analsis_diagram.md b/workflows/transcriptomics/goseq/goseq-go-kegg-enrichment-analsis_diagram.md new file mode 100644 index 
000000000..1449a3487 --- /dev/null +++ b/workflows/transcriptomics/goseq/goseq-go-kegg-enrichment-analsis_diagram.md @@ -0,0 +1,32 @@ +```mermaid +graph LR +0["Select genome to use"]@{ shape: lean-l } +1["Differential expression result"]@{ shape: doc } +2["Select gene ID format"]@{ shape: lean-l } +3["gene length"]@{ shape: doc } +4["KEGG pathways"]@{ shape: doc } +5["goseq - Cellular Component"]@{ shape: process } +2 --> 5 +0 --> 5 +1 --> 5 +3 --> 5 +6["goseq - Biological Process"]@{ shape: process } +2 --> 6 +0 --> 6 +1 --> 6 +3 --> 6 +7["goseq - Molecular Function"]@{ shape: process } +2 --> 7 +0 --> 7 +1 --> 7 +3 --> 7 +8["goseq - KEGG"]@{ shape: process } +2 --> 8 +0 --> 8 +1 --> 8 +3 --> 8 +9["Join two Datasets"]@{ shape: process } +8 --> 9 +4 --> 9 + +``` \ No newline at end of file diff --git a/workflows/transcriptomics/rnaseq-de/rnaseq-de-filtering-plotting_diagram.md b/workflows/transcriptomics/rnaseq-de/rnaseq-de-filtering-plotting_diagram.md new file mode 100644 index 000000000..c9ba1da97 --- /dev/null +++ b/workflows/transcriptomics/rnaseq-de/rnaseq-de-filtering-plotting_diagram.md @@ -0,0 +1,58 @@ +```mermaid +graph LR +0["Counts from changed condition"]@{ shape: docs } +1["Counts from reference condition"]@{ shape: docs } +2["Count files have header"]@{ shape: lean-l } +3["Gene Annotaton"]@{ shape: doc } +4["Adjusted p-value threshold"]@{ shape: lean-l } +5["Create text file"]@{ shape: process } +6["log2 fold change threshold"]@{ shape: lean-l } +7["Pick parameter value"]@{ shape: process } +4 --> 7 +8["Text transformation"]@{ shape: process } +5 --> 8 +9["Pick parameter value"]@{ shape: process } +6 --> 9 +10["Differential Analysis"]@{ shape: process } +2 --> 10 +7 --> 10 +0 --> 10 +1 --> 10 +11["Compose text parameter value"]@{ shape: process } +7 --> 11 +12["Compose text parameter value"]@{ shape: process } +9 --> 12 +13["Annotate DESeq2/DEXSeq output tables"]@{ shape: process } +3 --> 13 +10 --> 13 +14["Text reformatting"]@{ shape: process } +10 
--> 14 +15["Annotate DESeq2 table"]@{ shape: process } +8 --> 15 +13 --> 15 +16["Parse parameter value"]@{ shape: process } +14 --> 16 +17["Filter with p-adj threshold"]@{ shape: process } +11 --> 17 +15 --> 17 +18["Generate Valcanot plot of DE genes"]@{ shape: process } +15 --> 18 +9 --> 18 +7 --> 18 +19["Compose text parameter value"]@{ shape: process } +16 --> 19 +20["Filter with log2 FC threshold"]@{ shape: process } +12 --> 20 +17 --> 20 +21["Join two Datasets"]@{ shape: process } +10 --> 21 +20 --> 21 +22["Cut"]@{ shape: process } +19 --> 22 +21 --> 22 +23["Generate Heatmap of counts"]@{ shape: process } +22 --> 23 +24["Generate Heatmap of Z-scores"]@{ shape: process } +22 --> 24 + +``` \ No newline at end of file diff --git a/workflows/transcriptomics/rnaseq-pe/rnaseq-pe_diagram.md b/workflows/transcriptomics/rnaseq-pe/rnaseq-pe_diagram.md new file mode 100644 index 000000000..55b6600bf --- /dev/null +++ b/workflows/transcriptomics/rnaseq-pe/rnaseq-pe_diagram.md @@ -0,0 +1,79 @@ +```mermaid +graph LR +0["Collection paired FASTQ files"]@{ shape: docs } +1["Forward adapter"]@{ shape: lean-l } +2["Reverse adapter"]@{ shape: lean-l } +3["Generate additional QC reports"]@{ shape: lean-l } +4["Reference genome"]@{ shape: lean-l } +5["GTF file of annotation"]@{ shape: doc } +6["Strandedness"]@{ shape: lean-l } +7["Use featureCounts for generating count tables"]@{ shape: lean-l } +8["Compute Cufflinks FPKM"]@{ shape: lean-l } +9["GTF with regions to exclude from FPKM normalization with Cufflinks"]@{ shape: doc } +10["Compute StringTie FPKM"]@{ shape: lean-l } +11["Flatten collection"]@{ shape: process } +0 --> 11 +12["remove adapters \+ bad quality bases"]@{ shape: process } +1 --> 12 +2 --> 12 +0 --> 12 +13["no additional QC"]@{ shape: process } +3 --> 13 +14["get reference\_genome as text parameter"]@{ shape: process } +4 --> 14 +15["Get featureCounts strandedness parameter"]@{ shape: process } +6 --> 15 +16["Get cufflinks strandedness parameter"]@{ shape: process 
} +6 --> 16 +17["Get Stringtie strandedness parameter"]@{ shape: process } +6 --> 17 +18["STAR: map and count and coverage splitted"]@{ shape: process } +4 --> 18 +5 --> 18 +12 --> 18 +19["Generate Unstranded Coverage"]@{ shape: subprocess } +18 --> 19 +18 --> 19 +20["Generate Stranded Coverage"]@{ shape: subprocess } +18 --> 20 +18 --> 20 +6 --> 20 +21["featureCounts"]@{ shape: process } +18 --> 21 +5 --> 21 +15 --> 21 +7 --> 21 +22["Compute FPKM with StringTie"]@{ shape: process } +5 --> 22 +18 --> 22 +17 --> 22 +10 --> 22 +23["Compute FPKM with cufflinks"]@{ shape: process } +16 --> 23 +9 --> 23 +14 --> 23 +18 --> 23 +5 --> 23 +8 --> 23 +24["Process Count files"]@{ shape: subprocess } +18 --> 24 +6 --> 24 +21 --> 24 +21 --> 24 +25["Combined MultiQC without additional QC"]@{ shape: process } +12 --> 25 +18 --> 25 +18 --> 25 +24 --> 25 +13 --> 25 +26["Combined MultiQC Quality Report with additional QC"]@{ shape: subprocess } +11 --> 26 +18 --> 26 +18 --> 26 +18 --> 26 +12 --> 26 +24 --> 26 +5 --> 26 +3 --> 26 + +``` \ No newline at end of file diff --git a/workflows/transcriptomics/rnaseq-sr/rnaseq-sr_diagram.md b/workflows/transcriptomics/rnaseq-sr/rnaseq-sr_diagram.md new file mode 100644 index 000000000..7ed947ddb --- /dev/null +++ b/workflows/transcriptomics/rnaseq-sr/rnaseq-sr_diagram.md @@ -0,0 +1,75 @@ +```mermaid +graph LR +0["Collection of FASTQ files"]@{ shape: docs } +1["Forward adapter"]@{ shape: lean-l } +2["Generate additional QC reports"]@{ shape: lean-l } +3["Reference genome"]@{ shape: lean-l } +4["GTF file of annotation"]@{ shape: doc } +5["Strandedness"]@{ shape: lean-l } +6["Use featureCounts for generating count tables"]@{ shape: lean-l } +7["Compute Cufflinks FPKM"]@{ shape: lean-l } +8["GTF with regions to exclude from FPKM normalization with Cufflinks"]@{ shape: doc } +9["Compute StringTie FPKM"]@{ shape: lean-l } +10["remove adapters \+ bad quality bases"]@{ shape: process } +1 --> 10 +0 --> 10 +11["no additional QC"]@{ shape: process } +2 
--> 11 +12["get reference\_genome as text parameter"]@{ shape: process } +3 --> 12 +13["Get featureCounts strandedness parameter"]@{ shape: process } +5 --> 13 +14["Get cufflinks strandedness parameter"]@{ shape: process } +5 --> 14 +15["Get Stringtie strandedness parameter"]@{ shape: process } +5 --> 15 +16["STAR: map and count and coverage splitted"]@{ shape: process } +3 --> 16 +4 --> 16 +10 --> 16 +17["Generate Unstranded Coverage"]@{ shape: subprocess } +16 --> 17 +16 --> 17 +18["Generate Stranded Coverage"]@{ shape: subprocess } +16 --> 18 +16 --> 18 +5 --> 18 +19["featureCounts"]@{ shape: process } +16 --> 19 +4 --> 19 +13 --> 19 +6 --> 19 +20["Compute FPKM with StringTie"]@{ shape: process } +4 --> 20 +16 --> 20 +15 --> 20 +9 --> 20 +21["Compute FPKM with cufflinks"]@{ shape: process } +14 --> 21 +8 --> 21 +12 --> 21 +16 --> 21 +4 --> 21 +7 --> 21 +22["Process Count files"]@{ shape: subprocess } +16 --> 22 +5 --> 22 +19 --> 22 +19 --> 22 +23["Combined MultiQC without additional QC"]@{ shape: process } +10 --> 23 +16 --> 23 +16 --> 23 +22 --> 23 +11 --> 23 +24["Combined MultiQC Quality Report"]@{ shape: subprocess } +0 --> 24 +16 --> 24 +16 --> 24 +16 --> 24 +10 --> 24 +22 --> 24 +4 --> 24 +2 --> 24 + +``` \ No newline at end of file diff --git a/workflows/variant-calling/generic-variant-calling-wgs-pe/Generic-variation-analysis-on-WGS-PE-data_diagram.md b/workflows/variant-calling/generic-variant-calling-wgs-pe/Generic-variation-analysis-on-WGS-PE-data_diagram.md new file mode 100644 index 000000000..2177acb87 --- /dev/null +++ b/workflows/variant-calling/generic-variant-calling-wgs-pe/Generic-variation-analysis-on-WGS-PE-data_diagram.md @@ -0,0 +1,39 @@ +```mermaid +graph LR +0["Paired Collection"]@{ shape: docs } +1["GenBank genome"]@{ shape: doc } +2["Name for genome database"]@{ shape: lean-l } +3["fastp"]@{ shape: process } +0 --> 3 +4["SnpEff build:"]@{ shape: process } +2 --> 4 +1 --> 4 +5["Map with BWA-MEM"]@{ shape: process } +3 --> 5 +4 --> 5 
+6["Samtools view"]@{ shape: process } +5 --> 6 +7["MarkDuplicates"]@{ shape: process } +6 --> 7 +8["Samtools stats"]@{ shape: process } +6 --> 8 +9["Realign reads"]@{ shape: process } +7 --> 9 +4 --> 9 +10["MultiQC"]@{ shape: process } +3 --> 10 +8 --> 10 +7 --> 10 +11["Insert indel qualities"]@{ shape: process } +9 --> 11 +4 --> 11 +12["Call variants"]@{ shape: process } +11 --> 12 +4 --> 12 +13["Lofreq filter"]@{ shape: process } +12 --> 13 +14["SnpEff eff:"]@{ shape: process } +13 --> 14 +4 --> 14 + +``` \ No newline at end of file diff --git a/workflows/variant-calling/haploid-variant-calling-wgs-pe/WGS-PE-variant-calling-in-haploid-system_diagram.md b/workflows/variant-calling/haploid-variant-calling-wgs-pe/WGS-PE-variant-calling-in-haploid-system_diagram.md new file mode 100644 index 000000000..03336ad4f --- /dev/null +++ b/workflows/variant-calling/haploid-variant-calling-wgs-pe/WGS-PE-variant-calling-in-haploid-system_diagram.md @@ -0,0 +1,40 @@ +```mermaid +graph LR +0["Paired Collection"]@{ shape: docs } +1["Annotation GTF"]@{ shape: doc } +2["Genome fasta"]@{ shape: doc } +3["fastp"]@{ shape: process } +0 --> 3 +4["SnpEff build:"]@{ shape: process } +1 --> 4 +2 --> 4 +5["Map with BWA-MEM"]@{ shape: process } +3 --> 5 +2 --> 5 +6["Samtools view"]@{ shape: process } +5 --> 6 +7["Samtools stats"]@{ shape: process } +6 --> 7 +8["MarkDuplicates"]@{ shape: process } +6 --> 8 +9["MultiQC"]@{ shape: process } +3 --> 9 +7 --> 9 +8 --> 9 +10["Realign reads"]@{ shape: process } +8 --> 10 +2 --> 10 +11["Call variants"]@{ shape: process } +10 --> 11 +2 --> 11 +12["Text reformatting"]@{ shape: process } +11 --> 12 +13["SnpEff eff:"]@{ shape: process } +12 --> 13 +4 --> 13 +14["SnpSift Extract Fields"]@{ shape: process } +13 --> 14 +15["Collapse Collection"]@{ shape: process } +14 --> 15 + +``` \ No newline at end of file diff --git a/workflows/variant-calling/variation-reporting/Generic-variation-analysis-reporting_diagram.md 
b/workflows/variant-calling/variation-reporting/Generic-variation-analysis-reporting_diagram.md new file mode 100644 index 000000000..ebea307e9 --- /dev/null +++ b/workflows/variant-calling/variation-reporting/Generic-variation-analysis-reporting_diagram.md @@ -0,0 +1,80 @@ +```mermaid +graph LR +0["Variation data to report"]@{ shape: docs } +1["AF Filter"]@{ shape: lean-l } +2["DP Filter"]@{ shape: lean-l } +3["DP\_ALT Filter"]@{ shape: lean-l } +4["SnpSift Filter"]@{ shape: process } +0 --> 4 +5["Compose text parameter value"]@{ shape: process } +1 --> 5 +2 --> 5 +3 --> 5 +6["Compose text parameter value"]@{ shape: process } +1 --> 6 +2 --> 6 +3 --> 6 +7["SnpSift Filter"]@{ shape: process } +5 --> 7 +6 --> 7 +4 --> 7 +8["SnpSift Extract Fields"]@{ shape: process } +7 --> 8 +9["Compute"]@{ shape: process } +8 --> 9 +10["Datamash"]@{ shape: process } +9 --> 10 +11["Replace"]@{ shape: process } +10 --> 11 +12["Replace"]@{ shape: process } +11 --> 12 +13["Replace"]@{ shape: process } +12 --> 13 +14["Collapse Collection"]@{ shape: process } +13 --> 14 +15["Compute"]@{ shape: process } +14 --> 15 +16["Compute"]@{ shape: process } +15 --> 16 +17["Replace"]@{ shape: process } +16 --> 17 +18["Datamash"]@{ shape: process } +17 --> 18 +19["Filter"]@{ shape: process } +17 --> 19 +20["Datamash"]@{ shape: process } +17 --> 20 +21["Datamash"]@{ shape: process } +19 --> 21 +22["Join"]@{ shape: process } +19 --> 22 +18 --> 22 +23["Datamash"]@{ shape: process } +19 --> 23 +24["Join"]@{ shape: process } +16 --> 24 +21 --> 24 +25["Datamash"]@{ shape: process } +22 --> 25 +26["Join"]@{ shape: process } +17 --> 26 +23 --> 26 +27["Cut"]@{ shape: process } +24 --> 27 +28["Cut"]@{ shape: process } +25 --> 28 +29["Join"]@{ shape: process } +26 --> 29 +20 --> 29 +30["Split file"]@{ shape: process } +27 --> 30 +31["Replace"]@{ shape: process } +28 --> 31 +32["Cut"]@{ shape: process } +29 --> 32 +33["Sort"]@{ shape: process } +31 --> 33 +34["Sort"]@{ shape: process } +32 --> 34 + +``` \ No 
newline at end of file diff --git a/workflows/virology/pox-virus-amplicon/pox-virus-half-genome_diagram.md b/workflows/virology/pox-virus-amplicon/pox-virus-half-genome_diagram.md new file mode 100644 index 000000000..83772a57c --- /dev/null +++ b/workflows/virology/pox-virus-amplicon/pox-virus-half-genome_diagram.md @@ -0,0 +1,106 @@ +```mermaid +graph LR +0["Reference FASTA"]@{ shape: doc } +1["Primer Scheme"]@{ shape: doc } +2["PE Reads Pool1"]@{ shape: docs } +3["PE Reads Pool2"]@{ shape: docs } +4["Minimum quality score to call base"]@{ shape: lean-l } +5["Allele frequency to call SNV"]@{ shape: lean-l } +6["Allele frequency to call indel"]@{ shape: lean-l } +7["Compute sequence length"]@{ shape: process } +0 --> 7 +8["Select pool1 primers"]@{ shape: process } +1 --> 8 +9["Select pool2 primers"]@{ shape: process } +1 --> 9 +10["Extract element identifiers"]@{ shape: process } +2 --> 10 +11["fastp: Trimmed Illumina Reads Pool1"]@{ shape: process } +2 --> 11 +12["Cut"]@{ shape: process } +7 --> 12 +13["Datamash"]@{ shape: process } +8 --> 13 +14["Datamash"]@{ shape: process } +9 --> 14 +15["Split file"]@{ shape: process } +10 --> 15 +16["Sort collection"]@{ shape: process } +3 --> 16 +10 --> 16 +17["Get end position of sequence"]@{ shape: process } +12 --> 17 +18["Get end position of Pool1"]@{ shape: process } +13 --> 18 +19["Get start position of Pool2"]@{ shape: process } +14 --> 19 +20["Parse parameter value"]@{ shape: process } +15 --> 20 +21["fastp: Trimmed Illumina Reads Pool2"]@{ shape: process } +16 --> 21 +22["Compose text parameter value"]@{ shape: process } +18 --> 22 +17 --> 22 +23["Compose text parameter value"]@{ shape: process } +19 --> 23 +24["Compose text parameter value"]@{ shape: process } +20 --> 24 +25["Compose text parameter value"]@{ shape: process } +20 --> 25 +26["Mask Reference for Pool1"]@{ shape: process } +0 --> 26 +22 --> 26 +27["Mask Reference for Pool2"]@{ shape: process } +0 --> 27 +23 --> 27 +28["Map with BWA-MEM"]@{ shape: 
process } +11 --> 28 +26 --> 28 +24 --> 28 +29["Map with BWA-MEM"]@{ shape: process } +21 --> 29 +27 --> 29 +25 --> 29 +30["Samtools view"]@{ shape: process } +28 --> 30 +31["Samtools stats"]@{ shape: process } +28 --> 31 +32["Samtools view"]@{ shape: process } +29 --> 32 +33["Samtools stats"]@{ shape: process } +29 --> 33 +34["MultiQC"]@{ shape: process } +11 --> 34 +31 --> 34 +35["Zip collections"]@{ shape: process } +30 --> 35 +32 --> 35 +36["MultiQC"]@{ shape: process } +21 --> 36 +33 --> 36 +37["Apply rules"]@{ shape: process } +35 --> 37 +38["Samtools merge"]@{ shape: process } +37 --> 38 +39["QualiMap BamQC"]@{ shape: process } +38 --> 39 +40["ivar trim"]@{ shape: process } +38 --> 40 +1 --> 40 +41["Filter failed datasets"]@{ shape: process } +39 --> 41 +42["ivar consensus"]@{ shape: process } +40 --> 42 +5 --> 42 +6 --> 42 +4 --> 42 +43["Flatten collection"]@{ shape: process } +41 --> 43 +44["Text transformation"]@{ shape: process } +42 --> 44 +45["MultiQC"]@{ shape: process } +43 --> 45 +46["Concatenate datasets"]@{ shape: process } +44 --> 46 + +``` \ No newline at end of file