diff --git a/workflows/VGP-assembly-v2/Assembly-Hifi-HiC-phasing-VGP4/Assembly-Hifi-HiC-phasing-VGP4_diagrams.md b/workflows/VGP-assembly-v2/Assembly-Hifi-HiC-phasing-VGP4/Assembly-Hifi-HiC-phasing-VGP4_diagrams.md new file mode 100644 index 000000000..5dd96cf20 --- /dev/null +++ b/workflows/VGP-assembly-v2/Assembly-Hifi-HiC-phasing-VGP4/Assembly-Hifi-HiC-phasing-VGP4_diagrams.md @@ -0,0 +1,168 @@ +# Workflow diagrams + +## Assembly-Hifi-HiC-phasing-VGP4 + +```mermaid +graph LR +0["Pacbio Reads Collection"]@{ shape: docs } +1["HiC forward reads"]@{ shape: doc } +2["HiC reverse reads"]@{ shape: doc } +3["Genomescope Summary"]@{ shape: doc } +4["Meryl Database"]@{ shape: doc } +5["Database for Busco Lineage"]@{ shape: lean-l } +6["Lineage"]@{ shape: lean-l } +7["Name for Haplotype 1"]@{ shape: lean-l } +8["Name for Haplotype 2"]@{ shape: lean-l } +9["Bits for bloom filter"]@{ shape: lean-l } +10["SAK input file"]@{ shape: doc } +11["Homozygous Read Coverage"]@{ shape: lean-l } +12["Genomescope Model Parameters"]@{ shape: doc } +13["Cutadapt"]@{ shape: process } +0 --> 13 +14["Search in textfiles"]@{ shape: process } +3 --> 14 +15["Compute"]@{ shape: process } +12 --> 15 +16["MultiQC"]@{ shape: process } +13 --> 16 +17["Replace Text"]@{ shape: process } +14 --> 17 +18["Cut"]@{ shape: process } +15 --> 18 +19["Convert"]@{ shape: process } +17 --> 19 +20["Estimated homozygous read coverage"]@{ shape: process } +18 --> 20 +21["Cut"]@{ shape: process } +19 --> 21 +22["Homozygous read coverage for Hifiasm"]@{ shape: process } +11 --> 22 +20 --> 22 +23["Estimated genome size"]@{ shape: process } +21 --> 23 +24["Hifiasm"]@{ shape: process } +22 --> 24 +9 --> 24 +1 --> 24 +2 --> 24 +13 --> 24 +25["Raw Unitig Image"]@{ shape: process } +24 --> 25 +26["gfastats"]@{ shape: process } +24 --> 26 +23 --> 26 +27["gfastats"]@{ shape: process } +24 --> 27 +23 --> 27 +28["gfastats"]@{ shape: process } +24 --> 28 +29["gfastats"]@{ shape: process } +24 --> 29 +30["gfastats"]@{ shape: process } +24 --> 30 +31["gfastats"]@{ shape: process } +24 --> 31 +32["gfastats"]@{ shape: process } +24 --> 32 +10 --> 32 +33["gfastats"]@{ shape: process } +24 --> 33 +10 --> 33 +34["Text reformatting"]@{ shape: process } +26 --> 34 +35["Text reformatting"]@{ shape: process } +27 --> 35 +36["Data Prep Hap2"]@{ shape: subprocess } +28 --> 36 +37["Data Prep Hap1"]@{ shape: subprocess } +30 --> 37 +38["Text transformation"]@{ shape: process } +32 --> 38 +39["Text transformation"]@{ shape: process } +33 --> 39 +40["Join two Datasets"]@{ shape: process } +35 --> 40 +34 --> 40 +41["Plot Data"]@{ shape: subprocess } +36 --> 41 +8 --> 41 +7 --> 41 +37 --> 41 +42["Busco"]@{ shape: process } +38 --> 42 +5 --> 42 +6 --> 42 +43["Merqury"]@{ shape: process } +39 --> 43 +38 --> 43 +4 --> 43 +44["Busco"]@{ shape: process } +39 --> 44 +5 --> 44 +6 --> 44 +45["Advanced Cut"]@{ shape: process } +40 --> 45 +46["output_merqury.spectra-cn.fl"]@{ shape: process } +43 --> 46 +47["output_merqury.spectra-asm.fl"]@{ shape: process } +43 --> 47 +48["merqury_qv"]@{ shape: process } +43 --> 48 +49["output_merqury.assembly_01.spectra-cn.fl"]@{ shape: process } +43 --> 49 +50["merqury_stats"]@{ shape: process } +43 --> 50 +51["output_merqury.assembly_02.spectra-cn.fl"]@{ shape: process } +43 --> 51 +52["Replace"]@{ shape: process } +45 --> 52 +``` + +## gfastats_plot + +```mermaid +graph LR +0["Primary data"]@{ shape: doc } +1["Alternate data"]@{ shape: doc } +2["Name of primary assembly"]@{ shape: lean-l } +3["Name of alternate assembly"]@{ shape: lean-l } +4["Add column"]@{ shape: process } +2 --> 4 +0 --> 4 +5["Add column"]@{ shape: process } +3 --> 5 +1 --> 5 +6["Concatenate datasets"]@{ shape: process } +4 --> 6 +5 --> 6 +7["Cut"]@{ shape: process } +6 --> 7 +8["Cut"]@{ shape: process } +6 --> 8 +9["Nx Plot"]@{ shape: process } +7 --> 9 +10["Size Plot"]@{ shape: process } +8 --> 10 +``` + +## gfastats_data_prep + +```mermaid +graph LR +0["gfa_stats"]@{ shape: doc } +1["Sort"]@{ shape: process } +0 --> 1 +2["Text reformatting"]@{ shape: process } +1 --> 2 +3["Datamash"]@{ shape: process } +2 --> 3 +4["Add column"]@{ shape: process } +2 --> 4 +5["Parse parameter value"]@{ shape: process } +3 --> 5 +6["Compose text parameter value"]@{ shape: process } +5 --> 6 +7["Compute"]@{ shape: process } +4 --> 7 +6 --> 7 +``` diff --git a/workflows/VGP-assembly-v2/Assembly-Hifi-Trio-phasing-VGP5/Assembly-Hifi-Trio-phasing-VGP5_diagrams.md b/workflows/VGP-assembly-v2/Assembly-Hifi-Trio-phasing-VGP5/Assembly-Hifi-Trio-phasing-VGP5_diagrams.md new file mode 100644 index 000000000..34e326dfe --- /dev/null +++ b/workflows/VGP-assembly-v2/Assembly-Hifi-Trio-phasing-VGP5/Assembly-Hifi-Trio-phasing-VGP5_diagrams.md @@ -0,0 +1,172 @@ +# Workflow diagrams + +## Assembly-Hifi-Trio-phasing-VGP5 + +```mermaid +graph LR +0["Pacbio Reads Collection : child"]@{ shape: docs } +1["Paternal Illumina reads (hap1)"]@{ shape: docs } +2["Maternal Illumina reads (hap2)"]@{ shape: docs } +3["Meryl Database : Child"]@{ shape: doc } +4["Hapmer Database : Paternal"]@{ shape: doc } +5["Hapmer Database : Maternal"]@{ shape: doc } +6["Bits for bloom filter"]@{ shape: lean-l } +7["Database for Busco Lineage"]@{ shape: lean-l } +8["Lineage"]@{ shape: lean-l } +9["Homozygous Read Coverage"]@{ shape: lean-l } +10["Genomescope Model Parameters"]@{ shape: doc } +11["Genomescope Summary"]@{ shape: doc } +12["Utilize homology information to correct trio-phasing errors"]@{ shape: lean-l } +13["SAK input file (Optional)"]@{ shape: doc } +14["Name for Haplotype 1"]@{ shape: lean-l } +15["Name for Haplotype 2"]@{ shape: lean-l } +16["Cutadapt"]@{ shape: process } +0 --> 16 +17["Compute"]@{ shape: process } +10 --> 17 +18["Search in textfiles"]@{ shape: process } +11 --> 18 +19["MultiQC"]@{ shape: process } +16 --> 19 +20["Cut"]@{ shape: process } +17 --> 20 +21["Replace Text"]@{ shape: process } +18 --> 21 +22["Parse parameter value"]@{ shape: process } +20 --> 22 +23["Convert"]@{ shape: process } +21 --> 23 +24["Homozygous read coverage for Hifiasm"]@{ shape: process } +9 --> 24 +22 --> 24 +25["Cut"]@{ shape: process } +23 --> 25 +26["Hifiasm"]@{ shape: process } +24 --> 26 +6 --> 26 +16 --> 26 +12 --> 26 +1 --> 26 +2 --> 26 +27["Estimated genome size"]@{ shape: process } +25 --> 27 +28["gfastats"]@{ shape: process } +26 --> 28 +13 --> 28 +29["gfastats"]@{ shape: process } +26 --> 29 +13 --> 29 +30["Raw Unitig Image"]@{ shape: process } +26 --> 30 +31["gfastats"]@{ shape: process } +26 --> 31 +13 --> 31 +32["gfastats"]@{ shape: process } +26 --> 32 +13 --> 32 +33["gfastats"]@{ shape: process } +26 --> 33 +34["gfastats"]@{ shape: process } +26 --> 34 +35["gfastats"]@{ shape: process } +26 --> 35 +27 --> 35 +36["gfastats"]@{ shape: process } +26 --> 36 +27 --> 36 +37["Busco"]@{ shape: process } +31 --> 37 +7 --> 37 +8 --> 37 +38["Busco"]@{ shape: process } +32 --> 38 +7 --> 38 +8 --> 38 +39["Merqury"]@{ shape: process } +31 --> 39 +32 --> 39 +3 --> 39 +5 --> 39 +4 --> 39 +40["Data prep Hap1"]@{ shape: subprocess } +33 --> 40 +41["Data Prep Hap2"]@{ shape: subprocess } +34 --> 41 +42["Text reformatting"]@{ shape: process } +35 --> 42 +43["Text reformatting"]@{ shape: process } +36 --> 43 +44["merqury_qv"]@{ shape: process } +39 --> 44 +45["output_merqury.spectra-cn.fl"]@{ shape: process } +39 --> 45 +46["output_merqury.spectra-asm.fl"]@{ shape: process } +39 --> 46 +47["output_merqury.assembly_01.spectra-cn.fl"]@{ shape: process } +39 --> 47 +48["output_merqury.assembly_02.spectra-cn.fl"]@{ shape: process } +39 --> 48 +49["merqury_stats"]@{ shape: process } +39 --> 49 +50["Plots"]@{ shape: subprocess } +41 --> 50 +15 --> 50 +14 --> 50 +40 --> 50 +51["Join two Datasets"]@{ shape: process } +43 --> 51 +42 --> 51 +52["Advanced Cut"]@{ shape: process } +51 --> 52 +53["Replace"]@{ shape: process } +52 --> 53 +``` + +## gfastats_plot + +```mermaid +graph LR +0["Primary data"]@{ shape: doc } +1["Alternate data"]@{ shape: doc } +2["Name of primary assembly"]@{ shape: lean-l } +3["Name of alternate assembly"]@{ shape: lean-l } +4["Add column"]@{ shape: process } +2 --> 4 +0 --> 4 +5["Add column"]@{ shape: process } +3 --> 5 +1 --> 5 +6["Concatenate datasets"]@{ shape: process } +4 --> 6 +5 --> 6 +7["Cut"]@{ shape: process } +6 --> 7 +8["Cut"]@{ shape: process } +6 --> 8 +9["Nx Plot"]@{ shape: process } +7 --> 9 +10["Size Plot"]@{ shape: process } +8 --> 10 +``` + +## gfastats_data_prep + +```mermaid +graph LR +0["gfa_stats"]@{ shape: doc } +1["Sort"]@{ shape: process } +0 --> 1 +2["Text reformatting"]@{ shape: process } +1 --> 2 +3["Datamash"]@{ shape: process } +2 --> 3 +4["Add column"]@{ shape: process } +2 --> 4 +5["Parse parameter value"]@{ shape: process } +3 --> 5 +6["Compose text parameter value"]@{ shape: process } +5 --> 6 +7["Compute"]@{ shape: process } +4 --> 7 +6 --> 7 +``` diff --git a/workflows/VGP-assembly-v2/Assembly-Hifi-only-VGP3/Assembly-Hifi-only-VGP3_diagrams.md b/workflows/VGP-assembly-v2/Assembly-Hifi-only-VGP3/Assembly-Hifi-only-VGP3_diagrams.md new file mode 100644 index 000000000..0da831ca6 --- /dev/null +++ b/workflows/VGP-assembly-v2/Assembly-Hifi-only-VGP3/Assembly-Hifi-only-VGP3_diagrams.md @@ -0,0 +1,158 @@ +# Workflow diagrams + +## Assembly-Hifi-only-VGP3 + +```mermaid +graph LR +0["Pacbio Reads Collection"]@{ shape: docs } +1["Meryl Database"]@{ shape: doc } +2["Genomescope Summary"]@{ shape: doc } +3["Bits for Hifiasm bloom filter"]@{ shape: lean-l } +4["Homozygous Read Coverage"]@{ shape: lean-l } +5["Genomescope Model Parameters"]@{ shape: doc } +6["Database for Busco Lineage"]@{ shape: lean-l } +7["Lineage"]@{ shape: lean-l } +8["SAK input file (Optional)"]@{ shape: doc } +9["Name of primary assembly"]@{ shape: lean-l } +10["Name of alternate assembly"]@{ shape: lean-l } +11["Cutadapt"]@{ shape: process } +0 --> 11 +12["Search in textfiles"]@{ shape: process } +2 --> 12 +13["Pick parameter value"]@{ shape: process } +3 --> 13 +14["Compute"]@{ shape: process } +5 --> 14 +15["MultiQC"]@{ shape: process } +11 --> 15 +16["Replace Text"]@{ shape: process } +12 --> 16 +17["Cut"]@{ shape: process } +14 --> 17 +18["Convert"]@{ shape: process } +16 --> 18 +19["Parse parameter value"]@{ shape: process } +17 --> 19 +20["Cut"]@{ shape: process } +18 --> 20 +21["Homozygous read coverage for Hifiasm"]@{ shape: process } +4 --> 21 +19 --> 21 +22["Estimated genome size"]@{ shape: process } +20 --> 22 +23["Hifiasm"]@{ shape: process } +21 --> 23 +13 --> 23 +11 --> 23 +24["Raw Unitig Image"]@{ shape: process } +23 --> 24 +25["gfastats"]@{ shape: process } +23 --> 25 +8 --> 25 +26["gfastats"]@{ shape: process } +23 --> 26 +8 --> 26 +27["gfastats"]@{ shape: process } +23 --> 27 +8 --> 27 +28["gfastats"]@{ shape: process } +23 --> 28 +8 --> 28 +29["gfastats"]@{ shape: process } +23 --> 29 +22 --> 29 +30["gfastats"]@{ shape: process } +23 --> 30 +22 --> 30 +31["gfastats"]@{ shape: process } +23 --> 31 +32["gfastats"]@{ shape: process } +23 --> 32 +33["Busco"]@{ shape: process } +27 --> 33 +6 --> 33 +7 --> 33 +34["Merqury"]@{ shape: process } +27 --> 34 +28 --> 34 +1 --> 34 +35["Text reformatting"]@{ shape: process } +29 --> 35 +36["Text reformatting"]@{ shape: process } +30 --> 36 +37["Data Prep Primary"]@{ shape: subprocess } +31 --> 37 +38["Data Prep Alternate"]@{ shape: subprocess } +32 --> 38 +39["merqury_qv"]@{ shape: process } +34 --> 39 +40["output_merqury.spectra-cn.fl"]@{ shape: process } +34 --> 40 +41["output_merqury.spectra-asm.fl"]@{ shape: process } +34 --> 41 +42["output_merqury.assembly_01.spectra-cn.fl"]@{ shape: process } +34 --> 42 +43["merqury_stats"]@{ shape: process } +34 --> 43 +44["Join two Datasets"]@{ shape: process } +35 --> 44 +36 --> 44 +45["Plotting Nx and Sizes"]@{ shape: subprocess } +38 --> 45 +10 --> 45 +9 --> 45 +37 --> 45 +46["Advanced Cut"]@{ shape: process } +44 --> 46 +47["Replace"]@{ shape: process } +46 --> 47 +``` + +## gfastats_plot + +```mermaid +graph LR +0["Primary data"]@{ shape: doc } +1["Alternate data"]@{ shape: doc } +2["Name of primary assembly"]@{ shape: lean-l } +3["Name of alternate assembly"]@{ shape: lean-l } +4["Add column"]@{ shape: process } +2 --> 4 +0 --> 4 +5["Add column"]@{ shape: process } +3 --> 5 +1 --> 5 +6["Concatenate datasets"]@{ shape: process } +4 --> 6 +5 --> 6 +7["Cut"]@{ shape: process } +6 --> 7 +8["Cut"]@{ shape: process } +6 --> 8 +9["Nx Plot"]@{ shape: process } +7 --> 9 +10["Size Plot"]@{ shape: process } +8 --> 10 +``` + +## gfastats_data_prep + +```mermaid +graph LR +0["gfa_stats"]@{ shape: doc } +1["Sort"]@{ shape: process } +0 --> 1 +2["Text reformatting"]@{ shape: process } +1 --> 2 +3["Datamash"]@{ shape: process } +2 --> 3 +4["Add column"]@{ shape: process } +2 --> 4 +5["Parse parameter value"]@{ shape: process } +3 --> 5 +6["Compose text parameter value"]@{ shape: process } +5 --> 6 +7["Compute"]@{ shape: process } +4 --> 7 +6 --> 7 +``` diff --git a/workflows/VGP-assembly-v2/Assembly-decontamination-VGP9/Assembly-decontamination-VGP9_diagrams.md b/workflows/VGP-assembly-v2/Assembly-decontamination-VGP9/Assembly-decontamination-VGP9_diagrams.md new file mode 100644 index 000000000..119574e8c --- /dev/null +++ b/workflows/VGP-assembly-v2/Assembly-decontamination-VGP9/Assembly-decontamination-VGP9_diagrams.md @@ -0,0 +1,30 @@ +# Workflow diagrams + +## Assembly-decontamination-VGP9 + +```mermaid +graph LR +0["Scaffolded assembly (fasta)"]@{ shape: doc } +1["Database for Kraken2"]@{ shape: lean-l } +2["soft-masking "]@{ shape: process } +0 --> 2 +3["hard-masking"]@{ shape: process } +2 --> 3 +4["ID non-target contaminants"]@{ shape: process } +1 --> 4 +3 --> 4 +5["blast mitochondria DB"]@{ shape: process } +3 --> 5 +6["Cut"]@{ shape: process } +4 --> 6 +7["parsing blast output"]@{ shape: process } +5 --> 7 +8["Filter"]@{ shape: process } +6 --> 8 +9["Cut"]@{ shape: process } +8 --> 9 +10["concatenate scaffold lists"]@{ shape: process } +11["removing scaffolds "]@{ shape: process } +0 --> 11 +10 --> 11 +``` diff --git a/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0_diagrams.md b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0_diagrams.md new file mode 100644 index 000000000..4e5732e43 --- /dev/null +++ b/workflows/VGP-assembly-v2/Mitogenome-assembly-VGP0/Mitogenome-Assembly-VGP0_diagrams.md @@ -0,0 +1,19 @@ +# Workflow diagrams + +## Mitogenome-Assembly-VGP0 + +```mermaid +graph LR +0["Collection of Pacbio Data"]@{ shape: docs } +1["Species name (latin name)"]@{ shape: lean-l } +2["Email adress"]@{ shape: lean-l } +3["MitoHiFi"]@{ shape: process } +2 --> 3 +1 --> 3 +4["MitoHiFi"]@{ shape: process } +0 --> 4 +3 --> 4 +3 --> 4 +5["Compress file(s)"]@{ shape: process } +4 --> 5 +``` diff --git a/workflows/VGP-assembly-v2/Plot-Nx-Size/Generate-Nx-and-Size-plots-for-multiple-assemblies_diagrams.md b/workflows/VGP-assembly-v2/Plot-Nx-Size/Generate-Nx-and-Size-plots-for-multiple-assemblies_diagrams.md new file mode 100644 index 000000000..ca7050670 --- /dev/null +++ b/workflows/VGP-assembly-v2/Plot-Nx-Size/Generate-Nx-and-Size-plots-for-multiple-assemblies_diagrams.md @@ -0,0 +1,37 @@ +# Workflow diagrams + +## Generate Nx and Size plots for multiple assemblies + +```mermaid +graph LR +0["Collection of genomes to plot"]@{ shape: docs } +1["gfastats"]@{ shape: process } +0 --> 1 +2["Sort"]@{ shape: process } +1 --> 2 +3["Text reformatting"]@{ shape: process } +2 --> 3 +4["Datamash"]@{ shape: process } +3 --> 4 +5["Add column"]@{ shape: process } +3 --> 5 +6["Parse parameter value"]@{ shape: process } +4 --> 6 +7["Compose text parameter value"]@{ shape: process } +6 --> 7 +8["Compute"]@{ shape: process } +5 --> 8 +7 --> 8 +9["Add input name as column"]@{ shape: process } +8 --> 9 +10["Collapse Collection"]@{ shape: process } +9 --> 10 +11["Cut"]@{ shape: process } +10 --> 11 +12["Cut"]@{ shape: process } +10 --> 12 +13["Nx Plot"]@{ shape: process } +11 --> 13 +14["Size Plot"]@{ shape: process } +12 --> 14 +``` diff --git a/workflows/VGP-assembly-v2/Purge-duplicate-contigs-VGP6/Purge-duplicate-contigs-VGP6_diagrams.md b/workflows/VGP-assembly-v2/Purge-duplicate-contigs-VGP6/Purge-duplicate-contigs-VGP6_diagrams.md new file mode 100644 index 000000000..27094df20 --- /dev/null +++ b/workflows/VGP-assembly-v2/Purge-duplicate-contigs-VGP6/Purge-duplicate-contigs-VGP6_diagrams.md @@ -0,0 +1,182 @@ +# Workflow diagrams + +## Purge-duplicate-contigs-VGP6 + +```mermaid +graph LR +0["Pacbio Reads Collection - Trimmed"]@{ shape: docs } +1["Hifiasm Primary assembly"]@{ shape: doc } +2["Hifiasm Alternate assembly"]@{ shape: doc } +3["Meryl Database"]@{ shape: doc } +4["Genomescope model parameters"]@{ shape: doc } +5["Estimated genome size - Parameter File"]@{ shape: doc } +6["Database for Busco Lineage"]@{ shape: lean-l } +7["Lineage"]@{ shape: lean-l } +8["SAK input file"]@{ shape: doc } +9["Name of primary assembly"]@{ shape: lean-l } +10["Name of alternate assembly"]@{ shape: lean-l } +11["Map with minimap2"]@{ shape: process } +0 --> 11 +1 --> 11 +12["Purge overlaps"]@{ shape: process } +1 --> 12 +13["Compute"]@{ shape: process } +4 --> 13 +14["Estimated genome size"]@{ shape: process } +5 --> 14 +15["Awk command for primary assembly"]@{ shape: process } +9 --> 15 +16["Awk command for alternate assembly 2"]@{ shape: process } +10 --> 16 +17["Map with minimap2"]@{ shape: process } +12 --> 17 +12 --> 17 +18["Cut"]@{ shape: process } +13 --> 18 +19["Cut"]@{ shape: process } +13 --> 19 +20["Parse parameter value"]@{ shape: process } +18 --> 20 +21["Parse parameter value"]@{ shape: process } +19 --> 21 +22["Purge overlaps"]@{ shape: process } +11 --> 22 +21 --> 22 +20 --> 22 +23["Purge overlaps"]@{ shape: process } +22 --> 23 +22 --> 23 +17 --> 23 +24["Purge overlaps"]@{ shape: process } +23 --> 24 +1 --> 24 +25["Concatenate datasets"]@{ shape: process } +24 --> 25 +2 --> 25 +26["Busco"]@{ shape: process } +24 --> 26 +6 --> 26 +7 --> 26 +27["gfastats"]@{ shape: process } +24 --> 27 +8 --> 27 +28["gfastats"]@{ shape: process } +24 --> 28 +14 --> 28 +29["gfastats"]@{ shape: process } +24 --> 29 +30["Map with minimap2"]@{ shape: process } +0 --> 30 +25 --> 30 +31["Purge overlaps"]@{ shape: process } +25 --> 31 +32["Text reformatting"]@{ shape: process } +15 --> 32 +28 --> 32 +33["gfastats_data_prep"]@{ shape: subprocess } +29 --> 33 +34["Purge overlaps"]@{ shape: process } +30 --> 34 +21 --> 34 +20 --> 34 +35["Map with minimap2"]@{ shape: process } +31 --> 35 +31 --> 35 +36["Purge overlaps"]@{ shape: process } +34 --> 36 +34 --> 36 +35 --> 36 +37["Purge overlaps"]@{ shape: process } +36 --> 37 +25 --> 37 +38["gfastats"]@{ shape: process } +37 --> 38 +14 --> 38 +39["gfastats"]@{ shape: process } +37 --> 39 +40["Merqury"]@{ shape: process } +24 --> 40 +37 --> 40 +3 --> 40 +41["gfastats"]@{ shape: process } +37 --> 41 +8 --> 41 +42["Text reformatting"]@{ shape: process } +16 --> 42 +38 --> 42 +43["gfastats_data_prep"]@{ shape: subprocess } +39 --> 43 +44["merqury_QV"]@{ shape: process } +40 --> 44 +45["output_merqury.spectra-cn.fl"]@{ shape: process } +40 --> 45 +46["output_merqury.spectra-asm.fl"]@{ shape: process } +40 --> 46 +47["output_merqury.assembly_01.spectra-cn.fl"]@{ shape: process } +40 --> 47 +48["merqury_stats"]@{ shape: process } +40 --> 48 +49["output_merqury.assembly_02.spectra-cn.fl"]@{ shape: process } +40 --> 49 +50["Join two Datasets"]@{ shape: process } +32 --> 50 +42 --> 50 +51["gfastats_plot"]@{ shape: subprocess } +43 --> 51 +10 --> 51 +9 --> 51 +33 --> 51 +52["Advanced Cut"]@{ shape: process } +50 --> 52 +53["Replace"]@{ shape: process } +52 --> 53 +``` + +## gfastats_plot + +```mermaid +graph LR +0["Primary data"]@{ shape: doc } +1["Alternate data"]@{ shape: doc } +2["Name of primary assembly"]@{ shape: lean-l } +3["Name of alternate assembly"]@{ shape: lean-l } +4["Add column"]@{ shape: process } +2 --> 4 +0 --> 4 +5["Add column"]@{ shape: process } +3 --> 5 +1 --> 5 +6["Concatenate datasets"]@{ shape: process } +4 --> 6 +5 --> 6 +7["Cut"]@{ shape: process } +6 --> 7 +8["Cut"]@{ shape: process } +6 --> 8 +9["Nx Plot"]@{ shape: process } +7 --> 9 +10["Size Plot"]@{ shape: process } +8 --> 10 +``` + +## gfastats_data_prep + +```mermaid +graph LR +0["gfa_stats"]@{ shape: doc } +1["Sort"]@{ shape: process } +0 --> 1 +2["Text reformatting"]@{ shape: process } +1 --> 2 +3["Datamash"]@{ shape: process } +2 --> 3 +4["Add column"]@{ shape: process } +2 --> 4 +5["Parse parameter value"]@{ shape: process } +3 --> 5 +6["Compose text parameter value"]@{ shape: process } +5 --> 6 +7["Compute"]@{ shape: process } +4 --> 7 +6 --> 7 +``` diff --git a/workflows/VGP-assembly-v2/Purge-duplicates-one-haplotype-VGP6b/Purging-duplicates-one-haplotype-VGP6b_diagrams.md b/workflows/VGP-assembly-v2/Purge-duplicates-one-haplotype-VGP6b/Purging-duplicates-one-haplotype-VGP6b_diagrams.md new file mode 100644 index 000000000..eb2bbf03c --- /dev/null +++ b/workflows/VGP-assembly-v2/Purge-duplicates-one-haplotype-VGP6b/Purging-duplicates-one-haplotype-VGP6b_diagrams.md @@ -0,0 +1,151 @@ +# Workflow diagrams + +## Purging-duplicates-one-haplotype-VGP6b + +```mermaid +graph LR +0["Genomescope model parameters"]@{ shape: doc } +1["Pacbio Reads Collection - Trimmed"]@{ shape: docs } +2["Assembly to purge"]@{ shape: doc } +3["Meryl Database"]@{ shape: doc } +4["Assembly to leave alone (For Merqury comparison)"]@{ shape: doc } +5["Estimated genome size - Parameter File"]@{ shape: doc } +6["Database for Busco Lineage"]@{ shape: lean-l } +7["Lineage"]@{ shape: lean-l } +8["Name of purged assembly"]@{ shape: lean-l } +9["Name of un-altered assembly"]@{ shape: lean-l } +10["Compute"]@{ shape: process } +0 --> 10 +11["Map with minimap2"]@{ shape: process } +1 --> 11 +2 --> 11 +12["Purge overlaps"]@{ shape: process } +2 --> 12 +13["gfastats"]@{ shape: process } +4 --> 13 +14["Estimated genome size"]@{ shape: process } +5 --> 14 +15["Cut"]@{ shape: process } +10 --> 15 +16["Cut"]@{ shape: process } +10 --> 16 +17["Map with minimap2"]@{ shape: process } +12 --> 17 +12 --> 17 +18["gfastats_data_prep"]@{ shape: subprocess } +13 --> 18 +19["gfastats"]@{ shape: process } +4 --> 19 +14 --> 19 +20["Parse parameter value"]@{ shape: process } +15 --> 20 +21["Parse parameter value"]@{ shape: process } +16 --> 21 +22["Text reformatting"]@{ shape: process } +19 --> 22 +23["Purge overlaps"]@{ shape: process } +11 --> 23 +21 --> 23 +20 --> 23 +24["Purge overlaps"]@{ shape: process } +23 --> 24 +23 --> 24 +17 --> 24 +25["Remove REPEATs from BED"]@{ shape: process } +24 --> 25 +26["Purge overlaps"]@{ shape: process } +25 --> 26 +2 --> 26 +27["Merqury"]@{ shape: process } +26 --> 27 +4 --> 27 +3 --> 27 +28["gfastats"]@{ shape: process } +26 --> 28 +29["Busco"]@{ shape: process } +26 --> 29 +6 --> 29 +7 --> 29 +30["Convert purged fasta to gfa"]@{ shape: process } +26 --> 30 +31["gfastats"]@{ shape: process } +26 --> 31 +14 --> 31 +32["merqury_QV"]@{ shape: process } +27 --> 32 +33["output_merqury.spectra-cn.fl"]@{ shape: process } +27 --> 33 +34["output_merqury.spectra-asm.fl"]@{ shape: process } +27 --> 34 +35["output_merqury.assembly_01.spectra-cn.fl"]@{ shape: process } +27 --> 35 +36["merqury_stats"]@{ shape: process } +27 --> 36 +37["output_merqury.assembly_02.spectra-cn.fl"]@{ shape: process } +27 --> 37 +38["gfastats_data_prep"]@{ shape: subprocess } +28 --> 38 +39["Text reformatting"]@{ shape: process } +31 --> 39 +40["gfastats_plot"]@{ shape: subprocess } +18 --> 40 +9 --> 40 +8 --> 40 +38 --> 40 +41["Join two Datasets"]@{ shape: process } +39 --> 41 +22 --> 41 +42["Advanced Cut"]@{ shape: process } +41 --> 42 +43["Replace"]@{ shape: process } +42 --> 43 +``` + +## gfastats_plot + +```mermaid +graph LR +0["Primary data"]@{ shape: doc } +1["Alternate data"]@{ shape: doc } +2["Name of primary assembly"]@{ shape: lean-l } +3["Name of alternate assembly"]@{ shape: lean-l } +4["Add column"]@{ shape: process } +2 --> 4 +0 --> 4 +5["Add column"]@{ shape: process } +3 --> 5 +1 --> 5 +6["Concatenate datasets"]@{ shape: process } +4 --> 6 +5 --> 6 +7["Cut"]@{ shape: process } +6 --> 7 +8["Cut"]@{ shape: process } +6 --> 8 +9["Nx Plot"]@{ shape: process } +7 --> 9 +10["Size Plot"]@{ shape: process } +8 --> 10 +``` + +## gfastats_data_prep + +```mermaid +graph LR +0["gfa_stats"]@{ shape: doc } +1["Sort"]@{ shape: process } +0 --> 1 +2["Text reformatting"]@{ shape: process } +1 --> 2 +3["Datamash"]@{ shape: process } +2 --> 3 +4["Add column"]@{ shape: process } +2 --> 4 +5["Parse parameter value"]@{ shape: process } +3 --> 5 +6["Compose text parameter value"]@{ shape: process } +5 --> 6 +7["Compute"]@{ shape: process } +4 --> 7 +6 --> 7 +``` diff --git a/workflows/VGP-assembly-v2/Scaffolding-Bionano-VGP7/Scaffolding-BioNano-VGP7_diagrams.md b/workflows/VGP-assembly-v2/Scaffolding-Bionano-VGP7/Scaffolding-BioNano-VGP7_diagrams.md new file mode 100644 index 000000000..1377ae4b4 --- /dev/null +++ b/workflows/VGP-assembly-v2/Scaffolding-Bionano-VGP7/Scaffolding-BioNano-VGP7_diagrams.md @@ -0,0 +1,63 @@ +# Workflow diagrams + +## Scaffolding-BioNano-VGP7 + +```mermaid +graph LR +0["Bionano Data"]@{ shape: doc } +1["Estimated genome size - Parameter File"]@{ shape: doc } +2["Input GFA"]@{ shape: doc } +3["Conflict resolution files"]@{ shape: doc } +4["Parse parameter value"]@{ shape: process } +1 --> 4 +5["gfastats"]@{ shape: process } +2 --> 5 +6["Bionano Hybrid Scaffold"]@{ shape: process } +0 --> 6 +3 --> 6 +5 --> 6 +7["gfastats"]@{ shape: process } +2 --> 7 +6 --> 7 +8["gfastats"]@{ shape: process } +7 --> 8 +9["gfastats"]@{ shape: process } +7 --> 9 +4 --> 9 +10["gfastats"]@{ shape: process } +7 --> 10 +11["Replace"]@{ shape: process } +9 --> 11 +12["gfastats_data_prep"]@{ shape: subprocess } +10 --> 12 +13["Cut"]@{ shape: process } +12 --> 13 +14["Cut"]@{ shape: process } +12 --> 14 +15["Scatterplot with ggplot2"]@{ shape: process } +13 --> 15 +16["Scatterplot with ggplot2"]@{ shape: process } +14 --> 16 +``` + +## gfastats_data_prep + +```mermaid +graph LR +0["gfa_stats"]@{ shape: doc } +1["Sort"]@{ shape: process } +0 --> 1 +2["Text reformatting"]@{ shape: process } +1 --> 2 +3["Datamash"]@{ shape: process } +2 --> 3 +4["Add column"]@{ shape: process } +2 --> 4 +5["Parse parameter value"]@{ shape: process } +3 --> 5 +6["Compose text parameter value"]@{ shape: process } +5 --> 6 +7["Compute"]@{ shape: process } +4 --> 7 +6 --> 7 +``` diff --git a/workflows/VGP-assembly-v2/Scaffolding-HiC-VGP8/Scaffolding-HiC-VGP8_diagrams.md b/workflows/VGP-assembly-v2/Scaffolding-HiC-VGP8/Scaffolding-HiC-VGP8_diagrams.md new file mode 100644 index 000000000..e045479f4 --- /dev/null +++ b/workflows/VGP-assembly-v2/Scaffolding-HiC-VGP8/Scaffolding-HiC-VGP8_diagrams.md @@ -0,0 +1,115 @@ +# Workflow diagrams + +## Scaffolding with Hi-C data VGP8 + +```mermaid +graph LR +0["Input GFA"]@{ shape: doc } +1["Haplotype"]@{ shape: lean-l } +2["Sequence graph"]@{ shape: doc } +3["Database for Busco Lineage"]@{ shape: lean-l } +4["Lineage"]@{ shape: lean-l } +5["HiC Forward reads"]@{ shape: doc } +6["HiC reverse reads"]@{ shape: doc } +7["Restriction enzymes"]@{ shape: lean-l } +8["Estimated genome size - Parameter File"]@{ shape: doc } +9["SAK input file"]@{ shape: doc } +10["Compose text parameter value"]@{ shape: process } +1 --> 10 +11["Parse parameter value"]@{ shape: process } +8 --> 11 +12["gfastats"]@{ shape: process } +0 --> 12 +9 --> 12 +13["BWA-MEM2"]@{ shape: process } +5 --> 13 +12 --> 13 +14["BWA-MEM2"]@{ shape: process } +6 --> 14 +12 --> 14 +15["Filter and merge"]@{ shape: process } +13 --> 15 +14 --> 15 +16["PretextMap"]@{ shape: process } +15 --> 16 +17["YAHS"]@{ shape: process } +2 --> 17 +15 --> 17 +7 --> 17 +12 --> 17 +18["Pretext Snapshot"]@{ shape: process } +16 --> 18 +19["Replace"]@{ shape: process } +10 --> 19 +17 --> 19 +20["Extract dataset"]@{ shape: process } +18 --> 20 +21["gfastats"]@{ shape: process } +12 --> 21 +19 --> 21 +22["gfastats"]@{ shape: process } +21 --> 22 +9 --> 22 +23["gfastats"]@{ shape: process } +21 --> 23 +11 --> 23 +24["gfastats"]@{ shape: process } +21 --> 24 +25["BWA-MEM2"]@{ shape: process } +5 --> 25 +22 --> 25 +26["BWA-MEM2"]@{ shape: process } +6 --> 26 +22 --> 26 +27["Busco"]@{ shape: process } +22 --> 27 +3 --> 27 +4 --> 27 +28["Replace"]@{ shape: process } +23 --> 28 +29["gfastats_data_prep"]@{ shape: subprocess } +24 --> 29 +30["Filter and merge"]@{ shape: process } +25 --> 30 +26 --> 30 +31["Cut"]@{ shape: process } +29 --> 31 +32["Cut"]@{ shape: process } +29 --> 32 +33["PretextMap"]@{ shape: process } +30 --> 33 +34["bedtools BAM to BED"]@{ shape: process } +30 --> 34 +35["Nx Plot"]@{ shape: process } +31 --> 35 +36["Size Plot"]@{ shape: process } +32 --> 36 +37["Pretext Snapshot"]@{ shape: process } +33 --> 37 +38["Sort"]@{ shape: process } +34 --> 38 +39["Extract dataset"]@{ shape: process } +37 --> 39 +``` + +## gfastats_data_prep + +```mermaid +graph LR +0["gfa_stats"]@{ shape: doc } +1["Sort"]@{ shape: process } +0 --> 1 +2["Text reformatting"]@{ shape: process } +1 --> 2 +3["Datamash"]@{ shape: process } +2 --> 3 +4["Add column"]@{ shape: process } +2 --> 4 +5["Parse parameter value"]@{ shape: process } +3 --> 5 +6["Compose text parameter value"]@{ shape: process } +5 --> 6 +7["Compute"]@{ shape: process } +4 --> 7 +6 --> 7 +``` diff --git a/workflows/VGP-assembly-v2/kmer-profiling-hifi-VGP1/kmer-profiling-hifi-VGP1_diagrams.md b/workflows/VGP-assembly-v2/kmer-profiling-hifi-VGP1/kmer-profiling-hifi-VGP1_diagrams.md new file mode 100644 index 000000000..0e5871490 --- /dev/null +++ b/workflows/VGP-assembly-v2/kmer-profiling-hifi-VGP1/kmer-profiling-hifi-VGP1_diagrams.md @@ -0,0 +1,21 @@ +# Workflow diagrams + +## kmer-profiling-hifi-VGP1 + +```mermaid +graph LR +0["Collection of Pacbio Data"]@{ shape: docs } +1["K-mer length "]@{ shape: lean-l } +2["Ploidy"]@{ shape: lean-l } +3["Meryl"]@{ shape: process } +0 --> 3 +1 --> 3 +4["Meryl"]@{ shape: process } +3 --> 4 +5["Meryl"]@{ shape: process } +4 --> 5 +6["GenomeScope"]@{ shape: process } +5 --> 6 +1 --> 6 +2 --> 6 +``` diff --git a/workflows/VGP-assembly-v2/kmer-profiling-hifi-trio-VGP2/kmer-profiling-hifi-trio-VGP2_diagrams.md b/workflows/VGP-assembly-v2/kmer-profiling-hifi-trio-VGP2/kmer-profiling-hifi-trio-VGP2_diagrams.md new file mode 100644 index 000000000..6defb9886 --- /dev/null +++ b/workflows/VGP-assembly-v2/kmer-profiling-hifi-trio-VGP2/kmer-profiling-hifi-trio-VGP2_diagrams.md @@ -0,0 +1,43 @@ +# Workflow diagrams + +## kmer-profiling-hifi-trio-VGP2 + +```mermaid +graph LR +0["Pacbio Hifi reads"]@{ shape: docs } +1["Paternal reads"]@{ shape: docs } +2["Maternal reads"]@{ shape: docs } +3["K-mer length"]@{ shape: lean-l } +4["Ploidy"]@{ shape: lean-l } +5["Meryl"]@{ shape: process } +1 --> 5 +3 --> 5 +6["Meryl"]@{ shape: process } +0 --> 6 +2 --> 6 +3 --> 6 +1 --> 6 +7["Meryl"]@{ shape: process } +2 --> 7 +3 --> 7 +8["Meryl"]@{ shape: process } +5 --> 8 +9["GenomeScope"]@{ shape: process } +6 --> 9 +3 --> 9 +4 --> 9 +10["Meryl"]@{ shape: process } +7 --> 10 +11["Meryl"]@{ shape: process } +8 --> 11 +12["Meryl"]@{ shape: process } +10 --> 12 +13["Genomescope on paternal haplotype"]@{ shape: process } +11 --> 13 +3 --> 13 +4 --> 13 +14["Genomescope on maternal haplotype"]@{ shape: process } +12 --> 14 +3 --> 14 +4 --> 14 +``` diff --git a/workflows/amplicon/dada2/dada2_paired_diagrams.md b/workflows/amplicon/dada2/dada2_paired_diagrams.md new file mode 100644 index 000000000..25fc537b0 --- /dev/null +++ b/workflows/amplicon/dada2/dada2_paired_diagrams.md @@ -0,0 +1,55 @@ +# Workflow diagrams + +## dada2 amplicon analysis pipeline - for paired end data + +```mermaid +graph LR +0["Paired input data"]@{ shape: docs } +1["Read length forward read"]@{ shape: lean-l } +2["Read length reverse read"]@{ shape: lean-l } +3["Pool samples"]@{ shape: lean-l } +4["Cached reference database"]@{ shape: lean-l } +5["Sort samples"]@{ shape: process } +0 --> 5 +6["QualityProfile before filterAndTrim"]@{ shape: process } +5 --> 6 +7["dada2: filterAndTrim"]@{ shape: process } +5 --> 7 +2 --> 7 +1 --> 7 +8["QualityProfile after filterAndTrim"]@{ shape: process } +7 --> 8 +9["Unzip collection"]@{ shape: process } +7 --> 9 +10["dada2: learnErrors"]@{ shape: process } +9 --> 10 +11["dada2: learnErrors"]@{ shape: process } +9 --> 11 +12["dada2: dada"]@{ shape: process } +9 --> 12 +3 --> 12 +10 --> 12 +13["dada2: dada"]@{ shape: process } +9 --> 13 +3 --> 13 +11 --> 13 +14["dada2: mergePairs"]@{ shape: process } +12 --> 14 +13 --> 14 +9 --> 14 +9 --> 14 +15["dada2: makeSequenceTable"]@{ shape: process } +14 --> 15 +16["dada2: removeBimeraDenovo"]@{ shape: process } +15 --> 16 +17["dada2: sequence counts"]@{ shape: process } +7 --> 17 +12 --> 17 +13 --> 17 +14 --> 17 +15 --> 17 +16 --> 17 +18["dada2: assignTaxonomy"]@{ shape: process } +4 --> 18 +16 --> 18 +``` diff --git a/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Ia-multiplexed-data-single-end_diagrams.md b/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Ia-multiplexed-data-single-end_diagrams.md new file mode 100644 index 000000000..3d80f0ae9 --- /dev/null +++ b/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Ia-multiplexed-data-single-end_diagrams.md @@ -0,0 +1,24 @@ +# Workflow diagrams + +## QIIME2 Ia: multiplexed data (single-end) + +```mermaid +graph LR +0["Sequences"]@{ shape: doc } +1["Barcodes"]@{ shape: doc } +2["Metadata"]@{ shape: doc } +3["Metadata parameter"]@{ shape: lean-l } +4["Reverse complement barcodes"]@{ shape: lean-l } +5["Input files"]@{ shape: process } +1 --> 5 +0 --> 5 +6["Metadata as artifact"]@{ shape: process } +2 --> 6 +7["Demultiplex single-end data"]@{ shape: process } +4 --> 7 +3 --> 7 +6 --> 7 +5 --> 7 +8["Summarize demultiplexed output"]@{ shape: process } +7 --> 8 +``` diff --git a/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Ib-multiplexed-data-paired-end_diagrams.md b/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Ib-multiplexed-data-paired-end_diagrams.md new file mode 100644 index 000000000..9bb7803a8 --- /dev/null +++ b/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Ib-multiplexed-data-paired-end_diagrams.md @@ -0,0 +1,26 @@ +# Workflow diagrams + +## QIIME2 Ib: multiplexed data (paired-end) + +```mermaid +graph LR +0["Forward sequences"]@{ shape: doc } +1["Reverse sequences"]@{ shape: doc } +2["Barcodes"]@{ shape: doc } +3["Metadata"]@{ shape: doc } +4["Metadata parameter"]@{ shape: lean-l } +5["Reverse complement of barcodes needed?"]@{ shape: lean-l } +6["Import data into the pipeline"]@{ shape: process } +2 --> 6 +0 --> 6 +1 --> 6 +7["Metadata as artifact"]@{ shape: process } +3 --> 7 +8["Demultiplex paired-end sequences"]@{ shape: process } +5 --> 8 +4 --> 8 +7 --> 8 +6 --> 8 +9["Summarising the demultiplexed output"]@{ shape: process } +8 --> 9 +``` diff --git a/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Ic-demultiplexed-data-single-end_diagrams.md b/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Ic-demultiplexed-data-single-end_diagrams.md new file mode 100644 index 000000000..59b51d60b --- /dev/null +++ b/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Ic-demultiplexed-data-single-end_diagrams.md @@ -0,0 +1,19 @@ +# Workflow diagrams + +## QIIME2 Ic: Demultiplexed data (single-end) + +```mermaid +graph LR +0["Sequence collection"]@{ shape: docs } +1["Extract element identifiers"]@{ shape: process } +0 --> 1 +2["Screening laneless and single-lane"]@{ shape: process } +1 --> 2 +3["Relabel sequence files"]@{ shape: process } +2 --> 3 +0 --> 3 +4["Import data into the pipeline"]@{ shape: process } +3 --> 4 +5["Summarising the demultiplexed output"]@{ shape: process } +4 --> 5 +``` diff --git a/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Id-demultiplexed-data-paired-end_diagrams.md b/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Id-demultiplexed-data-paired-end_diagrams.md new file mode 100644 index 000000000..8494de800 --- /dev/null +++ b/workflows/amplicon/qiime2/qiime2-I-import/QIIME2-Id-demultiplexed-data-paired-end_diagrams.md @@ -0,0 +1,19 @@ +# Workflow diagrams + +## QIIME2 Id: Demultiplexed data (paired-end) + +```mermaid +graph LR +0["Sequence collection"]@{ shape: docs } +1["Extract element identifiers"]@{ shape: process } +0 --> 1 +2["Screening laneless and single-lane"]@{ shape: process } +1 --> 2 +3["Relabel sequence files"]@{ shape: process } +2 --> 3 +0 --> 3 +4["Import data into the pipeline"]@{ shape: process } +3 --> 4 +5["Summarising the demultiplexed output"]@{ shape: process } +4 --> 5 +``` diff --git a/workflows/amplicon/qiime2/qiime2-II-denoising/QIIME2-IIa-denoising-and-feature-table-creation-single-end_diagrams.md b/workflows/amplicon/qiime2/qiime2-II-denoising/QIIME2-IIa-denoising-and-feature-table-creation-single-end_diagrams.md new file mode 100644 index 000000000..dc46e4b23 --- /dev/null +++ b/workflows/amplicon/qiime2/qiime2-II-denoising/QIIME2-IIa-denoising-and-feature-table-creation-single-end_diagrams.md @@ -0,0 +1,22 @@ +# Workflow diagrams + +## QIIME2 IIa: Denoising (sequence quality control) and feature table creation (single-end) + +```mermaid +graph LR +0["Metadata"]@{ shape: doc } +1["Demultiplexed sequences"]@{ shape: doc } +2["Truncation length"]@{ shape: lean-l } +3["Trimming length"]@{ shape: lean-l } +4["Denoising the datasets"]@{ shape: process } +3 --> 4 +1 --> 4 +2 --> 4 +5["Tabulate DADA2 denoised representative sequences"]@{ shape: process } +4 --> 5 +6["Tabulate DADA2 statistical metadata "]@{ shape: process } +4 --> 6 +7["Summing up the dada2 output table"]@{ shape: process } +0 --> 7 +4 --> 7 +``` diff --git a/workflows/amplicon/qiime2/qiime2-II-denoising/QIIME2-IIb-denoising-and-feature-table-creation-paired-end_diagrams.md b/workflows/amplicon/qiime2/qiime2-II-denoising/QIIME2-IIb-denoising-and-feature-table-creation-paired-end_diagrams.md new file mode 100644 index 000000000..a60a854c3 --- /dev/null +++ b/workflows/amplicon/qiime2/qiime2-II-denoising/QIIME2-IIb-denoising-and-feature-table-creation-paired-end_diagrams.md @@ -0,0 +1,26 @@ +# Workflow diagrams + +## QIIME2 IIb: Denoising (sequence quality control) and feature table creation (paired-end) + +```mermaid +graph LR +0["Metadata"]@{ shape: doc } +1["Demultiplexed sequences"]@{ shape: doc } +2["Truncation length (forward)"]@{ shape: lean-l } +3["Truncation length (reverse)"]@{ shape: lean-l } +4["Trimming length (forward)"]@{ shape: lean-l } +5["Trimming length (reverse)"]@{ shape: lean-l } +6["Denoising the datasets"]@{ shape: process } +4 --> 6 +5 --> 6 +1 --> 6 +2 --> 6 +3 --> 6 +7["Tabulate DADA2 denoised representative sequences"]@{ shape: process } +6 --> 7 +8["Tabulate DADA2 statistical metadata "]@{ shape: process } +6 --> 8 +9["Summing up the dada2 output table"]@{ shape: process } +0 --> 9 +6 --> 9 +``` diff --git a/workflows/amplicon/qiime2/qiime2-III-VI-downsteam/QIIME2-III-V-Phylogeny-Rarefaction-Taxonomic-Analysis_diagrams.md b/workflows/amplicon/qiime2/qiime2-III-VI-downsteam/QIIME2-III-V-Phylogeny-Rarefaction-Taxonomic-Analysis_diagrams.md new file mode 100644 index 000000000..2a14f9b29 --- /dev/null +++ b/workflows/amplicon/qiime2/qiime2-III-VI-downsteam/QIIME2-III-V-Phylogeny-Rarefaction-Taxonomic-Analysis_diagrams.md @@ -0,0 +1,75 @@ +# Workflow diagrams + +## QIIME2-III-V-Phylogeny-Rarefaction-Taxonomic-Analysis + +```mermaid +graph LR +0["Representative sequences"]@{ shape: doc } +1["Feature table"]@{ shape: doc } +2["Metadata"]@{ shape: doc } +3["Minimum depth"]@{ shape: lean-l } +4["Maximum depth"]@{ shape: lean-l } +5["SEPP fragment insertion reference"]@{ shape: doc } +6["Taxonomic classifier"]@{ shape: doc } +7["Phylogenetic tree for diversity analysis"]@{ shape: subprocess } +0 --> 7 +5 --> 7 +8["Taxonomic analysis"]@{ shape: subprocess } +1 --> 8 +0 --> 8 +2 --> 8 +6 --> 8 +9["Rarefaction"]@{ shape: subprocess } +1 --> 9 +4 --> 9 +2 --> 9 +3 --> 9 +7 --> 9 +``` + +## QIIME2 IV: Rarefaction + +```mermaid +graph LR +0["Metadata"]@{ shape: doc } +1["DADA2 feature table"]@{ shape: doc } +2["Rooted tree"]@{ shape: doc } +3["Minimum depth"]@{ shape: lean-l } +4["Maximum depth"]@{ shape: lean-l } +5["Alpha rarefaction"]@{ shape: process } +0 --> 5 +3 --> 5 +2 --> 5 +4 --> 5 +1 --> 5 +``` + +## QIIME2 V: Taxonomic analysis + +```mermaid +graph LR +0["Metadata"]@{ shape: doc } +1["DADA2 representative sequences"]@{ shape: doc } +2["Taxonomic classifier"]@{ shape: doc } +3["DADA2 feature table"]@{ shape: doc } +4["Taxonomy classification"]@{ shape: process } +2 --> 4 +1 --> 4 +5["Taxonomy barplot"]@{ shape: process } +0 --> 5 +4 --> 5 +3 --> 5 +6["Tabulate taxonomy classification"]@{ shape: process } +4 --> 6 +``` + +## QIIME2 III: Phylogenetic tree for diversity analysis + +```mermaid +graph LR +0["DADA2 representative sequences"]@{ shape: doc } +1["SEPP fragment insertion source file"]@{ shape: doc } +2["Phylogenetic tree generation"]@{ shape: process } +1 --> 2 +0 --> 2 +``` diff --git a/workflows/amplicon/qiime2/qiime2-III-VI-downsteam/QIIME2-VI-diversity-metrics-and-estimations_diagrams.md b/workflows/amplicon/qiime2/qiime2-III-VI-downsteam/QIIME2-VI-diversity-metrics-and-estimations_diagrams.md new file mode 100644 index 000000000..4a86fd2c6 --- /dev/null +++ b/workflows/amplicon/qiime2/qiime2-III-VI-downsteam/QIIME2-VI-diversity-metrics-and-estimations_diagrams.md @@ -0,0 +1,61 @@ +# Workflow diagrams + +## QIIME2 VI: Diversity metrics and estimations + +```mermaid +graph LR +0["Sampling depth"]@{ shape: lean-l } +1["Metadata"]@{ shape: doc } +2["Feature table"]@{ shape: doc } +3["Rooted tree"]@{ shape: doc } +4["Target metadata parameter (for beta diversity)"]@{ shape: lean-l } +5["qiime2 tools import"]@{ shape: process } +1 --> 5 +6["Diversity metrics"]@{ shape: process } +1 --> 6 +3 --> 6 +0 --> 6 +2 --> 6 +7["Alpha diversity metrics - Pielou's evenness"]@{ shape: process } +6 --> 7 +1 --> 7 +8["Alpha diversity metrics - Observed features"]@{ shape: process } +6 --> 8 +1 --> 8 +9["Alpha diversity metrics - Shannon's diversity index"]@{ shape: process } +6 --> 9 +1 --> 9 +10["Beta diversity - Jaccard distance matrix"]@{ shape: process } +6 --> 10 +4 --> 10 +5 --> 10 +11["Beta diversity - Bray-Curtis distance matrix"]@{ shape: process } +6 --> 11 +4 --> 11 +5 --> 11 +12["Emperor plot collection"]@{ shape: process } +6 --> 12 +6 --> 12 +6 --> 12 +6 --> 12 +13["Beta diversity - weighted UniFrac distance matrix"]@{ shape: process } +6 --> 13 +4 --> 13 +5 --> 13 +14["PCoA collection"]@{ shape: process } +6 --> 14 +6 --> 14 +6 --> 14 +6 --> 14 +15["Distance matrix collection"]@{ shape: process } +6 --> 15 +6 --> 15 +6 --> 15 +6 --> 15 +16["Richness and evenness collection"]@{ shape: process } +6 --> 16 +6 --> 16 +6 --> 16 +6 --> 16 +6 --> 16 +``` diff --git a/workflows/bacterial_genomics/amr_gene_detection/amr_gene_detection_diagrams.md b/workflows/bacterial_genomics/amr_gene_detection/amr_gene_detection_diagrams.md new file mode 100644 index 000000000..243c01a77 --- /dev/null +++ b/workflows/bacterial_genomics/amr_gene_detection/amr_gene_detection_diagrams.md @@ -0,0 +1,32 @@ +# Workflow diagrams + +## amr_gene_detection + +```mermaid +graph LR +0["Input sequence fasta"]@{ shape: doc } +1["Select a taxonomy group point mutation"]@{ shape: lean-l } +2["Select a AMR genes detection database"]@{ shape: lean-l } +3["Select a virulence genes detection database"]@{ shape: lean-l } +4["staramr_amr_genes"]@{ shape: process } +0 --> 4 +5["amrfinderplus_point_mutation"]@{ shape: process } +2 --> 5 +0 --> 5 +1 --> 5 +6["abricate_virulence"]@{ shape: process } +3 --> 6 +0 --> 6 +7["ToolDistillator"]@{ shape: process } +6 --> 7 +3 --> 7 +4 --> 7 +4 --> 7 +4 --> 7 +5 --> 7 +5 --> 7 +5 --> 7 +2 --> 7 +8["ToolDistillator summarize"]@{ shape: process } +7 --> 8 +``` diff --git a/workflows/bacterial_genomics/bacterial_genome_annotation/bacterial_genome_annotation_diagrams.md b/workflows/bacterial_genomics/bacterial_genome_annotation/bacterial_genome_annotation_diagrams.md new file mode 100644 index 000000000..02e161286 --- /dev/null +++ b/workflows/bacterial_genomics/bacterial_genome_annotation/bacterial_genome_annotation_diagrams.md @@ -0,0 +1,51 @@ +# Workflow diagrams + +## bacterial_genome_annotation + +```mermaid +graph LR +0["Input sequence fasta"]@{ shape: doc } +1["Select a plasmid detection database"]@{ shape: lean-l } +2["Select a bacterial genome annotation database"]@{ shape: lean-l } +3["Select a AMRFinderPlus database"]@{ shape: lean-l } +4["genomic_annotation_insertionelement_isescan"]@{ shape: process } +0 --> 4 +5["genomic_annotation_integron"]@{ shape: process } +0 --> 5 +6["genomic_annotation_plasmid_plasmidfinder"]@{ shape: process } +1 --> 6 +0 --> 6 +7["Bakta"]@{ shape: process } +3 --> 7 +2 --> 7 +0 --> 7 +8["ToolDistillator"]@{ shape: process } +6 --> 8 +6 --> 8 +6 --> 8 +6 --> 8 +1 --> 8 +4 --> 8 +4 --> 8 +4 --> 8 +4 --> 8 +4 --> 8 +4 --> 8 +5 --> 8 +5 --> 8 +7 --> 8 +7 --> 8 +7 --> 8 +7 --> 8 +7 --> 8 +7 --> 8 +7 --> 8 +7 --> 8 +7 --> 8 +7 --> 8 +7 --> 8 +2 --> 8 +7 --> 8 +9["ToolDistillator summarize"]@{ shape: process } +8 --> 9 +``` diff --git a/workflows/computational-chemistry/fragment-based-docking-scoring/fragment-based-docking-scoring_diagrams.md b/workflows/computational-chemistry/fragment-based-docking-scoring/fragment-based-docking-scoring_diagrams.md new file mode 100644 index 000000000..9d68316a6 --- /dev/null +++ b/workflows/computational-chemistry/fragment-based-docking-scoring/fragment-based-docking-scoring_diagrams.md @@ -0,0 +1,43 @@ +# Workflow diagrams + +## Fragment-based virtual screening using rDock for docking and SuCOS for pose scoring + +```mermaid +graph LR +0["Number of poses"]@{ shape: lean-l } +1["Receptor (PDB)"]@{ shape: doc } +2["All fragments (SDF)"]@{ shape: doc } +3["Collection size for docking"]@{ shape: lean-l } +4["SuCOS threshold"]@{ shape: lean-l } +5["Fragment for SuCOS scoring (SDF/MOL)"]@{ shape: doc } +6["Candidate compounds (SMILES)"]@{ shape: doc } +7["Compound conversion"]@{ shape: process } +1 --> 7 +8["Create Frankenstein ligand"]@{ shape: process } +2 --> 8 +9["Compose text parameter value"]@{ shape: process } +4 --> 9 +10["Enumerate changes"]@{ shape: process } +6 --> 10 +11["rDock cavity definition"]@{ shape: process } +8 --> 11 +7 --> 11 +12["Compound conversion"]@{ shape: process } +10 --> 12 +13["Split file"]@{ shape: process } +12 --> 13 +3 --> 13 +14["rDock docking"]@{ shape: process } +11 --> 14 +13 --> 14 +0 --> 14 +7 --> 14 +15["Collapse Collection"]@{ shape: process } +14 --> 15 +16["Score docked poses using SuCOS"]@{ shape: process } +15 --> 16 +5 --> 16 +17["rDock docking"]@{ shape: process } +9 --> 17 +16 --> 17 +``` diff --git a/workflows/computational-chemistry/gromacs-dctmd/gromacs-dctmd_diagrams.md b/workflows/computational-chemistry/gromacs-dctmd/gromacs-dctmd_diagrams.md new file mode 100644 index 000000000..793718508 --- /dev/null +++ b/workflows/computational-chemistry/gromacs-dctmd/gromacs-dctmd_diagrams.md @@ -0,0 +1,124 @@ +# Workflow diagrams + +## dcTMD calculations with GROMACS + +```mermaid +graph LR +0["Ligand SDF"]@{ shape: doc } +1["pH to protonate ligand"]@{ shape: lean-l } +2["Protein PDB"]@{ shape: doc } +3["Salt concentration"]@{ shape: lean-l } +4["Water model"]@{ shape: lean-l } +5["Force field"]@{ shape: lean-l } +6["Number of simulations"]@{ shape: lean-l } +7["Temperature"]@{ shape: lean-l } +8["Number of equilibration steps"]@{ shape: lean-l } +9["Online data"]@{ shape: process } +10["Pulling rate"]@{ shape: lean-l } +11["Step length (ps)"]@{ shape: lean-l } +12["Protein pull group"]@{ shape: lean-l } +13["Number of steps"]@{ shape: lean-l } +14["Pull group pbcatom"]@{ shape: lean-l } +15["Create GRO and TOP complex files"]@{ shape: subprocess } +2 --> 15 +5 --> 15 +0 --> 15 +4 --> 15 +1 --> 15 +16["Create text file"]@{ shape: process } +7 --> 16 +6 --> 16 +17["Compose text parameter value"]@{ shape: process } +10 --> 17 +18["Compose text parameter value"]@{ shape: process } +11 --> 18 +19["Compose text parameter value"]@{ shape: process } +13 --> 19 +20["Compose text parameter value"]@{ shape: process } +14 --> 20 +21["GROMACS solvation and adding ions"]@{ shape: process } +3 --> 21 +15 --> 21 +15 --> 21 +22["Split file"]@{ shape: process } +16 --> 22 +23["Add line to file"]@{ shape: process } +9 --> 23 +17 --> 23 +24["GROMACS energy minimization"]@{ shape: process } +21 --> 24 +21 --> 24 +25["Parse parameter value"]@{ shape: process } +22 --> 25 +26["Add line to file"]@{ shape: process } +23 --> 26 +19 --> 26 +27["Create GROMACS index files"]@{ shape: process } +24 --> 27 +28["GROMACS simulation"]@{ shape: process } +24 --> 28 +15 --> 28 +8 --> 28 +11 --> 28 +25 --> 28 +21 --> 28 +29["Add line to file"]@{ shape: process } +26 --> 29 +18 --> 29 +30["Text transformation"]@{ shape: process } +27 --> 30 +31["Add line to file"]@{ shape: process } +29 --> 31 +20 --> 31 +32["Add line to file"]@{ shape: process } +30 --> 32 +12 --> 32 +33["Concatenate datasets"]@{ shape: process } +27 --> 33 +32 --> 33 +34["GROMACS simulation"]@{ shape: process } +28 --> 34 +28 --> 34 +33 --> 34 +31 --> 34 +21 --> 34 +35["dcTMD friction correction"]@{ shape: process } +34 --> 35 +``` + +## Create GRO and TOP complex files + +```mermaid +graph LR +0["Ligand SDF"]@{ shape: doc } +1["pH"]@{ shape: lean-l } +2["Apoprotein PDB"]@{ shape: doc } +3["Water model"]@{ shape: lean-l } +4["Force field"]@{ shape: lean-l } +5["Descriptors"]@{ shape: process } +0 --> 5 +6["Compound conversion"]@{ shape: process } +0 --> 6 +1 --> 6 +7["GROMACS initial setup"]@{ shape: process } +4 --> 7 +2 --> 7 +3 --> 7 +8["Cut"]@{ shape: process } +5 --> 8 +9["Search in textfiles"]@{ shape: process } +6 --> 9 +10["Parse parameter value"]@{ shape: process } +8 --> 10 +11["AnteChamber"]@{ shape: process } +10 --> 11 +9 --> 11 +12["Generate MD topologies for small molecules"]@{ shape: process } +10 --> 12 +11 --> 12 +13["Merge GROMACS topologies"]@{ shape: process } +12 --> 13 +12 --> 13 +7 --> 13 +7 --> 13 +``` diff --git a/workflows/computational-chemistry/gromacs-mmgbsa/gromacs-mmgbsa_diagrams.md b/workflows/computational-chemistry/gromacs-mmgbsa/gromacs-mmgbsa_diagrams.md new file mode 100644 index 000000000..91e6263c1 --- /dev/null +++ b/workflows/computational-chemistry/gromacs-mmgbsa/gromacs-mmgbsa_diagrams.md @@ -0,0 +1,113 @@ +# Workflow diagrams + +## MMGBSA calculations with GROMACS + +```mermaid +graph LR +0["Salt concentration"]@{ shape: lean-l } +1["Number of simulations"]@{ shape: lean-l } +2["Apoprotein PDB"]@{ shape: doc } +3["Water model"]@{ shape: lean-l } +4["pH"]@{ shape: lean-l } +5["Force field"]@{ shape: lean-l } +6["Ligand SDF"]@{ shape: doc } +7["NVT equilibration steps"]@{ shape: lean-l } +8["NPT equilibration steps"]@{ shape: lean-l } +9["Production steps"]@{ shape: lean-l } +10["Compose text parameter value"]@{ shape: process } +0 --> 10 +11["Create GRO and TOP complex files"]@{ shape: subprocess } +2 --> 11 +5 --> 11 +6 --> 11 +3 --> 11 +4 --> 11 +12["Create text file"]@{ shape: process } +10 --> 12 +1 --> 12 +13["GROMACS structure configuration"]@{ shape: process } +11 --> 13 +14["Split file"]@{ shape: process } +12 --> 14 +15["Parse parameter value"]@{ shape: process } +14 --> 15 +16["GROMACS solvation and adding ions"]@{ shape: process } +15 --> 16 +13 --> 16 +11 --> 16 +17["GROMACS energy minimization"]@{ shape: process } +16 --> 17 +16 --> 17 +18["Convert Parameters"]@{ shape: process } +16 --> 18 +16 --> 18 +19["GROMACS simulation"]@{ shape: process } +17 --> 19 +11 --> 19 +7 --> 19 +16 --> 19 +20["GROMACS simulation"]@{ shape: process } +19 --> 20 +19 --> 20 +11 --> 20 +8 --> 20 +16 --> 20 +21["GROMACS simulation"]@{ shape: process } +20 --> 21 +20 --> 21 +9 --> 21 +16 --> 21 +22["MDTraj file converter"]@{ shape: process } +21 --> 22 +23["MMPBSA/MMGBSA"]@{ shape: process } +18 --> 23 +18 --> 23 +18 --> 23 +18 --> 23 +22 --> 23 +24["Search in textfiles"]@{ shape: process } +23 --> 24 +25["Collapse Collection"]@{ shape: process } +24 --> 25 +26["Cut"]@{ shape: process } +25 --> 26 +27["Summary Statistics"]@{ shape: process } +26 --> 27 +``` + +## Create GRO and TOP complex files + +```mermaid +graph LR +0["Ligand SDF"]@{ shape: doc } +1["pH"]@{ shape: lean-l } +2["Apoprotein PDB"]@{ shape: doc } +3["Water model"]@{ shape: lean-l } +4["Force field"]@{ shape: lean-l } +5["Descriptors"]@{ shape: process } +0 --> 5 +6["Compound conversion"]@{ shape: process } +0 --> 6 +1 --> 6 +7["GROMACS initial setup"]@{ shape: process } +4 --> 7 +2 --> 7 +3 --> 7 +8["Cut"]@{ shape: process } +5 --> 8 +9["Search in textfiles"]@{ shape: process } +6 --> 9 +10["Parse parameter value"]@{ shape: process } +8 --> 10 +11["AnteChamber"]@{ shape: process } +10 --> 11 +9 --> 11 +12["Generate MD topologies for small molecules"]@{ shape: process } +10 --> 12 +11 --> 12 +13["Merge GROMACS topologies"]@{ shape: process } +12 --> 13 +12 --> 13 +7 --> 13 +7 --> 13 +``` diff --git a/workflows/computational-chemistry/protein-ligand-complex-parameterization/protein-ligand-complex-parameterization_diagrams.md b/workflows/computational-chemistry/protein-ligand-complex-parameterization/protein-ligand-complex-parameterization_diagrams.md new file mode 100644 index 000000000..e47331f22 --- /dev/null +++ b/workflows/computational-chemistry/protein-ligand-complex-parameterization/protein-ligand-complex-parameterization_diagrams.md @@ -0,0 +1,38 @@ +# Workflow diagrams + +## Create GRO and TOP complex files + +```mermaid +graph LR +0["pH"]@{ shape: lean-l } +1["Ligand SDF"]@{ shape: doc } +2["Apoprotein PDB"]@{ shape: doc } +3["Water model"]@{ shape: lean-l } +4["Force field"]@{ shape: lean-l } +5["Compound conversion"]@{ shape: process } +1 --> 5 +0 --> 5 +6["Descriptors"]@{ shape: process } +1 --> 6 +7["GROMACS initial setup"]@{ shape: process } +4 --> 7 +2 --> 7 +3 --> 7 +8["Search in textfiles"]@{ shape: process } +5 --> 8 +9["Cut"]@{ shape: process } +6 --> 9 +10["Parse parameter value"]@{ shape: process } +9 --> 10 +11["AnteChamber"]@{ shape: process } +10 --> 11 +8 --> 11 +12["Generate MD topologies for small molecules"]@{ shape: process } +10 --> 12 +11 --> 12 +13["Merge GROMACS topologies"]@{ shape: process } +12 --> 13 +12 --> 13 +7 --> 13 +7 --> 13 +``` diff --git a/workflows/data-fetching/parallel-accession-download/parallel-accession-download_diagrams.md b/workflows/data-fetching/parallel-accession-download/parallel-accession-download_diagrams.md new file mode 100644 index 000000000..189bf2b8c --- /dev/null +++ b/workflows/data-fetching/parallel-accession-download/parallel-accession-download_diagrams.md @@ -0,0 +1,16 @@ +# Workflow diagrams + +## Parallel Accession Download + +```mermaid +graph LR +0["Run accessions"]@{ shape: doc } +1["Split accessions to collection"]@{ shape: process } +0 --> 1 +2["fasterq-dump"]@{ shape: process } +1 --> 2 +3["flatten paired output"]@{ shape: process } +2 --> 3 +4["flatten single end output"]@{ shape: process } +2 --> 4 +``` diff --git a/workflows/data-fetching/sra-manifest-to-concatenated-fastqs/sra-manifest-to-concatenated-fastqs_diagrams.md b/workflows/data-fetching/sra-manifest-to-concatenated-fastqs/sra-manifest-to-concatenated-fastqs_diagrams.md new file mode 100644 index 000000000..9dcad63ef --- /dev/null +++ b/workflows/data-fetching/sra-manifest-to-concatenated-fastqs/sra-manifest-to-concatenated-fastqs_diagrams.md @@ -0,0 +1,40 @@ +# Workflow diagrams + +## sra_manifest_to_concatenated_fastqs_parallel + +```mermaid +graph LR +0["SRA_manifest"]@{ shape: doc } +1["Column number with SRA ID"]@{ shape: lean-l } +2["Column number with final identifier"]@{ shape: lean-l } +3["Set SRA column to 1 if at 0"]@{ shape: process } +1 --> 3 +4["Compute column expression"]@{ shape: process } +3 --> 4 +2 --> 4 +5["Cut columns of interest"]@{ shape: process } +4 --> 5 +0 --> 5 +6["generate table for relabelling"]@{ shape: process } +5 --> 6 +7["Cut to get only SRA"]@{ shape: process } +5 --> 7 +8["split file to get one SRA per file + header"]@{ shape: process } +7 --> 8 +9["get Fastqs from SRA IDs"]@{ shape: process } +8 --> 9 +10["relabel pair collec to get SRA+sample"]@{ shape: process } +6 --> 10 +9 --> 10 +11["relabel single collec to get SRA+sample"]@{ shape: process } +6 --> 11 +9 --> 11 +12["Apply rules"]@{ shape: process } +10 --> 12 +13["Apply rules"]@{ shape: process } +11 --> 13 +14["Concatenate multiple datasets"]@{ shape: process } +12 --> 14 +15["Concatenate multiple datasets"]@{ shape: process } +13 --> 15 +``` diff --git a/workflows/epigenetics/atacseq/atacseq_diagrams.md b/workflows/epigenetics/atacseq/atacseq_diagrams.md new file mode 100644 index 000000000..5e0f45542 --- /dev/null +++ b/workflows/epigenetics/atacseq/atacseq_diagrams.md @@ -0,0 +1,80 @@ +# Workflow diagrams + +## ATACseq + +```mermaid +graph LR +0["PE fastq input"]@{ shape: docs } +1["reference_genome"]@{ shape: lean-l } +2["effective_genome_size"]@{ shape: lean-l } +3["bin_size"]@{ shape: lean-l } +4["Cutadapt (remove adapter + bad quality bases)"]@{ shape: process } +0 --> 4 +5["Bowtie2 map on reference"]@{ shape: process } +4 --> 5 +1 --> 5 +6["filter MAPQ30 concordant pairs and not mitochondrial pairs"]@{ shape: process } +5 --> 6 +7["Get number of reads per chromosome"]@{ shape: process } +5 --> 7 +8["remove PCR duplicates"]@{ shape: process } +6 --> 8 +9["reads in chrM/MT for multiQC"]@{ shape: process } +7 --> 9 +10["convert BAM to BED to improve peak calling"]@{ shape: process } +8 --> 10 +11["Compute fragment length histogram"]@{ shape: process } +8 --> 11 +12["number of reads"]@{ shape: process } +8 --> 12 +13["Call Peak with MACS2"]@{ shape: process } +2 --> 13 +10 --> 13 +14["remove comments lines"]@{ shape: process } +11 --> 14 +15["compute 1/million reads"]@{ shape: process } +12 --> 15 +16["Bigwig from MACS2 (no norm)"]@{ shape: process } +13 --> 16 +17["get summits +/-500kb"]@{ shape: process } +1 --> 17 +13 --> 17 +18["summary of MACS2"]@{ shape: process } +13 --> 18 +19["Convert 1/million reads to parameter"]@{ shape: process } +15 --> 19 +20["Isolate each bigwig do normalize not average"]@{ shape: process } +16 --> 20 +21["Merge summits +/-500kb"]@{ shape: process } +17 --> 21 +22["normalize by million reads"]@{ shape: process } +3 --> 22 +19 --> 22 +20 --> 22 +23["Compute coverage on summits +/-500kb"]@{ shape: process } +21 --> 23 +8 --> 23 +24["number of reads in peaks"]@{ shape: process } +23 --> 24 +25["compute 1/million reads in peaks"]@{ shape: process } +24 --> 25 +26["Combine number of reads in peaks with total number of reads"]@{ shape: process } +24 --> 26 +12 --> 26 +27["Convert 1/million reads in peaks to parameter"]@{ shape: process } +25 --> 27 +28["reads in peaks multiQC"]@{ shape: process } +26 --> 28 +29["normalize by million reads in peaks"]@{ shape: process } +3 --> 29 +27 --> 29 +20 --> 29 +30["MultiQC"]@{ shape: process } +4 --> 30 +5 --> 30 +9 --> 30 +8 --> 30 +14 --> 30 +13 --> 30 +28 --> 30 +``` diff --git a/workflows/epigenetics/average-bigwig-between-replicates/average-bigwig-between-replicates_diagrams.md b/workflows/epigenetics/average-bigwig-between-replicates/average-bigwig-between-replicates_diagrams.md new file mode 100644 index 000000000..2012fce1d --- /dev/null +++ b/workflows/epigenetics/average-bigwig-between-replicates/average-bigwig-between-replicates_diagrams.md @@ -0,0 +1,14 @@ +# Workflow diagrams + +## Average Bigwig between replicates + +```mermaid +graph LR +0["Bigwig to average"]@{ shape: docs } +1["bin_size"]@{ shape: lean-l } +2["Apply rules"]@{ shape: process } +0 --> 2 +3["average bigwigs from different replicates"]@{ shape: process } +1 --> 3 +2 --> 3 +``` diff --git a/workflows/epigenetics/chipseq-pe/chipseq-pe_diagrams.md b/workflows/epigenetics/chipseq-pe/chipseq-pe_diagrams.md new file mode 100644 index 000000000..eb25874fe --- /dev/null +++ b/workflows/epigenetics/chipseq-pe/chipseq-pe_diagrams.md @@ -0,0 +1,34 @@ +# Workflow diagrams + +## ChIPseq_PE + +```mermaid +graph LR +0["PE fastq input"]@{ shape: docs } +1["adapter_forward"]@{ shape: lean-l } +2["adapter_reverse"]@{ shape: lean-l } +3["reference_genome"]@{ shape: lean-l } +4["effective_genome_size"]@{ shape: lean-l } +5["normalize_profile"]@{ shape: lean-l } +6["Cutadapt (remove adapter + bad quality bases)"]@{ shape: process } +0 --> 6 +1 --> 6 +2 --> 6 +7["Bowtie2 map on reference"]@{ shape: process } +6 --> 7 +3 --> 7 +8["filter MAPQ30 concordent pairs"]@{ shape: process } +7 --> 8 +9["Call Peaks with MACS2"]@{ shape: process } +5 --> 9 +4 --> 9 +8 --> 9 +10["summary of MACS2"]@{ shape: process } +9 --> 10 +11["Bigwig from MACS2"]@{ shape: process } +9 --> 11 +12["MultiQC"]@{ shape: process } +6 --> 12 +7 --> 12 +9 --> 12 +``` diff --git a/workflows/epigenetics/chipseq-sr/chipseq-sr_diagrams.md b/workflows/epigenetics/chipseq-sr/chipseq-sr_diagrams.md new file mode 100644 index 000000000..cacacd1a3 --- /dev/null +++ b/workflows/epigenetics/chipseq-sr/chipseq-sr_diagrams.md @@ -0,0 +1,32 @@ +# Workflow diagrams + +## ChIPseq_SR + +```mermaid +graph LR +0["SR fastq input"]@{ shape: docs } +1["adapter_forward"]@{ shape: lean-l } +2["reference_genome"]@{ shape: lean-l } +3["effective_genome_size"]@{ shape: lean-l } +4["normalize_profile"]@{ shape: lean-l } +5["Cutadapt (remove adapter + bad quality bases)"]@{ shape: process } +0 --> 5 +1 --> 5 +6["Bowtie2 map on reference"]@{ shape: process } +5 --> 6 +2 --> 6 +7["filter MAPQ30"]@{ shape: process } +6 --> 7 +8["Call Peaks with MACS2"]@{ shape: process } +4 --> 8 +3 --> 8 +7 --> 8 +9["summary of MACS2"]@{ shape: process } +8 --> 9 +10["Bigwig from MACS2"]@{ shape: process } +8 --> 10 +11["MultiQC"]@{ shape: process } +5 --> 11 +6 --> 11 +8 --> 11 +``` diff --git a/workflows/epigenetics/consensus-peaks/consensus-peaks-atac-cutandrun_diagrams.md b/workflows/epigenetics/consensus-peaks/consensus-peaks-atac-cutandrun_diagrams.md new file mode 100644 index 000000000..df45458c8 --- /dev/null +++ b/workflows/epigenetics/consensus-peaks/consensus-peaks-atac-cutandrun_diagrams.md @@ -0,0 +1,67 @@ +# Workflow diagrams + +## Get Confident Peaks From ATAC or CUTandRUN replicates + +```mermaid +graph LR +0["n rmDup BAM"]@{ shape: docs } +1["Minimum number of overlap"]@{ shape: lean-l } +2["effective_genome_size"]@{ shape: lean-l } +3["bin_size"]@{ shape: lean-l } +4["convert BAM to BED"]@{ shape: process } +0 --> 4 +5["count number of reads"]@{ shape: process } +0 --> 5 +6["generate filter rule"]@{ shape: process } +1 --> 6 +7["call peaks individually"]@{ shape: process } +2 --> 7 +4 --> 7 +8["put all nb of reads into single dataset"]@{ shape: process } +5 --> 8 +9["compute multi intersect"]@{ shape: process } +7 --> 9 +10["individual normalized bigwig"]@{ shape: process } +7 --> 10 +11["get min value"]@{ shape: process } +8 --> 11 +12["get nb of replicates"]@{ shape: process } +8 --> 12 +13["filter multi intersect"]@{ shape: process } +6 --> 13 +9 --> 13 +14["average coverage from replicates"]@{ shape: process } +3 --> 14 +10 --> 14 +15["convert min value to text"]@{ shape: process } +11 --> 15 +16["Parse parameter value"]@{ shape: process } +12 --> 16 +17["create a dataset with the min value as many times as there are replicates"]@{ shape: process } +15 --> 17 +16 --> 17 +18["split min value"]@{ shape: process } +17 --> 18 +19["convert min nb of reads to parameter"]@{ shape: process } +18 --> 19 +20["select random reads"]@{ shape: process } +0 --> 20 +19 --> 20 +21["convert subsampled bam to bed"]@{ shape: process } +20 --> 21 +22["call peaks on merge"]@{ shape: process } +2 --> 22 +21 --> 22 +23["get merged peaks overlapping at least x replicates"]@{ shape: process } +22 --> 23 +13 --> 23 +24["multiQC"]@{ shape: process } +7 --> 24 +22 --> 24 +25["only keep peaks with summits overlapping intersection of at least x replicates"]@{ shape: process } +23 --> 25 +26["keep only columns of narrowPeak"]@{ shape: process } +25 --> 26 +27["discard duplicated lines"]@{ shape: process } +26 --> 27 +``` diff --git a/workflows/epigenetics/consensus-peaks/consensus-peaks-chip-pe_diagrams.md b/workflows/epigenetics/consensus-peaks/consensus-peaks-chip-pe_diagrams.md new file mode 100644 index 000000000..de9160e21 --- /dev/null +++ b/workflows/epigenetics/consensus-peaks/consensus-peaks-chip-pe_diagrams.md @@ -0,0 +1,63 @@ +# Workflow diagrams + +## Get Confident Peaks From ChIP_PE replicates + +```mermaid +graph LR +0["n rmDup BAMPE"]@{ shape: docs } +1["Minimum number of overlap"]@{ shape: lean-l } +2["effective_genome_size"]@{ shape: lean-l } +3["bin_size"]@{ shape: lean-l } +4["count number of reads"]@{ shape: process } +0 --> 4 +5["generate filter rule"]@{ shape: process } +1 --> 5 +6["call peaks individually"]@{ shape: process } +2 --> 6 +0 --> 6 +7["put all nb of reads into single dataset"]@{ shape: process } +4 --> 7 +8["compute multi intersect"]@{ shape: process } +6 --> 8 +9["individual normalized bigwig"]@{ shape: process } +6 --> 9 +10["get min value"]@{ shape: process } +7 --> 10 +11["get nb of replicates"]@{ shape: process } +7 --> 11 +12["filter multi intersect"]@{ shape: process } +5 --> 12 +8 --> 12 +13["average coverage from replicates"]@{ shape: process } +3 --> 13 +9 --> 13 +14["convert min value to text"]@{ shape: process } +10 --> 14 +15["Parse parameter value"]@{ shape: process } +11 --> 15 +16["create a dataset with the min value as many times as there are replicates"]@{ shape: process } +14 --> 16 +15 --> 16 +17["split min value"]@{ shape: process } +16 --> 17 +18["convert min nb of reads to parameter"]@{ shape: process } +17 --> 18 +19["downsample BAM"]@{ shape: process } +0 --> 19 +18 --> 19 +20["call peaks on merge"]@{ shape: process } +2 --> 20 +19 --> 20 +21["get merged peaks overlapping at least x replicates"]@{ shape: process } +20 --> 21 +12 --> 21 +22["multiQC"]@{ shape: process } +6 --> 22 +20 --> 22 +23["only keep peaks with summits overlapping intersection of at least x replicates"]@{ shape: process } +21 --> 23 +24["keep only columns of narrowPeak"]@{ shape: process } +23 --> 24 +25["discard duplicated lines"]@{ shape: process } +24 --> 25 +``` diff --git a/workflows/epigenetics/consensus-peaks/consensus-peaks-chip-sr_diagrams.md b/workflows/epigenetics/consensus-peaks/consensus-peaks-chip-sr_diagrams.md new file mode 100644 index 000000000..52faf7045 --- /dev/null +++ b/workflows/epigenetics/consensus-peaks/consensus-peaks-chip-sr_diagrams.md @@ -0,0 +1,63 @@ +# Workflow diagrams + +## Get Confident Peaks From ChIP_SR replicates + +```mermaid +graph LR +0["n rmDup BAMSR"]@{ shape: docs } +1["Minimum number of overlap"]@{ shape: lean-l } +2["effective_genome_size"]@{ shape: lean-l } +3["bin_size"]@{ shape: lean-l } +4["count number of reads"]@{ shape: process } +0 --> 4 +5["generate filter rule"]@{ shape: process } +1 --> 5 +6["call peaks individually"]@{ shape: process } +2 --> 6 +0 --> 6 +7["put all nb of reads into single dataset"]@{ shape: process } +4 --> 7 +8["compute multi intersect"]@{ shape: process } +6 --> 8 +9["individual normalized bigwig"]@{ shape: process } +6 --> 9 +10["get min value"]@{ shape: process } +7 --> 10 +11["get nb of replicates"]@{ shape: process } +7 --> 11 +12["filter multi intersect"]@{ shape: process } +5 --> 12 +8 --> 12 +13["average coverage from replicates"]@{ shape: process } +3 --> 13 +9 --> 13 +14["convert min value to text"]@{ shape: process } +10 --> 14 +15["Parse parameter value"]@{ shape: process } +11 --> 15 +16["create a dataset with the min value as many times as there are replicates"]@{ shape: process } +14 --> 16 +15 --> 16 +17["split min value"]@{ shape: process } +16 --> 17 +18["convert min nb of reads to parameter"]@{ shape: process } +17 --> 18 +19["downsample BAM"]@{ shape: process } +0 --> 19 +18 --> 19 +20["call peaks on merge"]@{ shape: process } +2 --> 20 +19 --> 20 +21["get merged peaks overlapping at least x replicates"]@{ shape: process } +20 --> 21 +12 --> 21 +22["multiQC"]@{ shape: process } +6 --> 22 +20 --> 22 +23["only keep peaks with summits overlapping intersection of at least x replicates"]@{ shape: process } +21 --> 23 +24["keep only columns of narrowPeak"]@{ shape: process } +23 --> 24 +25["discard duplicated lines"]@{ shape: process } +24 --> 25 +``` diff --git a/workflows/epigenetics/cutandrun/cutandrun_diagrams.md b/workflows/epigenetics/cutandrun/cutandrun_diagrams.md new file mode 100644 index 000000000..69027007c --- /dev/null +++ b/workflows/epigenetics/cutandrun/cutandrun_diagrams.md @@ -0,0 +1,39 @@ +# Workflow diagrams + +## CUTandRUN + +```mermaid +graph LR +0["PE fastq input"]@{ shape: docs } +1["adapter_forward"]@{ shape: lean-l } +2["adapter_reverse"]@{ shape: lean-l } +3["reference_genome"]@{ shape: lean-l } +4["effective_genome_size"]@{ shape: lean-l } +5["normalize_profile"]@{ shape: lean-l } +6["Cutadapt (remove adapter + bad quality bases)"]@{ shape: process } +0 --> 6 +1 --> 6 +2 --> 6 +7["Bowtie2 map on reference"]@{ shape: process } +6 --> 7 +3 --> 7 +8["filter MAPQ30 concordant pairs"]@{ shape: process } +7 --> 8 +9["remove PCR duplicates"]@{ shape: process } +8 --> 9 +10["convert BAM to BED to improve peak calling"]@{ shape: process } +9 --> 10 +11["Call Peaks with MACS2"]@{ shape: process } +5 --> 11 +4 --> 11 +10 --> 11 +12["summary of MACS2"]@{ shape: process } +11 --> 12 +13["Bigwig from MACS2"]@{ shape: process } +11 --> 13 +14["MultiQC"]@{ shape: process } +6 --> 14 +7 --> 14 +9 --> 14 +11 --> 14 +``` diff --git a/workflows/epigenetics/hic-hicup-cooler/chic-fastq-to-cool-hicup-cooler_diagrams.md b/workflows/epigenetics/hic-hicup-cooler/chic-fastq-to-cool-hicup-cooler_diagrams.md new file mode 100644 index 000000000..2b9564a6d --- /dev/null +++ b/workflows/epigenetics/hic-hicup-cooler/chic-fastq-to-cool-hicup-cooler_diagrams.md @@ -0,0 +1,94 @@ +# Workflow diagrams + +## cHi-C_fastqToCool_hicup_cooler + +```mermaid +graph LR +0["PE fastq input"]@{ shape: docs } +1["genome name"]@{ shape: lean-l } +2["Restriction enzyme"]@{ shape: lean-l } +3["No fill-in"]@{ shape: lean-l } +4["minimum MAPQ"]@{ shape: lean-l } +5["Bin size in bp"]@{ shape: lean-l } +6["Interactions to consider to calculate weights in normalization step"]@{ shape: lean-l } +7["capture region (chromosome)"]@{ shape: lean-l } +8["capture region (start)"]@{ shape: lean-l } +9["capture region (end)"]@{ shape: lean-l } +10["Hi-C_fastqToPairs_hicup"]@{ shape: subprocess } +3 --> 10 +0 --> 10 +2 --> 10 +1 --> 10 +4 --> 10 +11["write filtering for capture region"]@{ shape: process } +8 --> 11 +7 --> 11 +9 --> 11 +8 --> 11 +7 --> 11 +9 --> 11 +12["write region for pyGenomeTracks"]@{ shape: process } +7 --> 12 +8 --> 12 +9 --> 12 +13["Filter for capture region"]@{ shape: process } +11 --> 13 +10 --> 13 +14["Sort filtered pairs and index"]@{ shape: process } +13 --> 14 +1 --> 14 +15["Hi-C_juicermediumtabixToCool_cooler"]@{ shape: subprocess } +5 --> 15 +6 --> 15 +14 --> 15 +1 --> 15 +16["final_plot"]@{ shape: process } +12 --> 16 +15 --> 16 +``` + +## Hi-C_juicermediumtabixToCool_cooler + +```mermaid +graph LR +0["Bin size in bp"]@{ shape: lean-l } +1["genome name"]@{ shape: lean-l } +2["Juicer Medium Tabix with validPairs"]@{ shape: docs } +3["Interactions to consider to calculate weights in normalization step"]@{ shape: lean-l } +4["make bed with bins"]@{ shape: process } +0 --> 4 +1 --> 4 +5["Load pairs in matrix"]@{ shape: process } +1 --> 5 +2 --> 5 +4 --> 5 +6["ICE normalization"]@{ shape: process } +3 --> 6 +5 --> 6 +``` + +## Hi-C_fastqToPairs_hicup + +```mermaid +graph LR +0["PE fastq input"]@{ shape: docs } +1["genome name"]@{ shape: lean-l } +2["Restriction enzyme"]@{ shape: lean-l } +3["No fill-in"]@{ shape: lean-l } +4["minimum MAPQ"]@{ shape: lean-l } +5["HiCUP"]@{ shape: process } +3 --> 5 +1 --> 5 +0 --> 5 +2 --> 5 +1 --> 5 +6["build filtering rule for MAPQ"]@{ shape: process } +4 --> 6 +4 --> 6 +7["valid pairs in juicebox format"]@{ shape: process } +5 --> 7 +5 --> 7 +8["valid pairs in juicebox format MAPQ filtered"]@{ shape: process } +6 --> 8 +7 --> 8 +``` diff --git a/workflows/epigenetics/hic-hicup-cooler/hic-fastq-to-cool-hicup-cooler_diagrams.md b/workflows/epigenetics/hic-hicup-cooler/hic-fastq-to-cool-hicup-cooler_diagrams.md new file mode 100644 index 000000000..43f82f701 --- /dev/null +++ b/workflows/epigenetics/hic-hicup-cooler/hic-fastq-to-cool-hicup-cooler_diagrams.md @@ -0,0 +1,78 @@ +# Workflow diagrams + +## Hi-C_fastqToCool_hicup_cooler + +```mermaid +graph LR +0["PE fastq input"]@{ shape: docs } +1["genome name"]@{ shape: lean-l } +2["Restriction enzyme"]@{ shape: lean-l } +3["No fill-in"]@{ shape: lean-l } +4["minimum MAPQ"]@{ shape: lean-l } +5["Bin size in bp"]@{ shape: lean-l } +6["Interactions to consider to calculate weights in normalization step"]@{ shape: lean-l } +7["region for matrix plotting"]@{ shape: lean-l } +8["Hi-C_fastqToPairs_hicup"]@{ shape: subprocess } +3 --> 8 +0 --> 8 +2 --> 8 +1 --> 8 +4 --> 8 +9["Sort pairs and index"]@{ shape: process } +8 --> 9 +1 --> 9 +10["Hi-C_juicermediumtabixToCool_cooler"]@{ shape: subprocess } +5 --> 10 +6 --> 10 +9 --> 10 +1 --> 10 +11["final plot"]@{ shape: process } +7 --> 11 +10 --> 11 +``` + +## Hi-C_juicermediumtabixToCool_cooler + +```mermaid +graph LR +0["Bin size in bp"]@{ shape: lean-l } +1["genome name"]@{ shape: lean-l } +2["Juicer Medium Tabix with validPairs"]@{ shape: docs } +3["Interactions to consider to calculate weights in normalization step"]@{ shape: lean-l } +4["make bed with bins"]@{ shape: process } +0 --> 4 +1 --> 4 +5["Load pairs in matrix"]@{ shape: process } +1 --> 5 +2 --> 5 +4 --> 5 +6["ICE normalization"]@{ shape: process } +3 --> 6 +5 --> 6 +``` + +## Hi-C_fastqToPairs_hicup + +```mermaid +graph LR +0["PE fastq input"]@{ shape: docs } +1["genome name"]@{ shape: lean-l } +2["Restriction enzyme"]@{ shape: lean-l } +3["No fill-in"]@{ shape: lean-l } +4["minimum MAPQ"]@{ shape: lean-l } +5["HiCUP"]@{ shape: process } +3 --> 5 +1 --> 5 +0 --> 5 +2 --> 5 +1 --> 5 +6["build filtering rule for MAPQ"]@{ shape: process } +4 --> 6 +4 --> 6 +7["valid pairs in juicebox format"]@{ shape: process } +5 --> 7 +5 --> 7 +8["valid pairs in juicebox format MAPQ filtered"]@{ shape: process } +6 --> 8 +7 --> 8 +``` diff --git a/workflows/epigenetics/hic-hicup-cooler/hic-fastq-to-pairs-hicup_diagrams.md b/workflows/epigenetics/hic-hicup-cooler/hic-fastq-to-pairs-hicup_diagrams.md new file mode 100644 index 000000000..087439ce1 --- /dev/null +++ b/workflows/epigenetics/hic-hicup-cooler/hic-fastq-to-pairs-hicup_diagrams.md @@ -0,0 +1,27 @@ +# Workflow diagrams + +## Hi-C_fastqToPairs_hicup + +```mermaid +graph LR +0["PE fastq input"]@{ shape: docs } +1["genome name"]@{ shape: lean-l } +2["Restriction enzyme"]@{ shape: lean-l } +3["No fill-in"]@{ shape: lean-l } +4["minimum MAPQ"]@{ shape: lean-l } +5["HiCUP"]@{ shape: process } +3 --> 5 +1 --> 5 +0 --> 5 +2 --> 5 +1 --> 5 +6["build filtering rule for MAPQ"]@{ shape: process } +4 --> 6 +4 --> 6 +7["valid pairs in juicebox format"]@{ shape: process } +5 --> 7 +5 --> 7 +8["valid pairs in juicebox format MAPQ filtered"]@{ shape: process } +6 --> 8 +7 --> 8 +``` diff --git a/workflows/epigenetics/hic-hicup-cooler/hic-juicermediumtabix-to-cool-cooler_diagrams.md b/workflows/epigenetics/hic-hicup-cooler/hic-juicermediumtabix-to-cool-cooler_diagrams.md new file mode 100644 index 000000000..ff1461972 --- /dev/null +++ b/workflows/epigenetics/hic-hicup-cooler/hic-juicermediumtabix-to-cool-cooler_diagrams.md @@ -0,0 +1,21 @@ +# Workflow diagrams + +## Hi-C_juicermediumtabixToCool_cooler + +```mermaid +graph LR +0["Bin size in bp"]@{ shape: lean-l } +1["genome name"]@{ shape: lean-l } +2["Juicer Medium Tabix with validPairs"]@{ shape: docs } +3["Interactions to consider to calculate weights in normalization step"]@{ shape: lean-l } +4["make bed with bins"]@{ shape: process } +0 --> 4 +1 --> 4 +5["Load pairs in matrix"]@{ shape: process } +1 --> 5 +2 --> 5 +4 --> 5 +6["ICE normalization"]@{ shape: process } +3 --> 6 +5 --> 6 +``` diff --git a/workflows/genome-assembly/assembly-with-flye/Genome-assembly-with-Flye_diagrams.md b/workflows/genome-assembly/assembly-with-flye/Genome-assembly-with-Flye_diagrams.md new file mode 100644 index 000000000..89f618224 --- /dev/null +++ b/workflows/genome-assembly/assembly-with-flye/Genome-assembly-with-Flye_diagrams.md @@ -0,0 +1,16 @@ +# Workflow diagrams + +## Genome assembly with Flye + +```mermaid +graph LR +0["Input sequence reads"]@{ shape: doc } +1["Flye: assembly"]@{ shape: process } +0 --> 1 +2["Quast genome report"]@{ shape: process } +1 --> 2 +3["Fasta statistics"]@{ shape: process } +1 --> 3 +4["Bandage image: Flye assembly"]@{ shape: process } +1 --> 4 +``` diff --git a/workflows/genome-assembly/bacterial-genome-assembly/bacterial_genome_assembly_diagrams.md b/workflows/genome-assembly/bacterial-genome-assembly/bacterial_genome_assembly_diagrams.md new file mode 100644 index 000000000..fb737d0bd --- /dev/null +++ b/workflows/genome-assembly/bacterial-genome-assembly/bacterial_genome_assembly_diagrams.md @@ -0,0 +1,33 @@ +# Workflow diagrams + +## Bacterial Genome Assembly using Shovill + +```mermaid +graph LR +0["Input adapter trimmed sequence reads (forward)"]@{ shape: doc } +1["Input adapter trimmed sequence reads (reverse)"]@{ shape: doc } +2["shovill_genome_assembly"]@{ shape: process } +0 --> 2 +1 --> 2 +3["quast_quality"]@{ shape: process } +2 --> 3 +0 --> 3 +1 --> 3 +4["refseqmasher_genome"]@{ shape: process } +2 --> 4 +5["bandage_contig_graph_stats"]@{ shape: process } +2 --> 5 +6["bandage_contig_graph_plot"]@{ shape: process } +2 --> 6 +7["ToolDistillator"]@{ shape: process } +2 --> 7 +2 --> 7 +2 --> 7 +3 --> 7 +3 --> 7 +4 --> 7 +6 --> 7 +5 --> 7 +8["ToolDistillator summarize"]@{ shape: process } +7 --> 8 +``` diff --git a/workflows/genome-assembly/polish-with-long-reads/Assembly-polishing-with-long-reads_diagrams.md b/workflows/genome-assembly/polish-with-long-reads/Assembly-polishing-with-long-reads_diagrams.md new file mode 100644 index 000000000..45a415547 --- /dev/null +++ b/workflows/genome-assembly/polish-with-long-reads/Assembly-polishing-with-long-reads_diagrams.md @@ -0,0 +1,42 @@ +# Workflow diagrams + +## Assembly polishing with long reads + +```mermaid +graph LR +0["Assembly to be polished"]@{ shape: doc } +1["long reads"]@{ shape: doc } +2["minimap setting (for long reads) "]@{ shape: lean-l } +3["Minimap2: map long reads to assembly"]@{ shape: process } +2 --> 3 +1 --> 3 +0 --> 3 +4["Racon: polish 1"]@{ shape: process } +0 --> 4 +3 --> 4 +1 --> 4 +5["Minimap2: map long reads to polished assembly 1"]@{ shape: process } +2 --> 5 +1 --> 5 +4 --> 5 +6["Racon: polish 2"]@{ shape: process } +4 --> 6 +5 --> 6 +1 --> 6 +7["Minimap2: map long reads to polished assembly 2"]@{ shape: process } +2 --> 7 +1 --> 7 +6 --> 7 +8["Racon: polish 3"]@{ shape: process } +6 --> 8 +7 --> 8 +1 --> 8 +9["Minimap2: map long reads to polished assembly 3"]@{ shape: process } +2 --> 9 +1 --> 9 +8 --> 9 +10["Racon: polish 4"]@{ shape: process } +8 --> 10 +9 --> 10 +1 --> 10 +``` diff --git a/workflows/genome-assembly/quality-and-contamination-control/quality_and_contamination_control_diagrams.md b/workflows/genome-assembly/quality-and-contamination-control/quality_and_contamination_control_diagrams.md new file mode 100644 index 000000000..94170e132 --- /dev/null +++ b/workflows/genome-assembly/quality-and-contamination-control/quality_and_contamination_control_diagrams.md @@ -0,0 +1,41 @@ +# Workflow diagrams + +## Quality and Contamination Control For Genome Assembly + +```mermaid +graph LR +0["Input sequence reads (forward)"]@{ shape: doc } +1["Input sequence reads (reverse)"]@{ shape: doc } +2["Select a taxonomy database"]@{ shape: lean-l } +3["Select a NCBI taxonomy database"]@{ shape: lean-l } +4["fastp_trimming_step"]@{ shape: process } +0 --> 4 +1 --> 4 +5["kraken_taxonomy_assignation"]@{ shape: process } +2 --> 5 +4 --> 5 +4 --> 5 +6["bracken_abundance_estimation"]@{ shape: process } +5 --> 6 +2 --> 6 +7["recentrifuge_taxonomy_visualization"]@{ shape: process } +3 --> 7 +5 --> 7 +8["ToolDistillator"]@{ shape: process } +4 --> 8 +4 --> 8 +4 --> 8 +4 --> 8 +5 --> 8 +2 --> 8 +5 --> 8 +6 --> 8 +6 --> 8 +2 --> 8 +7 --> 8 +7 --> 8 +7 --> 8 +3 --> 8 +9["ToolDistillator summarize"]@{ shape: process } +8 --> 9 +``` diff --git a/workflows/imaging/fluorescence-nuclei-segmentation-and-counting/segmentation-and-counting_diagrams.md b/workflows/imaging/fluorescence-nuclei-segmentation-and-counting/segmentation-and-counting_diagrams.md new file mode 100644 index 000000000..2308620cd --- /dev/null +++ b/workflows/imaging/fluorescence-nuclei-segmentation-and-counting/segmentation-and-counting_diagrams.md @@ -0,0 +1,23 @@ +# Workflow diagrams + +## Segmentation and counting of cell nuclei in fluorescence microscopy images + +```mermaid +graph LR +0["input_image"]@{ shape: doc } +1["Filter 2-D image"]@{ shape: process } +0 --> 1 +2["Perform histogram equalization"]@{ shape: process } +0 --> 2 +3["Threshold image"]@{ shape: process } +1 --> 3 +4["Convert image format"]@{ shape: process } +2 --> 4 +5["Convert binary image to label map"]@{ shape: process } +3 --> 5 +6["Overlay images"]@{ shape: process } +4 --> 6 +5 --> 6 +7["Count objects in label map"]@{ shape: process } +5 --> 7 +``` diff --git a/workflows/metabomics/gcms-metams/Mass-spectrometry__GCMS-with-metaMS_diagrams.md b/workflows/metabomics/gcms-metams/Mass-spectrometry__GCMS-with-metaMS_diagrams.md new file mode 100644 index 000000000..b6d81c7f4 --- /dev/null +++ b/workflows/metabomics/gcms-metams/Mass-spectrometry__GCMS-with-metaMS_diagrams.md @@ -0,0 +1,29 @@ +# Workflow diagrams + +## Mass spectrometry: GCMS with metaMS + +```mermaid +graph LR +0["Mass-spectrometry Dataset Collection"]@{ shape: docs } +1["sampleMetadata"]@{ shape: doc } +2["MSnbase readMSData"]@{ shape: process } +0 --> 2 +3["xcms findChromPeaks (xcmsSet)"]@{ shape: process } +2 --> 3 +4["xcms plot chromatogram"]@{ shape: process } +3 --> 4 +1 --> 4 +5["xcms findChromPeaks Merger"]@{ shape: process } +3 --> 5 +1 --> 5 +6["metaMS.runGC"]@{ shape: process } +5 --> 6 +7["Check Format"]@{ shape: process } +6 --> 7 +1 --> 7 +6 --> 7 +8["Multivariate"]@{ shape: process } +7 --> 8 +7 --> 8 +7 --> 8 +``` diff --git a/workflows/metabomics/lcms-preprocessing/Mass_spectrometry__LC-MS_preprocessing_with_XCMS_diagrams.md b/workflows/metabomics/lcms-preprocessing/Mass_spectrometry__LC-MS_preprocessing_with_XCMS_diagrams.md new file mode 100644 index 000000000..dd59f41f7 --- /dev/null +++ b/workflows/metabomics/lcms-preprocessing/Mass_spectrometry__LC-MS_preprocessing_with_XCMS_diagrams.md @@ -0,0 +1,36 @@ +# Workflow diagrams + +## Mass spectrometry: LC-MS preprocessing with XCMS + +```mermaid +graph LR +0["SampleMetadata"]@{ shape: doc } +1["Mass-spectrometry Dataset Collection"]@{ shape: docs } +2["MSnbase readMSData"]@{ shape: process } +1 --> 2 +3["xcms plot chromatogram"]@{ shape: process } +2 --> 3 +0 --> 3 +4["xcms findChromPeaks (xcmsSet)"]@{ shape: process } +2 --> 4 +5["xcms findChromPeaks Merger"]@{ shape: process } +4 --> 5 +0 --> 5 +6["xcms groupChromPeaks (group)"]@{ shape: process } +5 --> 6 +7["xcms adjustRtime (retcor)"]@{ shape: process } +6 --> 7 +8["Intensity Check"]@{ shape: process } +6 --> 8 +0 --> 8 +6 --> 8 +9["xcms plot chromatogram"]@{ shape: process } +7 --> 9 +0 --> 9 +10["xcms groupChromPeaks (group)"]@{ shape: process } +7 --> 10 +11["xcms fillChromPeaks (fillPeaks)"]@{ shape: process } +10 --> 11 +12["CAMERA.annotate"]@{ shape: process } +11 --> 12 +``` diff --git a/workflows/microbiome/allele-based-pathogen-identification/Allele-based-Pathogen-Identification_diagrams.md b/workflows/microbiome/allele-based-pathogen-identification/Allele-based-Pathogen-Identification_diagrams.md new file mode 100644 index 000000000..700498dd3 --- /dev/null +++ b/workflows/microbiome/allele-based-pathogen-identification/Allele-based-Pathogen-Identification_diagrams.md @@ -0,0 +1,62 @@ +# Workflow diagrams + +## Allele-based Pathogen Identification + +```mermaid +graph LR +0["collection_of_preprocessed_samples"]@{ shape: docs } +1["samples_profile"]@{ shape: lean-l } +2["reference_genome_of_tested_strain"]@{ shape: doc } +3["Convert compressed file to uncompressed."]@{ shape: process } +2 --> 3 +4["Map with minimap2"]@{ shape: process } +1 --> 4 +0 --> 4 +3 --> 4 +5["Clair3"]@{ shape: process } +4 --> 5 +3 --> 5 +6["Samtools depth"]@{ shape: process } +4 --> 6 +7["Samtools coverage"]@{ shape: process } +4 --> 7 +8["bcftools norm"]@{ shape: process } +5 --> 8 +3 --> 8 +9["Advanced Cut"]@{ shape: process } +6 --> 9 +10["Remove beginning"]@{ shape: process } +7 --> 10 +11["SnpSift Filter"]@{ shape: process } +8 --> 11 +12["Table Compute"]@{ shape: process } +9 --> 12 +13["Cut"]@{ shape: process } +10 --> 13 +14["SnpSift Extract Fields"]@{ shape: process } +11 --> 14 +15["bcftools consensus"]@{ shape: process } +11 --> 15 +3 --> 15 +16["Select first"]@{ shape: process } +13 --> 16 +17["Remove beginning"]@{ shape: process } +14 --> 17 +18["Collapse Collection"]@{ shape: process } +16 --> 18 +19["Count"]@{ shape: process } +17 --> 19 +20["Advanced Cut"]@{ shape: process } +18 --> 20 +21["Cut"]@{ shape: process } +19 --> 21 +22["Paste"]@{ shape: process } +20 --> 22 +12 --> 22 +23["Select first"]@{ shape: process } +21 --> 23 +24["Collapse Collection"]@{ shape: process } +23 --> 24 +25["Column Regex Find And Replace"]@{ shape: process } +24 --> 25 +``` diff --git a/workflows/microbiome/gene-based-pathogen-identification/Gene-based-Pathogen-Identification_diagrams.md b/workflows/microbiome/gene-based-pathogen-identification/Gene-based-Pathogen-Identification_diagrams.md new file mode 100644 index 000000000..09bc23794 --- /dev/null +++ b/workflows/microbiome/gene-based-pathogen-identification/Gene-based-Pathogen-Identification_diagrams.md @@ -0,0 +1,42 @@ +# Workflow diagrams + +## Gene-based Pathogen Identification + +```mermaid +graph LR +0["collection_of_preprocessed_samples"]@{ shape: docs } +1["Extract element identifiers"]@{ shape: process } +0 --> 1 +2["Build list"]@{ shape: process } +0 --> 2 +3["Split file"]@{ shape: process } +1 --> 3 +4["Flye"]@{ shape: process } +2 --> 4 +5["Parse parameter value"]@{ shape: process } +3 --> 5 +6["medaka consensus pipeline"]@{ shape: process } +4 --> 6 +0 --> 6 +7["Bandage Image"]@{ shape: process } +4 --> 7 +8["Compose text parameter value"]@{ shape: process } +5 --> 8 +9["FASTA-to-Tabular"]@{ shape: process } +6 --> 9 +10["ABRicate"]@{ shape: process } +6 --> 10 +11["ABRicate"]@{ shape: process } +6 --> 11 +12["Replace"]@{ shape: process } +8 --> 12 +9 --> 12 +13["Replace"]@{ shape: process } +8 --> 13 +10 --> 13 +14["Replace"]@{ shape: process } +8 --> 14 +11 --> 14 +15["Tabular-to-FASTA"]@{ shape: process } +12 --> 15 +``` diff --git a/workflows/microbiome/nanopore-pre-processing/Nanopore-Pre-Processing_diagrams.md b/workflows/microbiome/nanopore-pre-processing/Nanopore-Pre-Processing_diagrams.md new file mode 100644 index 000000000..d2186f2ba --- /dev/null +++ b/workflows/microbiome/nanopore-pre-processing/Nanopore-Pre-Processing_diagrams.md @@ -0,0 +1,64 @@ +# Workflow diagrams + +## Nanopore Preprocessing + +```mermaid +graph LR +0["samples_profile"]@{ shape: lean-l } +1["host_reference_genome"]@{ shape: lean-l } +2["collection_of_all_samples"]@{ shape: docs } +3["Porechop"]@{ shape: process } +2 --> 3 +4["NanoPlot"]@{ shape: process } +2 --> 4 +5["FastQC"]@{ shape: process } +2 --> 5 +6["fastp"]@{ shape: process } +3 --> 6 +7["MultiQC"]@{ shape: process } +5 --> 7 +8["Map with minimap2"]@{ shape: process } +0 --> 8 +6 --> 8 +1 --> 8 +9["NanoPlot"]@{ shape: process } +6 --> 9 +10["FastQC"]@{ shape: process } +6 --> 10 +11["Split BAM by reads mapping status"]@{ shape: process } +8 --> 11 +12["Select"]@{ shape: process } +10 --> 12 +13["Samtools fastx"]@{ shape: process } +11 --> 13 +14["Samtools fastx"]@{ shape: process } +11 --> 14 +15["Collapse Collection"]@{ shape: process } +12 --> 15 +16["Filter failed datasets"]@{ shape: process } +13 --> 16 +17["Kraken2"]@{ shape: process } +14 --> 17 +18["Cut"]@{ shape: process } +15 --> 18 +19["FastQC"]@{ shape: process } +16 --> 19 +20["Krakentools: Extract Kraken Reads By ID"]@{ shape: process } +6 --> 20 +17 --> 20 +17 --> 20 +21["Select"]@{ shape: process } +19 --> 21 +22["Collapse Collection"]@{ shape: process } +21 --> 22 +23["Cut"]@{ shape: process } +22 --> 23 +24["Column join"]@{ shape: process } +25["Compute"]@{ shape: process } +24 --> 25 +26["Column Regex Find And Replace"]@{ shape: process } +25 --> 26 +27["MultiQC"]@{ shape: process } +10 --> 27 +26 --> 27 +``` diff --git a/workflows/microbiome/pathogen-detection-pathogfair-samples-aggregation-and-visualisation/Pathogen-Detection-PathoGFAIR-Samples-Aggregation-and-Visualisation_diagrams.md b/workflows/microbiome/pathogen-detection-pathogfair-samples-aggregation-and-visualisation/Pathogen-Detection-PathoGFAIR-Samples-Aggregation-and-Visualisation_diagrams.md new file mode 100644 index 000000000..aba9381c8 --- /dev/null +++ b/workflows/microbiome/pathogen-detection-pathogfair-samples-aggregation-and-visualisation/Pathogen-Detection-PathoGFAIR-Samples-Aggregation-and-Visualisation_diagrams.md @@ -0,0 +1,141 @@ +# Workflow diagrams + +## Pathogen Detection PathoGFAIR Samples Aggregation and Visualisation + +```mermaid +graph LR +0["amr_identified_by_ncbi"]@{ shape: docs } +1["vfs_of_genes_identified_by_vfdb"]@{ shape: docs } +2["amrs"]@{ shape: docs } +3["contigs"]@{ shape: docs } +4["vfs"]@{ shape: docs } +5["removed_hosts_percentage_tabular"]@{ shape: doc } +6["mapping_mean_depth_per_sample"]@{ shape: doc } +7["mapping_coverage_percentage_per_sample"]@{ shape: doc } +8["number_of_variants_per_sample"]@{ shape: doc } +9["metadata"]@{ shape: doc } +10["Filter failed datasets"]@{ shape: process } +0 --> 10 +11["Filter failed datasets"]@{ shape: process } +1 --> 11 +12["Filter failed datasets"]@{ shape: process } +2 --> 12 +13["Filter failed datasets"]@{ shape: process } +3 --> 13 +14["Filter failed datasets"]@{ shape: process } +4 --> 14 +15["Remove beginning"]@{ shape: process } +10 --> 15 +16["Remove beginning"]@{ shape: process } +11 --> 16 +17["Remove beginning"]@{ shape: process } +12 --> 17 +18["Collapse Collection"]@{ shape: process } +13 --> 18 +19["Collapse Collection"]@{ shape: process } +14 --> 19 +20["Remove beginning"]@{ shape: process } +14 --> 20 +21["Count"]@{ shape: process } +15 --> 21 +22["Count"]@{ shape: process } +16 --> 22 +23["Group"]@{ shape: process } +16 --> 23 +24["Unique"]@{ shape: process } +17 --> 24 +25["Split by group"]@{ shape: process } +19 --> 25 +26["Unique"]@{ shape: process } +20 --> 26 +27["Cut"]@{ shape: process } +21 --> 27 +28["Cut"]@{ shape: process } +22 --> 28 +29["Filter empty datasets"]@{ shape: process } +23 --> 29 +30["Cut"]@{ shape: process } +24 --> 30 +31["Cut"]@{ shape: process } +25 --> 31 +32["Cut"]@{ shape: process } +26 --> 32 +33["Collapse Collection"]@{ shape: process } +27 --> 33 +34["Collapse Collection"]@{ shape: process } +28 --> 34 +35["Column join"]@{ shape: process } +29 --> 35 +36["bedtools getfasta"]@{ shape: process } +18 --> 36 +30 --> 36 +37["Remove beginning"]@{ shape: process } +31 --> 37 +38["bedtools getfasta"]@{ shape: process } +18 --> 38 +32 --> 38 +39["Column Regex Find And Replace"]@{ shape: process } +33 --> 39 +40["Column Regex Find And Replace"]@{ shape: process } +34 --> 40 +41["Column Regex Find And Replace"]@{ shape: process } +35 --> 41 +42["Regex Find And Replace"]@{ shape: process } +36 --> 42 +43["bedtools getfasta"]@{ shape: process } +18 --> 43 +37 --> 43 +44["Regex Find And Replace"]@{ shape: process } +38 --> 44 +45["Multi-Join"]@{ shape: process } +40 --> 45 +39 --> 45 +46["Heatmap w ggplot"]@{ shape: process } +41 --> 46 +47["Filter empty datasets"]@{ shape: process } +42 --> 47 +48["ClustalW"]@{ shape: process } +43 --> 48 +49["Filter empty datasets"]@{ shape: process } +44 --> 49 +50["Replace Text"]@{ shape: process } +45 --> 50 +51["FASTA-to-Tabular"]@{ shape: process } +47 --> 51 +52["Filter empty datasets"]@{ shape: process } +48 --> 52 +53["FASTA-to-Tabular"]@{ shape: process } +49 --> 53 +54["Cut"]@{ shape: process } +51 --> 54 +55["FASTTREE"]@{ shape: process } +52 --> 55 +56["Cut"]@{ shape: process } +53 --> 56 +57["Group"]@{ shape: process } +54 --> 57 +58["Newick Display"]@{ shape: process } +55 --> 58 +59["Group"]@{ shape: process } +56 --> 59 +60["Tabular-to-FASTA"]@{ shape: process } +57 --> 60 +61["Tabular-to-FASTA"]@{ shape: process } +59 --> 61 +62["FASTA Merge Files and Filter Unique Sequences"]@{ shape: process } +60 --> 62 +63["FASTA Merge Files and Filter Unique Sequences"]@{ shape: process } +61 --> 63 +64["ClustalW"]@{ shape: process } +62 --> 64 +65["ClustalW"]@{ shape: process } +63 --> 65 +66["FASTTREE"]@{ shape: process } +64 --> 66 +67["FASTTREE"]@{ shape: process } +65 --> 67 +68["Newick Display"]@{ shape: process } +66 --> 68 +69["Newick Display"]@{ shape: process } +67 --> 69 +``` diff --git a/workflows/microbiome/taxonomy-profiling-and-visualization-with-krona/Taxonomy-Profiling-and-Visualization-with-Krona_diagrams.md b/workflows/microbiome/taxonomy-profiling-and-visualization-with-krona/Taxonomy-Profiling-and-Visualization-with-Krona_diagrams.md new file mode 100644 index 000000000..8c1c08410 --- /dev/null +++ b/workflows/microbiome/taxonomy-profiling-and-visualization-with-krona/Taxonomy-Profiling-and-Visualization-with-Krona_diagrams.md @@ -0,0 +1,16 @@ +# Workflow diagrams + +## Taxonomy Profiling and Visualization with Krona + +```mermaid +graph LR +0["collection_of_preprocessed_samples"]@{ shape: docs } +1["kraken_database"]@{ shape: lean-l } +2["Kraken2"]@{ shape: process } +1 --> 2 +0 --> 2 +3["Krakentools: Convert kraken report file"]@{ shape: process } +2 --> 3 +4["Krona pie chart"]@{ shape: process } +3 --> 4 +``` diff --git a/workflows/proteomics/clinicalmp/clinicalmp-data-interpretation/WF5_Data_Interpretation_Worklow_diagrams.md b/workflows/proteomics/clinicalmp/clinicalmp-data-interpretation/WF5_Data_Interpretation_Worklow_diagrams.md new file mode 100644 index 000000000..7f938bf4e --- /dev/null +++ b/workflows/proteomics/clinicalmp/clinicalmp-data-interpretation/WF5_Data_Interpretation_Worklow_diagrams.md @@ -0,0 +1,30 @@ +# Workflow diagrams + +## Clinical Metaproteomics Data Interpretation + +```mermaid +graph LR +0["Quantified Peptides"]@{ shape: doc } +1["MaxQuant Protein Groups"]@{ shape: doc } +2["MaxQuant Evidence"]@{ shape: doc } +3["Annotation"]@{ shape: doc } +4["Comparison Matrix"]@{ shape: doc } +5["Unipept"]@{ shape: process } +0 --> 5 +6["Microbial Proteins"]@{ shape: process } +1 --> 6 +7["Select"]@{ shape: process } +1 --> 7 +8["MSstatsTMT_for_microbial_proteins"]@{ shape: process } +4 --> 8 +3 --> 8 +2 --> 8 +6 --> 8 +9["Human Proteins"]@{ shape: process } +7 --> 9 +10["MSstatsTMT_for_human_proteins"]@{ shape: process } +4 --> 10 +3 --> 10 +2 --> 10 +9 --> 10 +``` diff --git a/workflows/proteomics/clinicalmp/clinicalmp-database-generation/iwc-clinicalmp-database-generation_diagrams.md b/workflows/proteomics/clinicalmp/clinicalmp-database-generation/iwc-clinicalmp-database-generation_diagrams.md new file mode 100644 index 000000000..763680b32 --- /dev/null +++ b/workflows/proteomics/clinicalmp/clinicalmp-database-generation/iwc-clinicalmp-database-generation_diagrams.md @@ -0,0 +1,22 @@ +# Workflow diagrams + +## Generate a Clinical Metaproteomics Database + +```mermaid +graph LR +0["Human SwissProt Protein Database"]@{ shape: doc } +1["Tandem Mass Spectrometry (MS/MS) datasets"]@{ shape: docs } +2["Species UniProt Protein Database"]@{ shape: doc } +3["Contaminants cRAP Protein Database"]@{ shape: doc } +4["Human UniProt Microbial Proteins cRAP for MetaNovo"]@{ shape: process } +0 --> 4 +2 --> 4 +3 --> 4 +5["Metanovo"]@{ shape: process } +4 --> 5 +1 --> 5 +6["Merge all FASTA"]@{ shape: process } +0 --> 6 +5 --> 6 +3 --> 6 +``` diff --git a/workflows/proteomics/clinicalmp/clinicalmp-quantitation/iwc-clinicalmp-quantitation_diagrams.md b/workflows/proteomics/clinicalmp/clinicalmp-quantitation/iwc-clinicalmp-quantitation_diagrams.md new file mode 100644 index 000000000..7f198ce72 --- /dev/null +++ b/workflows/proteomics/clinicalmp/clinicalmp-quantitation/iwc-clinicalmp-quantitation_diagrams.md @@ -0,0 +1,26 @@ +# Workflow diagrams + +## Clinical Metaproteomics Quantitation + +```mermaid +graph LR +0["Quantitation_Database-For-MaxQuant"]@{ shape: doc } +1["Experimental-Design Discovery MaxQuant"]@{ shape: doc } +2["Input Raw-files"]@{ shape: docs } +3["MaxQuant"]@{ shape: process } +0 --> 3 +2 --> 3 +1 --> 3 +4["extracting microbial Proteins"]@{ shape: process } +3 --> 4 +5["extracting microbial Peptides"]@{ shape: process } +3 --> 5 +6["extract proteins"]@{ shape: process } +4 --> 6 +7["extract peptides"]@{ shape: process } +5 --> 7 +8["Quantified-Proteins"]@{ shape: process } +6 --> 8 +9["Quantified-Peptides"]@{ shape: process } +7 --> 9 +``` diff --git a/workflows/proteomics/clinicalmp/clinicalmp-verification/clinicalmp-verification_diagrams.md b/workflows/proteomics/clinicalmp/clinicalmp-verification/clinicalmp-verification_diagrams.md new file mode 100644 index 000000000..9e52396c6 --- /dev/null +++ b/workflows/proteomics/clinicalmp/clinicalmp-verification/clinicalmp-verification_diagrams.md @@ -0,0 +1,52 @@ +# Workflow diagrams + +## Clinical Metaproteomics Verification Workflow + +```mermaid +graph LR +0["Human UniProt Isoforms FASTA"]@{ shape: process } +1["cRAP"]@{ shape: process } +2["Tandem Mass Spectrometry (MS/MS) datasets"]@{ shape: docs } +3["SGPS peptide report"]@{ shape: doc } +4["Distinct Peptides for PepQuery"]@{ shape: doc } +5["MaxQuant peptide report"]@{ shape: doc } +6["Human UniProt Isoforms cRAP FASTA"]@{ shape: process } +0 --> 6 +1 --> 6 +7["SGPS Cut"]@{ shape: process } +3 --> 7 +8["MQ Cut"]@{ shape: process } +5 --> 8 +9["PepQuery2"]@{ shape: process } +6 --> 9 +4 --> 9 +2 --> 9 +10["SGPS Remove Beginner"]@{ shape: process } +7 --> 10 +11["MQ Remove Beginner"]@{ shape: process } +8 --> 11 +12["Collapse Collection"]@{ shape: process } +9 --> 12 +13["Concatenate datasets"]@{ shape: process } +14["Filter"]@{ shape: process } +12 --> 14 +15["Filter Remove beginning"]@{ shape: process } +14 --> 15 +16["FRB Cut"]@{ shape: process } +15 --> 16 +17["Peptide and Protein from Peptide Reports"]@{ shape: process } +16 --> 17 +13 --> 17 +18["PPPR Remove beginning"]@{ shape: process } +17 --> 18 +19["Group"]@{ shape: process } +18 --> 19 +20["Uniprot ID from verified Peptides"]@{ shape: process } +19 --> 20 +21["UniProt"]@{ shape: process } +20 --> 21 +22["Quantitation Database for MaxQuant"]@{ shape: process } +0 --> 22 +1 --> 22 +21 --> 22 +``` diff --git a/workflows/proteomics/openms-metaprosip/metaprosip_diagrams.md b/workflows/proteomics/openms-metaprosip/metaprosip_diagrams.md new file mode 100644 index 000000000..fed16bdf6 --- /dev/null +++ b/workflows/proteomics/openms-metaprosip/metaprosip_diagrams.md @@ -0,0 +1,39 @@ +# Workflow diagrams + +## MetaProSIP OpenMS 2.8 + +```mermaid +graph LR +0["Centroided LC-MS datasets"]@{ shape: docs } +1["Fasta Database"]@{ shape: doc } +2["Precursor monoisotopic mass tolerance (ppm)"]@{ shape: lean-l } +3["Fixed modifications"]@{ shape: lean-l } +4["Variable modifications"]@{ shape: lean-l } +5["Labeled element"]@{ shape: lean-l } +6["Sort collection"]@{ shape: process } +0 --> 6 +7["DecoyDatabase"]@{ shape: process } +1 --> 7 +8["FeatureFinderMultiplex"]@{ shape: process } +6 --> 8 +9["MSGFPlusAdapter"]@{ shape: process } +7 --> 9 +3 --> 9 +6 --> 9 +2 --> 9 +4 --> 9 +10["PeptideIndexer"]@{ shape: process } +7 --> 10 +9 --> 10 +11["FalseDiscoveryRate"]@{ shape: process } +10 --> 11 +12["IDMapper"]@{ shape: process } +11 --> 12 +8 --> 12 +13["MetaProSIP"]@{ shape: process } +7 --> 13 +12 --> 13 +6 --> 13 +5 --> 13 +2 --> 13 +``` diff --git a/workflows/repeatmasking/RepeatMasking-Workflow_diagrams.md b/workflows/repeatmasking/RepeatMasking-Workflow_diagrams.md new file mode 100644 index 000000000..dcc14a197 --- /dev/null +++ b/workflows/repeatmasking/RepeatMasking-Workflow_diagrams.md @@ -0,0 +1,12 @@ +# Workflow diagrams + +## Repeat masking with RepeatModeler and RepeatMasker + +```mermaid +graph LR +0["input"]@{ shape: doc } +1["RepeatModeler"]@{ shape: process } +0 --> 1 +2["RepeatMasker"]@{ shape: process } +1 --> 2 +``` diff --git a/workflows/sars-cov-2-variant-calling/sars-cov-2-consensus-from-variation/consensus-from-variation_diagrams.md b/workflows/sars-cov-2-variant-calling/sars-cov-2-consensus-from-variation/consensus-from-variation_diagrams.md new file mode 100644 index 000000000..7d2b47b76 --- /dev/null +++ b/workflows/sars-cov-2-variant-calling/sars-cov-2-consensus-from-variation/consensus-from-variation_diagrams.md @@ -0,0 +1,55 @@ +# Workflow diagrams + +## COVID-19: consensus construction + +```mermaid +graph LR +0["Variant calls"]@{ shape: docs } +1["min-AF for consensus variant"]@{ shape: lean-l } +2["min-AF for failed variants"]@{ shape: lean-l } +3["aligned reads data for depth calculation"]@{ shape: docs } +4["Depth-threshold for masking"]@{ shape: lean-l } +5["Reference genome"]@{ shape: doc } +6["Compose text parameter value"]@{ shape: process } +1 --> 6 +7["Compose text parameter value"]@{ shape: process } +2 --> 7 +1 --> 7 +8["bedtools Genome Coverage"]@{ shape: process } +3 --> 8 +9["Compose text parameter value"]@{ shape: process } +4 --> 9 +10["SnpSift Filter"]@{ shape: process } +6 --> 10 +0 --> 10 +11["SnpSift Filter"]@{ shape: process } +7 --> 11 +0 --> 11 +12["Filter"]@{ shape: process } +9 --> 12 +8 --> 12 +13["SnpSift Extract Fields"]@{ shape: process } +10 --> 13 +14["SnpSift Extract Fields"]@{ shape: process } +11 --> 14 +15["Compute"]@{ shape: process } +13 --> 15 +16["Compute"]@{ shape: process } +14 --> 16 +17["Concatenate"]@{ shape: process } +12 --> 17 +16 --> 17 +18["Merge"]@{ shape: process } +17 --> 18 +19["Subtract"]@{ shape: process } +18 --> 19 +15 --> 19 +20["Compute"]@{ shape: process } +19 --> 20 +21["bcftools consensus"]@{ shape: process } +10 --> 21 +5 --> 21 +20 --> 21 +22["Collapse Collection"]@{ shape: process } +21 --> 22 +``` diff --git a/workflows/sars-cov-2-variant-calling/sars-cov-2-ont-artic-variant-calling/ont-artic-variation_diagrams.md b/workflows/sars-cov-2-variant-calling/sars-cov-2-ont-artic-variant-calling/ont-artic-variation_diagrams.md new file mode 100644 index 000000000..11c297f2b --- /dev/null +++ b/workflows/sars-cov-2-variant-calling/sars-cov-2-ont-artic-variant-calling/ont-artic-variation_diagrams.md @@ -0,0 +1,69 @@ +# Workflow diagrams + +## COVID-19: variation analysis of ARTIC ONT data + +```mermaid +graph LR +0["ONT-sequenced reads"]@{ shape: docs } +1["Minimum read length"]@{ shape: lean-l } +2["Maximum read length"]@{ shape: lean-l } +3["NC_045512.2 FASTA sequence of SARS-CoV-2"]@{ shape: doc } +4["Primer binding sites info in BED format"]@{ shape: doc } +5["fastp"]@{ shape: process } +2 --> 5 +1 --> 5 +0 --> 5 +6["Compute"]@{ shape: process } +4 --> 6 +7["Replace Text"]@{ shape: process } +4 --> 7 +8["Map with minimap2"]@{ shape: process } +5 --> 8 +3 --> 8 +9["Datamash"]@{ shape: process } +6 --> 9 +10["Samtools view"]@{ shape: process } +8 --> 10 +11["Parse parameter value"]@{ shape: process } +9 --> 11 +12["Samtools stats"]@{ shape: process } +10 --> 12 +13["BamLeftAlign"]@{ shape: process } +10 --> 13 +3 --> 13 +14["ivar trim"]@{ shape: process } +13 --> 14 +4 --> 14 +15["QualiMap BamQC"]@{ shape: process } +14 --> 15 +16["medaka consensus tool"]@{ shape: process } +14 --> 16 +17["Filter failed"]@{ shape: process } +15 --> 17 +18["medaka variant tool"]@{ shape: process } +13 --> 18 +16 --> 18 +3 --> 18 +19["medaka variant tool"]@{ shape: process } +13 --> 19 +11 --> 19 +16 --> 19 +3 --> 19 +20["Flatten Collection"]@{ shape: process } +17 --> 20 +21["bedtools Intersect intervals"]@{ shape: process } +19 --> 21 +7 --> 21 +22["MultiQC"]@{ shape: process } +12 --> 22 +20 --> 22 +23["bcftools annotate"]@{ shape: process } +18 --> 23 +21 --> 23 +24["SnpEff eff covid19 version"]@{ shape: process } +23 --> 24 +25["Lofreq filter"]@{ shape: process } +24 --> 25 +26["Replace"]@{ shape: process } +25 --> 26 +``` diff --git a/workflows/sars-cov-2-variant-calling/sars-cov-2-pe-illumina-artic-ivar-analysis/pe-wgs-ivar-analysis_diagrams.md b/workflows/sars-cov-2-variant-calling/sars-cov-2-pe-illumina-artic-ivar-analysis/pe-wgs-ivar-analysis_diagrams.md new file mode 100644 index 000000000..56cdf4338 --- /dev/null +++ b/workflows/sars-cov-2-variant-calling/sars-cov-2-pe-illumina-artic-ivar-analysis/pe-wgs-ivar-analysis_diagrams.md @@ -0,0 +1,53 @@ +# Workflow diagrams + +## SARS-CoV-2 Illumina Amplicon pipeline - iVar based + +```mermaid +graph LR +0["Paired read collection for samples"]@{ shape: docs } +1["Reference FASTA"]@{ shape: doc } +2["Primer BED"]@{ shape: doc } +3["Read fraction to call variant"]@{ shape: lean-l } +4["Minimum quality score to call base"]@{ shape: lean-l } +5["fastp: Trimmed Illumina Reads"]@{ shape: process } +0 --> 5 +6["Rename reference to NC_045512.2"]@{ shape: process } +1 --> 6 +7["Map with BWA-MEM"]@{ shape: process } +5 --> 7 +6 --> 7 +8["Samtools stats"]@{ shape: process } +7 --> 8 +9["Samtools view"]@{ shape: process } +7 --> 9 +10["QualiMap BamQC"]@{ shape: process } +9 --> 10 +11["ivar trim"]@{ shape: process } +9 --> 11 +2 --> 11 +12["Flatten collection"]@{ shape: process } +10 --> 12 +13["ivar variants"]@{ shape: process } +11 --> 13 +3 --> 13 +4 --> 13 +1 --> 13 +14["ivar consensus"]@{ shape: process } +11 --> 14 +3 --> 14 +4 --> 14 +15["Quality Control Report"]@{ shape: process } +5 --> 15 +8 --> 15 +12 --> 15 +16["Annotated variants"]@{ shape: process } +13 --> 16 +17["Consensus genome (masked for depth)"]@{ shape: process } +14 --> 17 +18["Concatenate datasets"]@{ shape: process } +17 --> 18 +19["Pangolin"]@{ shape: process } +18 --> 19 +20["Nextclade"]@{ shape: process } +18 --> 20 +``` diff --git a/workflows/sars-cov-2-variant-calling/sars-cov-2-pe-illumina-artic-variant-calling/pe-artic-variation_diagrams.md b/workflows/sars-cov-2-variant-calling/sars-cov-2-pe-illumina-artic-variant-calling/pe-artic-variation_diagrams.md new file mode 100644 index 000000000..4f515d4d6 --- /dev/null +++ b/workflows/sars-cov-2-variant-calling/sars-cov-2-pe-illumina-artic-variant-calling/pe-artic-variation_diagrams.md @@ -0,0 +1,86 @@ +# Workflow diagrams + +## COVID-19: variation analysis on ARTIC PE data + +```mermaid +graph LR +0["Paired Collection"]@{ shape: docs } +1["NC_045512.2 FASTA sequence of SARS-CoV-2"]@{ shape: doc } +2["ARTIC primer BED"]@{ shape: doc } +3["ARTIC primers to amplicon assignments"]@{ shape: doc } +4["Read removal minimum AF"]@{ shape: lean-l } +5["Read removal maximum AF"]@{ shape: lean-l } +6["Minimum DP required after amplicon bias correction"]@{ shape: lean-l } +7["Minimum DP_ALT required after amplicon bias correction"]@{ shape: lean-l } +8["fastp"]@{ shape: process } +0 --> 8 +9["Compose text parameter value"]@{ shape: process } +4 --> 9 +5 --> 9 +10["Compose text parameter value"]@{ shape: process } +6 --> 10 +7 --> 10 +11["Map with BWA-MEM"]@{ shape: process } +8 --> 11 +1 --> 11 +12["Samtools view"]@{ shape: process } +11 --> 12 +13["Realign reads"]@{ shape: process } +12 --> 13 +1 --> 13 +14["Samtools stats"]@{ shape: process } +12 --> 14 +15["Insert indel qualities"]@{ shape: process } +13 --> 15 +1 --> 15 +16["ivar trim"]@{ shape: process } +3 --> 16 +15 --> 16 +2 --> 16 +17["Call variants"]@{ shape: process } +16 --> 17 +1 --> 17 +18["QualiMap BamQC"]@{ shape: process } +16 --> 18 +19["SnpSift Filter"]@{ shape: process } +9 --> 19 +17 --> 19 +20["SnpSift Filter"]@{ shape: process } +10 --> 20 +17 --> 20 +21["Filter failed datasets"]@{ shape: process } +18 --> 21 +22["ivar removereads"]@{ shape: process } +3 --> 22 +16 --> 22 +2 --> 22 +19 --> 22 +23["Flatten collection"]@{ shape: process } +21 --> 23 +24["Call variants"]@{ shape: process } +22 --> 24 +1 --> 24 +25["MultiQC"]@{ shape: process } +8 --> 25 +14 --> 25 +23 --> 25 +26["bcftools annotate"]@{ shape: process } +17 --> 26 +24 --> 26 +27["SnpSift Filter"]@{ shape: process } +10 --> 27 +24 --> 27 +28["VCF-VCFintersect:"]@{ shape: process } +1 --> 28 +27 --> 28 +20 --> 28 +29["bcftools annotate"]@{ shape: process } +26 --> 29 +28 --> 29 +30["Replace Text"]@{ shape: process } +29 --> 30 +31["SnpEff eff covid19 version"]@{ shape: process } +30 --> 31 +32["Lofreq filter"]@{ shape: process } +31 --> 32 +``` diff --git a/workflows/sars-cov-2-variant-calling/sars-cov-2-pe-illumina-wgs-variant-calling/pe-wgs-variation_diagrams.md b/workflows/sars-cov-2-variant-calling/sars-cov-2-pe-illumina-wgs-variant-calling/pe-wgs-variation_diagrams.md new file mode 100644 index 000000000..e9d1ca72d --- /dev/null +++ b/workflows/sars-cov-2-variant-calling/sars-cov-2-pe-illumina-wgs-variant-calling/pe-wgs-variation_diagrams.md @@ -0,0 +1,37 @@ +# Workflow diagrams + +## COVID-19: variation analysis on WGS PE data + +```mermaid +graph LR +0["Paired Collection"]@{ shape: docs } +1["NC_045512.2 FASTA sequence of SARS-CoV-2"]@{ shape: doc } +2["fastp"]@{ shape: process } +0 --> 2 +3["Map with BWA-MEM"]@{ shape: process } +2 --> 3 +1 --> 3 +4["Samtools view"]@{ shape: process } +3 --> 4 +5["Samtools stats"]@{ shape: process } +4 --> 5 +6["MarkDuplicates"]@{ shape: process } +4 --> 6 +7["Realign reads"]@{ shape: process } +6 --> 7 +1 --> 7 +8["MultiQC"]@{ shape: process } +2 --> 8 +5 --> 8 +6 --> 8 +9["Insert indel qualities"]@{ shape: process } +7 --> 9 +1 --> 9 +10["Call variants"]@{ shape: process } +9 --> 10 +1 --> 10 +11["Lofreq filter"]@{ shape: process } +10 --> 11 +12["SnpEff eff covid19 version"]@{ shape: process } +11 --> 12 +``` diff --git a/workflows/sars-cov-2-variant-calling/sars-cov-2-se-illumina-wgs-variant-calling/se-wgs-variation_diagrams.md b/workflows/sars-cov-2-variant-calling/sars-cov-2-se-illumina-wgs-variant-calling/se-wgs-variation_diagrams.md new file mode 100644 index 000000000..0a66bb6e1 --- /dev/null +++ b/workflows/sars-cov-2-variant-calling/sars-cov-2-se-illumina-wgs-variant-calling/se-wgs-variation_diagrams.md @@ -0,0 +1,33 @@ +# Workflow diagrams + +## COVID-19: variation analysis on WGS SE data + +```mermaid +graph LR +0["Single End Collection"]@{ shape: docs } +1["NC_045512.2 FASTA sequence of SARS-CoV-2"]@{ shape: doc } +2["fastp"]@{ shape: process } +0 --> 2 +3["Bowtie2"]@{ shape: process } +2 --> 3 +1 --> 3 +4["MarkDuplicates"]@{ shape: process } +3 --> 4 +5["MultiQC"]@{ shape: process } +2 --> 5 +3 --> 5 +4 --> 5 +6["Realign reads"]@{ shape: process } +4 --> 6 +1 --> 6 +7["Insert indel qualities"]@{ shape: process } +6 --> 7 +1 --> 7 +8["Call variants"]@{ shape: process } +7 --> 8 +1 --> 8 +9["Lofreq filter"]@{ shape: process } +8 --> 9 +10["SnpEff eff covid19 version"]@{ shape: process } +9 --> 10 +``` diff --git a/workflows/sars-cov-2-variant-calling/sars-cov-2-variation-reporting/variation-reporting_diagrams.md b/workflows/sars-cov-2-variant-calling/sars-cov-2-variation-reporting/variation-reporting_diagrams.md new file mode 100644 index 000000000..4873719f6 --- /dev/null +++ b/workflows/sars-cov-2-variant-calling/sars-cov-2-variation-reporting/variation-reporting_diagrams.md @@ -0,0 +1,85 @@ +# Workflow diagrams + +## COVID-19: variation analysis reporting + +```mermaid +graph LR +0["Variation data to report"]@{ shape: docs } +1["AF Filter"]@{ shape: lean-l } +2["DP Filter"]@{ shape: lean-l } +3["DP_ALT Filter"]@{ shape: lean-l } +4["gene products translations"]@{ shape: doc } +5["Number of Clusters"]@{ shape: lean-l } +6["SnpSift Filter"]@{ shape: process } +0 --> 6 +7["Compose text parameter value"]@{ shape: process } +1 --> 7 +2 --> 7 +3 --> 7 +8["Compose text parameter value"]@{ shape: process } +1 --> 8 +2 --> 8 +3 --> 8 +9["SnpSift Filter"]@{ shape: process } +7 --> 9 +8 --> 9 +6 --> 9 +10["SnpSift Extract Fields"]@{ shape: process } +9 --> 10 +11["Replace column"]@{ shape: process } +10 --> 11 +4 --> 11 +12["Compute"]@{ shape: process } +11 --> 12 +13["Datamash"]@{ shape: process } +12 --> 13 +14["Replace"]@{ shape: process } +13 --> 14 +15["Collapse Collection"]@{ shape: process } +14 --> 15 +16["Compute"]@{ shape: process } +15 --> 16 +17["Replace"]@{ shape: process } +16 --> 17 +18["Datamash"]@{ shape: process } +17 --> 18 +19["Filter"]@{ shape: process } +17 --> 19 +20["Datamash"]@{ shape: process } +17 --> 20 +21["Join"]@{ shape: process } +19 --> 21 +18 --> 21 +22["Datamash"]@{ shape: process } +19 --> 22 +23["Datamash"]@{ shape: process } +19 --> 23 +24["Datamash"]@{ shape: process } +21 --> 24 +25["Join"]@{ shape: process } +17 --> 25 +22 --> 25 +26["Join"]@{ shape: process } +16 --> 26 +23 --> 26 +27["Cut"]@{ shape: process } +24 --> 27 +28["Join"]@{ shape: process } +25 --> 28 +20 --> 28 +29["Cut"]@{ shape: process } +26 --> 29 +30["Replace"]@{ shape: process } +27 --> 30 +31["Cut"]@{ shape: process } +28 --> 31 +32["Split file"]@{ shape: process } +29 --> 32 +33["Sort"]@{ shape: process } +30 --> 33 +34["Sort"]@{ shape: process } +31 --> 34 +35["Variant Frequency Plot"]@{ shape: process } +5 --> 35 +32 --> 35 +``` diff --git a/workflows/scRNAseq/baredsc/baredSC-1d-logNorm_diagrams.md b/workflows/scRNAseq/baredsc/baredSC-1d-logNorm_diagrams.md new file mode 100644 index 000000000..bf9ad1288 --- /dev/null +++ b/workflows/scRNAseq/baredsc/baredSC-1d-logNorm_diagrams.md @@ -0,0 +1,40 @@ +# Workflow diagrams + +## baredSC_1d_logNorm + +```mermaid +graph LR +0["Tabular with raw expression values"]@{ shape: doc } +1["Gene name"]@{ shape: lean-l } +2["Maximum value in logNorm"]@{ shape: lean-l } +3["Maximum number of Gaussians to study"]@{ shape: lean-l } +4["generate_param_list_one_to_number"]@{ shape: subprocess } +3 --> 4 +5["baredSC"]@{ shape: process } +4 --> 5 +2 --> 5 +1 --> 5 +0 --> 5 +6["combine baredSC 1d"]@{ shape: process } +5 --> 6 +2 --> 6 +1 --> 6 +0 --> 6 +``` + +## generate_param_list_one_to_number + +```mermaid +graph LR +0["Maximum number in param output"]@{ shape: lean-l } +1["create first tabular with good number of rows."]@{ shape: process } +0 --> 1 +2["add column with numbers starting at 1"]@{ shape: process } +1 --> 2 +3["retrieve numbers"]@{ shape: process } +2 --> 3 +4["split numbers to collection"]@{ shape: process } +3 --> 4 +5["numbers to param"]@{ shape: process } +4 --> 5 +``` diff --git a/workflows/scRNAseq/baredsc/baredSC-2d-logNorm_diagrams.md b/workflows/scRNAseq/baredsc/baredSC-2d-logNorm_diagrams.md new file mode 100644 index 000000000..c4691ea23 --- /dev/null +++ b/workflows/scRNAseq/baredsc/baredSC-2d-logNorm_diagrams.md @@ -0,0 +1,48 @@ +# Workflow diagrams + +## baredSC_2d_logNorm + +```mermaid +graph LR +0["Tabular with raw expression values"]@{ shape: doc } +1["Gene name for x axis"]@{ shape: lean-l } +2["maximum value in logNorm for x-axis"]@{ shape: lean-l } +3["Gene name for y axis"]@{ shape: lean-l } +4["maximum value in logNorm for y-axis"]@{ shape: lean-l } +5["Maximum number of Gaussians to study"]@{ shape: lean-l } +6["compute p-value"]@{ shape: lean-l } +7["generate_param_list_one_to_number"]@{ shape: subprocess } +5 --> 7 +8["baredSC 2d"]@{ shape: process } +7 --> 8 +2 --> 8 +4 --> 8 +1 --> 8 +3 --> 8 +0 --> 8 +9["Combine multiple 2D Models"]@{ shape: process } +8 --> 9 +2 --> 9 +4 --> 9 +6 --> 9 +1 --> 9 +3 --> 9 +0 --> 9 +``` + +## generate_param_list_one_to_number + +```mermaid +graph LR +0["Maximum number in param output"]@{ shape: lean-l } +1["create first tabular with good number of rows."]@{ shape: process } +0 --> 1 +2["add column with numbers starting at 1"]@{ shape: process } +1 --> 2 +3["retrieve numbers"]@{ shape: process } +2 --> 3 +4["split numbers to collection"]@{ shape: process } +3 --> 4 +5["numbers to param"]@{ shape: process } +4 --> 5 +``` diff --git a/workflows/scRNAseq/fastq-to-matrix-10x/scrna-seq-fastq-to-matrix-10x-cellplex_diagrams.md b/workflows/scRNAseq/fastq-to-matrix-10x/scrna-seq-fastq-to-matrix-10x-cellplex_diagrams.md new file mode 100644 index 000000000..bbea1ae4b --- /dev/null +++ b/workflows/scRNAseq/fastq-to-matrix-10x/scrna-seq-fastq-to-matrix-10x-cellplex_diagrams.md @@ -0,0 +1,96 @@ +# Workflow diagrams + +## scRNA-seq_preprocessing_10X_cellPlex + +```mermaid +graph LR +0["fastq PE collection GEX"]@{ shape: docs } +1["reference genome"]@{ shape: lean-l } +2["gtf"]@{ shape: doc } +3["cellranger_barcodes_3M-february-2018.txt"]@{ shape: doc } +4["Barcode Size is same size of the Read"]@{ shape: lean-l } +5["fastq PE collection CMO"]@{ shape: docs } +6["sample name and CMO sequence collection"]@{ shape: docs } +7["Number of expected cells"]@{ shape: lean-l } +8["process GEX reads"]@{ shape: subprocess } +4 --> 8 +3 --> 8 +0 --> 8 +2 --> 8 +1 --> 8 +9["CITE-seq-Count"]@{ shape: process } +7 --> 9 +5 --> 9 +6 --> 9 +3 --> 9 +10["Rename STAR-solo output"]@{ shape: process } +8 --> 10 +11["translate 10x barcode v2 to 10x barcode v1"]@{ shape: process } +9 --> 11 +12["Re-organize CITE-seq-Count output"]@{ shape: subprocess } +11 --> 12 +9 --> 12 +9 --> 12 +13["rename CITE-seq output"]@{ shape: process } +12 --> 13 +``` + +## scRNA-seq_preprocessing_10X_v3_Bundle + +```mermaid +graph LR +0["fastq PE collection"]@{ shape: docs } +1["reference genome"]@{ shape: lean-l } +2["gtf"]@{ shape: doc } +3["cellranger_barcodes_3M-february-2018.txt"]@{ shape: doc } +4["Barcode Size is same size of the Read"]@{ shape: lean-l } +5["RNA STARSolo"]@{ shape: process } +1 --> 5 +2 --> 5 +0 --> 5 +4 --> 5 +3 --> 5 +6["multiQC"]@{ shape: process } +5 --> 6 +5 --> 6 +7["filter cells"]@{ shape: process } +5 --> 7 +5 --> 7 +5 --> 7 +8["Re-organize STAR-solo output"]@{ shape: subprocess } +7 --> 8 +7 --> 8 +7 --> 8 +``` + +## Re-organize-STAR-solo-output + +```mermaid +graph LR +0["STARsolo Genes"]@{ shape: docs } +1["STARsolo Barcodes"]@{ shape: docs } +2["STARsolo Matrix Gene Counts"]@{ shape: docs } +3["Extract element identifiers"]@{ shape: process } +2 --> 3 +4["Replace Text"]@{ shape: process } +3 --> 4 +5["Replace Text"]@{ shape: process } +3 --> 5 +6["Replace Text"]@{ shape: process } +3 --> 6 +7["Relabel identifiers"]@{ shape: process } +4 --> 7 +0 --> 7 +8["Relabel identifiers"]@{ shape: process } +5 --> 8 +1 --> 8 +9["Relabel identifiers"]@{ shape: process } +6 --> 9 +2 --> 9 +10["Merge collections"]@{ shape: process } +9 --> 10 +8 --> 10 +7 --> 10 +11["Apply rules"]@{ shape: process } +10 --> 11 +``` diff --git a/workflows/scRNAseq/fastq-to-matrix-10x/scrna-seq-fastq-to-matrix-10x-v3_diagrams.md b/workflows/scRNAseq/fastq-to-matrix-10x/scrna-seq-fastq-to-matrix-10x-v3_diagrams.md new file mode 100644 index 000000000..34b6e30dc --- /dev/null +++ b/workflows/scRNAseq/fastq-to-matrix-10x/scrna-seq-fastq-to-matrix-10x-v3_diagrams.md @@ -0,0 +1,61 @@ +# Workflow diagrams + +## scRNA-seq_preprocessing_10X_v3_Bundle + +```mermaid +graph LR +0["fastq PE collection"]@{ shape: docs } +1["reference genome"]@{ shape: lean-l } +2["gtf"]@{ shape: doc } +3["cellranger_barcodes_3M-february-2018.txt"]@{ shape: doc } +4["Barcode Size is same size of the Read"]@{ shape: lean-l } +5["RNA STARSolo"]@{ shape: process } +1 --> 5 +2 --> 5 +0 --> 5 +4 --> 5 +3 --> 5 +6["multiQC"]@{ shape: process } +5 --> 6 +5 --> 6 +7["filter cells"]@{ shape: process } +5 --> 7 +5 --> 7 +5 --> 7 +8["Re-organize STAR-solo output"]@{ shape: subprocess } +7 --> 8 +7 --> 8 +7 --> 8 +``` + +## Re-organize-STAR-solo-output + +```mermaid +graph LR +0["STARsolo Genes"]@{ shape: docs } +1["STARsolo Barcodes"]@{ shape: docs } +2["STARsolo Matrix Gene Counts"]@{ shape: docs } +3["Extract element identifiers"]@{ shape: process } +2 --> 3 +4["Replace Text"]@{ shape: process } +3 --> 4 +5["Replace Text"]@{ shape: process } +3 --> 5 +6["Replace Text"]@{ shape: process } +3 --> 6 +7["Relabel identifiers"]@{ shape: process } +4 --> 7 +0 --> 7 +8["Relabel identifiers"]@{ shape: process } +5 --> 8 +1 --> 8 +9["Relabel identifiers"]@{ shape: process } +6 --> 9 +2 --> 9 +10["Merge collections"]@{ shape: process } +9 --> 10 +8 --> 10 +7 --> 10 +11["Apply rules"]@{ shape: process } +10 --> 11 +``` diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR_diagrams.md b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR_diagrams.md new file mode 100644 index 000000000..5f1b6dfc8 --- /dev/null +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR_diagrams.md @@ -0,0 +1,52 @@ +# Workflow diagrams + +## Differential gene expression for single-cell data using pseudo-bulk counts with edgeR + +```mermaid +graph LR +0["Source AnnData file"]@{ shape: doc } +1["Pseudo-bulk: Fields to merge"]@{ shape: lean-l } +2["Group by column"]@{ shape: lean-l } +3["Sample key column"]@{ shape: lean-l } +4["Name Your Raw Counts Layer"]@{ shape: lean-l } +5["Factor fields"]@{ shape: lean-l } +6["Formula"]@{ shape: lean-l } +7["Gene symbol column"]@{ shape: lean-l } +8["Decoupler pseudo-bulk"]@{ shape: process } +1 --> 8 +5 --> 8 +2 --> 8 +0 --> 8 +4 --> 8 +3 --> 8 +9["Sanitize matrix"]@{ shape: process } +8 --> 9 +10["Sanitize factors"]@{ shape: process } +8 --> 10 +11["Remove start, end, width"]@{ shape: process } +8 --> 11 +12["Sanitize first factor for leading digits"]@{ shape: process } +10 --> 12 +13["Text reformatting"]@{ shape: process } +12 --> 13 +14["edgeR"]@{ shape: process } +11 --> 14 +13 --> 14 +6 --> 14 +9 --> 14 +12 --> 14 +15["Get contrast labels"]@{ shape: process } +14 --> 15 +16["Select gene symbols, logFC, PValue and FDR"]@{ shape: process } +7 --> 16 +14 --> 16 +17["Replace Text"]@{ shape: process } +15 --> 17 +18["Split contrasts"]@{ shape: process } +17 --> 18 +19["Contrast as parameters"]@{ shape: process } +18 --> 19 +20["Volcano Plot"]@{ shape: process } +16 --> 20 +19 --> 20 +``` diff --git a/workflows/scRNAseq/velocyto/Velocyto-on10X-filtered-barcodes_diagrams.md b/workflows/scRNAseq/velocyto/Velocyto-on10X-filtered-barcodes_diagrams.md new file mode 100644 index 000000000..c98ebbc41 --- /dev/null +++ b/workflows/scRNAseq/velocyto/Velocyto-on10X-filtered-barcodes_diagrams.md @@ -0,0 +1,14 @@ +# Workflow diagrams + +## Velocyto-on10X-filtered-barcodes + +```mermaid +graph LR +0["BAM files with CB and UB"]@{ shape: docs } +1["filtered barcodes"]@{ shape: docs } +2["gtf file"]@{ shape: doc } +3["velocyto"]@{ shape: process } +0 --> 3 +1 --> 3 +2 --> 3 +``` diff --git a/workflows/scRNAseq/velocyto/Velocyto-on10X-from-bundled_diagrams.md b/workflows/scRNAseq/velocyto/Velocyto-on10X-from-bundled_diagrams.md new file mode 100644 index 000000000..246250616 --- /dev/null +++ b/workflows/scRNAseq/velocyto/Velocyto-on10X-from-bundled_diagrams.md @@ -0,0 +1,29 @@ +# Workflow diagrams + +## Velocyto-on10X-from-bundled + +```mermaid +graph LR +0["BAM files with CB and UB"]@{ shape: docs } +1["filtered matrices in bundle"]@{ shape: docs } +2["gtf file"]@{ shape: doc } +3["extract barcodes from bundle"]@{ shape: process } +1 --> 3 +4["Velocyto_on10X_filtered_barcodes"]@{ shape: subprocess } +0 --> 4 +3 --> 4 +2 --> 4 +``` + +## Velocyto_on10X_filtered_barcodes + +```mermaid +graph LR +0["BAM files with CB and UB"]@{ shape: docs } +1["filtered barcodes"]@{ shape: docs } +2["gtf file"]@{ shape: doc } +3["velocyto"]@{ shape: process } +0 --> 3 +1 --> 3 +2 --> 3 +``` diff --git a/workflows/transcriptomics/brew3r/BREW3R_diagrams.md b/workflows/transcriptomics/brew3r/BREW3R_diagrams.md new file mode 100644 index 000000000..8b7de4c8f --- /dev/null +++ b/workflows/transcriptomics/brew3r/BREW3R_diagrams.md @@ -0,0 +1,28 @@ +# Workflow diagrams + +## BREW3R + +```mermaid +graph LR +0["Input gtf"]@{ shape: doc } +1["BAM collection"]@{ shape: docs } +2["strandedness"]@{ shape: lean-l } +3["minimum coverage"]@{ shape: lean-l } +4["minimum FPKM for merge"]@{ shape: lean-l } +5["Map parameter value"]@{ shape: process } +2 --> 5 +6["Unstranded"]@{ shape: process } +2 --> 6 +7["assembl with StringTie"]@{ shape: process } +3 --> 7 +3 --> 7 +1 --> 7 +5 --> 7 +8["merge assembled transcripts"]@{ shape: process } +7 --> 8 +4 --> 8 +9["BREW3R.r"]@{ shape: process } +6 --> 9 +0 --> 9 +8 --> 9 +``` diff --git a/workflows/transcriptomics/goseq/goseq-go-kegg-enrichment-analsis_diagrams.md b/workflows/transcriptomics/goseq/goseq-go-kegg-enrichment-analsis_diagrams.md new file mode 100644 index 000000000..8f77becfa --- /dev/null +++ b/workflows/transcriptomics/goseq/goseq-go-kegg-enrichment-analsis_diagrams.md @@ -0,0 +1,35 @@ +# Workflow diagrams + +## Goseq GO-KEGG Enrichment Analysis + +```mermaid +graph LR +0["Select genome to use"]@{ shape: lean-l } +1["Differential expression result"]@{ shape: doc } +2["Select gene ID format"]@{ shape: lean-l } +3["gene length"]@{ shape: doc } +4["KEGG pathways"]@{ shape: doc } +5["goseq - Cellular Component"]@{ shape: process } +2 --> 5 +0 --> 5 +1 --> 5 +3 --> 5 +6["goseq - Biological Process"]@{ shape: process } +2 --> 6 +0 --> 6 +1 --> 6 +3 --> 6 +7["goseq - Molecular Function"]@{ shape: process } +2 --> 7 +0 --> 7 +1 --> 7 +3 --> 7 +8["goseq - KEGG"]@{ shape: process } +2 --> 8 +0 --> 8 +1 --> 8 +3 --> 8 +9["Join two Datasets"]@{ shape: process } +8 --> 9 +4 --> 9 +``` diff --git a/workflows/transcriptomics/rnaseq-de/rnaseq-de-filtering-plotting_diagrams.md b/workflows/transcriptomics/rnaseq-de/rnaseq-de-filtering-plotting_diagrams.md new file mode 100644 index 000000000..80618cb3b --- /dev/null +++ b/workflows/transcriptomics/rnaseq-de/rnaseq-de-filtering-plotting_diagrams.md @@ -0,0 +1,61 @@ +# Workflow diagrams + +## RNAseq_DE_filtering_plotting + +```mermaid +graph LR +0["Counts from changed condition"]@{ shape: docs } +1["Counts from reference condition"]@{ shape: docs } +2["Count files have header"]@{ shape: lean-l } +3["Gene Annotaton"]@{ shape: doc } +4["Adjusted p-value threshold"]@{ shape: lean-l } +5["Create text file"]@{ shape: process } +6["log2 fold change threshold"]@{ shape: lean-l } +7["Pick parameter value"]@{ shape: process } +4 --> 7 +8["Text transformation"]@{ shape: process } +5 --> 8 +9["Pick parameter value"]@{ shape: process } +6 --> 9 +10["Differential Analysis"]@{ shape: process } +2 --> 10 +7 --> 10 +0 --> 10 +1 --> 10 +11["Compose text parameter value"]@{ shape: process } +7 --> 11 +12["Compose text parameter value"]@{ shape: process } +9 --> 12 +13["Annotate DESeq2/DEXSeq output tables"]@{ shape: process } +3 --> 13 +10 --> 13 +14["Text reformatting"]@{ shape: process } +10 --> 14 +15["Annotate DESeq2 table"]@{ shape: process } +8 --> 15 +13 --> 15 +16["Parse parameter value"]@{ shape: process } +14 --> 16 +17["Filter with p-adj threshold"]@{ shape: process } +11 --> 17 +15 --> 17 +18["Generate Valcanot plot of DE genes"]@{ shape: process } +15 --> 18 +9 --> 18 +7 --> 18 +19["Compose text parameter value"]@{ shape: process } +16 --> 19 +20["Filter with log2 FC threshold"]@{ shape: process } +12 --> 20 +17 --> 20 +21["Join two Datasets"]@{ shape: process } +10 --> 21 +20 --> 21 +22["Cut"]@{ shape: process } +19 --> 22 +21 --> 22 +23["Generate Heatmap of counts"]@{ shape: process } +22 --> 23 +24["Generate Heatmap of Z-scores"]@{ shape: process } +22 --> 24 +``` diff --git a/workflows/transcriptomics/rnaseq-pe/rnaseq-pe_diagrams.md b/workflows/transcriptomics/rnaseq-pe/rnaseq-pe_diagrams.md new file mode 100644 index 000000000..7be3f024b --- /dev/null +++ b/workflows/transcriptomics/rnaseq-pe/rnaseq-pe_diagrams.md @@ -0,0 +1,196 @@ +# Workflow diagrams + +## RNA-seq for Paired-end fastqs + +```mermaid +graph LR +0["Collection paired FASTQ files"]@{ shape: docs } +1["Forward adapter"]@{ shape: lean-l } +2["Reverse adapter"]@{ shape: lean-l } +3["Generate additional QC reports"]@{ shape: lean-l } +4["Reference genome"]@{ shape: lean-l } +5["GTF file of annotation"]@{ shape: doc } +6["Strandedness"]@{ shape: lean-l } +7["Use featureCounts for generating count tables"]@{ shape: lean-l } +8["Compute Cufflinks FPKM"]@{ shape: lean-l } +9["GTF with regions to exclude from FPKM normalization with Cufflinks"]@{ shape: doc } +10["Compute StringTie FPKM"]@{ shape: lean-l } +11["Flatten collection"]@{ shape: process } +0 --> 11 +12["remove adapters + bad quality bases"]@{ shape: process } +1 --> 12 +2 --> 12 +0 --> 12 +13["no additional QC"]@{ shape: process } +3 --> 13 +14["get reference_genome as text parameter"]@{ shape: process } +4 --> 14 +15["Get featureCounts strandedness parameter"]@{ shape: process } +6 --> 15 +16["Get cufflinks strandedness parameter"]@{ shape: process } +6 --> 16 +17["Get Stringtie strandedness parameter"]@{ shape: process } +6 --> 17 +18["STAR: map and count and coverage splitted"]@{ shape: process } +4 --> 18 +5 --> 18 +12 --> 18 +19["Generate Unstranded Coverage"]@{ shape: subprocess } +18 --> 19 +18 --> 19 +20["Generate Stranded Coverage"]@{ shape: subprocess } +18 --> 20 +18 --> 20 +6 --> 20 +21["featureCounts"]@{ shape: process } +18 --> 21 +5 --> 21 +15 --> 21 +7 --> 21 +22["Compute FPKM with StringTie"]@{ shape: process } +5 --> 22 +18 --> 22 +17 --> 22 +10 --> 22 +23["Compute FPKM with cufflinks"]@{ shape: process } +16 --> 23 +9 --> 23 +14 --> 23 +18 --> 23 +5 --> 23 +8 --> 23 +24["Process Count files"]@{ shape: subprocess } +18 --> 24 +6 --> 24 +21 --> 24 +21 --> 24 +25["Combined MultiQC without additional QC"]@{ shape: process } +12 --> 25 +18 --> 25 +18 --> 25 +24 --> 25 +13 --> 25 +26["Combined MultiQC Quality Report with additional QC"]@{ shape: subprocess } +11 --> 26 +18 --> 26 +18 --> 26 +18 --> 26 +12 --> 26 +24 --> 26 +5 --> 26 +3 --> 26 +``` + +## RNA-seq-Paired-QC + +```mermaid +graph LR +0["FASTQ collection"]@{ shape: docs } +1["fastp Reports"]@{ shape: docs } +2["STAR logs"]@{ shape: docs } +3["STAR gene counts"]@{ shape: docs } +4["featureCounts summaries"]@{ shape: docs } +5["reference_annotation_gtf"]@{ shape: doc } +6["STAR paired-end BAM"]@{ shape: docs } +7["FastQC check read qualities"]@{ shape: process } +0 --> 7 +8["convert gtf to bed12"]@{ shape: process } +5 --> 8 +9["Subsample 200k reads"]@{ shape: process } +6 --> 9 +10["Get reads number per chromosome"]@{ shape: process } +6 --> 10 +11["Remove duplicates"]@{ shape: process } +6 --> 11 +12["Read distribution over genomic features"]@{ shape: process } +6 --> 12 +8 --> 12 +13["Get gene body coverage"]@{ shape: process } +9 --> 13 +8 --> 13 +14["Combined Quality Report"]@{ shape: process } +7 --> 14 +1 --> 14 +2 --> 14 +3 --> 14 +4 --> 14 +12 --> 14 +13 --> 14 +10 --> 14 +11 --> 14 +``` + +## Process Count files + +```mermaid +graph LR +0["A dummy featureCounts summary file"]@{ shape: process } +1["featureCounts summaries collection"]@{ shape: docs } +2["Strandness param"]@{ shape: lean-l } +3["RNA STAR count tables"]@{ shape: docs } +4["featureCounts count table"]@{ shape: docs } +5["Text transformation"]@{ shape: process } +0 --> 5 +6["awk command from strand for STAR counts"]@{ shape: process } +2 --> 6 +7["featureCounts summaries"]@{ shape: process } +1 --> 7 +5 --> 7 +8["Extract gene counts"]@{ shape: process } +6 --> 8 +3 --> 8 +9["Counts table"]@{ shape: process } +4 --> 9 +8 --> 9 +``` + +## Re-arrange Stranded RNA-seq coverage + +```mermaid +graph LR +0["strandedness"]@{ shape: lean-l } +1["Bedgraph strand 1"]@{ shape: docs } +2["Bedgraph strand 2"]@{ shape: docs } +3["Get replacement for strand2"]@{ shape: process } +0 --> 3 +4["Get replacement for strand1"]@{ shape: process } +0 --> 4 +5["get identifiers"]@{ shape: process } +1 --> 5 +6["New labels strand 2"]@{ shape: process } +3 --> 6 +5 --> 6 +7["New labels strand 1"]@{ shape: process } +4 --> 7 +5 --> 7 +8["Relabelled strand 2"]@{ shape: process } +6 --> 8 +2 --> 8 +9["Relabelled strand 1"]@{ shape: process } +7 --> 9 +1 --> 9 +10["Merge collections"]@{ shape: process } +9 --> 10 +8 --> 10 +11["convert to bigwig"]@{ shape: process } +10 --> 11 +``` + +## Get Uniquely mapped unstranded coverage + +```mermaid +graph LR +0["STAR log"]@{ shape: docs } +1["STAR BAM"]@{ shape: docs } +2["get scaling factor"]@{ shape: process } +0 --> 2 +3["keep uniquely mapped reads"]@{ shape: process } +1 --> 3 +4["Parse parameter value"]@{ shape: process } +2 --> 4 +5["Scaled Coverage both strands combined"]@{ shape: process } +3 --> 5 +4 --> 5 +6["convert both strands coverage to bigwig"]@{ shape: process } +5 --> 6 +``` diff --git a/workflows/transcriptomics/rnaseq-sr/rnaseq-sr_diagrams.md b/workflows/transcriptomics/rnaseq-sr/rnaseq-sr_diagrams.md new file mode 100644 index 000000000..3c7ddc8f6 --- /dev/null +++ b/workflows/transcriptomics/rnaseq-sr/rnaseq-sr_diagrams.md @@ -0,0 +1,192 @@ +# Workflow diagrams + +## RNA-seq for Single-read fastqs + +```mermaid +graph LR +0["Collection of FASTQ files"]@{ shape: docs } +1["Forward adapter"]@{ shape: lean-l } +2["Generate additional QC reports"]@{ shape: lean-l } +3["Reference genome"]@{ shape: lean-l } +4["GTF file of annotation"]@{ shape: doc } +5["Strandedness"]@{ shape: lean-l } +6["Use featureCounts for generating count tables"]@{ shape: lean-l } +7["Compute Cufflinks FPKM"]@{ shape: lean-l } +8["GTF with regions to exclude from FPKM normalization with Cufflinks"]@{ shape: doc } +9["Compute StringTie FPKM"]@{ shape: lean-l } +10["remove adapters + bad quality bases"]@{ shape: process } +1 --> 10 +0 --> 10 +11["no additional QC"]@{ shape: process } +2 --> 11 +12["get reference_genome as text parameter"]@{ shape: process } +3 --> 12 +13["Get featureCounts strandedness parameter"]@{ shape: process } +5 --> 13 +14["Get cufflinks strandedness parameter"]@{ shape: process } +5 --> 14 +15["Get Stringtie strandedness parameter"]@{ shape: process } +5 --> 15 +16["STAR: map and count and coverage splitted"]@{ shape: process } +3 --> 16 +4 --> 16 +10 --> 16 +17["Generate Unstranded Coverage"]@{ shape: subprocess } +16 --> 17 +16 --> 17 +18["Generate Stranded Coverage"]@{ shape: subprocess } +16 --> 18 +16 --> 18 +5 --> 18 +19["featureCounts"]@{ shape: process } +16 --> 19 +4 --> 19 +13 --> 19 +6 --> 19 +20["Compute FPKM with StringTie"]@{ shape: process } +4 --> 20 +16 --> 20 +15 --> 20 +9 --> 20 +21["Compute FPKM with cufflinks"]@{ shape: process } +14 --> 21 +8 --> 21 +12 --> 21 +16 --> 21 +4 --> 21 +7 --> 21 +22["Process Count files"]@{ shape: subprocess } +16 --> 22 +5 --> 22 +19 --> 22 +19 --> 22 +23["Combined MultiQC without additional QC"]@{ shape: process } +10 --> 23 +16 --> 23 +16 --> 23 +22 --> 23 +11 --> 23 +24["Combined MultiQC Quality Report"]@{ shape: subprocess } +0 --> 24 +16 --> 24 +16 --> 24 +16 --> 24 +10 --> 24 +22 --> 24 +4 --> 24 +2 --> 24 +``` + +## RNA-seq-QC + +```mermaid +graph LR +0["FASTQ collection"]@{ shape: docs } +1["fastp Reports"]@{ shape: docs } +2["STAR logs"]@{ shape: docs } +3["STAR counts"]@{ shape: docs } +4["featureCounts summaries"]@{ shape: docs } +5["reference_annotation_gtf"]@{ shape: doc } +6["STAR BAM"]@{ shape: docs } +7["FastQC check read qualities"]@{ shape: process } +0 --> 7 +8["convert gtf to bed12"]@{ shape: process } +5 --> 8 +9["Subsample 200k reads"]@{ shape: process } +6 --> 9 +10["Get reads number per chromosome"]@{ shape: process } +6 --> 10 +11["Remove duplicates"]@{ shape: process } +6 --> 11 +12["Read distribution over genomic features"]@{ shape: process } +6 --> 12 +8 --> 12 +13["Get gene body coverage"]@{ shape: process } +9 --> 13 +8 --> 13 +14["Combined Quality Report"]@{ shape: process } +7 --> 14 +1 --> 14 +2 --> 14 +3 --> 14 +4 --> 14 +12 --> 14 +13 --> 14 +10 --> 14 +11 --> 14 +``` + +## Process Count files + +```mermaid +graph LR +0["A dummy featureCounts summary file"]@{ shape: process } +1["featureCounts summaries collection"]@{ shape: docs } +2["Strandness param"]@{ shape: lean-l } +3["RNA STAR count tables"]@{ shape: docs } +4["featureCounts count table"]@{ shape: docs } +5["Text transformation"]@{ shape: process } +0 --> 5 +6["awk command from strand for STAR counts"]@{ shape: process } +2 --> 6 +7["featureCounts summaries"]@{ shape: process } +1 --> 7 +5 --> 7 +8["Extract gene counts"]@{ shape: process } +6 --> 8 +3 --> 8 +9["Counts table"]@{ shape: process } +4 --> 9 +8 --> 9 +``` + +## Re-arrange Stranded RNA-seq coverage + +```mermaid +graph LR +0["strandedness"]@{ shape: lean-l } +1["Bedgraph strand 1"]@{ shape: docs } +2["Bedgraph strand 2"]@{ shape: docs } +3["Get replacement for strand2"]@{ shape: process } +0 --> 3 +4["Get replacement for strand1"]@{ shape: process } +0 --> 4 +5["get identifiers"]@{ shape: process } +1 --> 5 +6["New labels strand 2"]@{ shape: process } +3 --> 6 +5 --> 6 +7["New labels strand 1"]@{ shape: process } +4 --> 7 +5 --> 7 +8["Relabelled strand 2"]@{ shape: process } +6 --> 8 +2 --> 8 +9["Relabelled strand 1"]@{ shape: process } +7 --> 9 +1 --> 9 +10["Merge collections"]@{ shape: process } +9 --> 10 +8 --> 10 +11["convert to bigwig"]@{ shape: process } +10 --> 11 +``` + +## Get Uniquely mapped unstranded coverage + +```mermaid +graph LR +0["STAR log"]@{ shape: docs } +1["STAR BAM"]@{ shape: docs } +2["get scaling factor"]@{ shape: process } +0 --> 2 +3["keep uniquely mapped reads"]@{ shape: process } +1 --> 3 +4["Parse parameter value"]@{ shape: process } +2 --> 4 +5["Scaled Coverage both strands combined"]@{ shape: process } +3 --> 5 +4 --> 5 +6["convert both strands coverage to bigwig"]@{ shape: process } +5 --> 6 +``` diff --git a/workflows/variant-calling/generic-variant-calling-wgs-pe/Generic-variation-analysis-on-WGS-PE-data_diagrams.md b/workflows/variant-calling/generic-variant-calling-wgs-pe/Generic-variation-analysis-on-WGS-PE-data_diagrams.md new file mode 100644 index 000000000..d11741d7c --- /dev/null +++ b/workflows/variant-calling/generic-variant-calling-wgs-pe/Generic-variation-analysis-on-WGS-PE-data_diagrams.md @@ -0,0 +1,42 @@ +# Workflow diagrams + +## Generic variation analysis on WGS PE data + +```mermaid +graph LR +0["Paired Collection"]@{ shape: docs } +1["GenBank genome"]@{ shape: doc } +2["Name for genome database"]@{ shape: lean-l } +3["fastp"]@{ shape: process } +0 --> 3 +4["SnpEff build:"]@{ shape: process } +2 --> 4 +1 --> 4 +5["Map with BWA-MEM"]@{ shape: process } +3 --> 5 +4 --> 5 +6["Samtools view"]@{ shape: process } +5 --> 6 +7["MarkDuplicates"]@{ shape: process } +6 --> 7 +8["Samtools stats"]@{ shape: process } +6 --> 8 +9["Realign reads"]@{ shape: process } +7 --> 9 +4 --> 9 +10["MultiQC"]@{ shape: process } +3 --> 10 +8 --> 10 +7 --> 10 +11["Insert indel qualities"]@{ shape: process } +9 --> 11 +4 --> 11 +12["Call variants"]@{ shape: process } +11 --> 12 +4 --> 12 +13["Lofreq filter"]@{ shape: process } +12 --> 13 +14["SnpEff eff:"]@{ shape: process } +13 --> 14 +4 --> 14 +``` diff --git a/workflows/variant-calling/haploid-variant-calling-wgs-pe/WGS-PE-variant-calling-in-haploid-system_diagrams.md b/workflows/variant-calling/haploid-variant-calling-wgs-pe/WGS-PE-variant-calling-in-haploid-system_diagrams.md new file mode 100644 index 000000000..a1733c44a --- /dev/null +++ b/workflows/variant-calling/haploid-variant-calling-wgs-pe/WGS-PE-variant-calling-in-haploid-system_diagrams.md @@ -0,0 +1,43 @@ +# Workflow diagrams + +## Paired end variant calling in haploid system + +```mermaid +graph LR +0["Paired Collection"]@{ shape: docs } +1["Annotation GTF"]@{ shape: doc } +2["Genome fasta"]@{ shape: doc } +3["fastp"]@{ shape: process } +0 --> 3 +4["SnpEff build:"]@{ shape: process } +1 --> 4 +2 --> 4 +5["Map with BWA-MEM"]@{ shape: process } +3 --> 5 +2 --> 5 +6["Samtools view"]@{ shape: process } +5 --> 6 +7["Samtools stats"]@{ shape: process } +6 --> 7 +8["MarkDuplicates"]@{ shape: process } +6 --> 8 +9["MultiQC"]@{ shape: process } +3 --> 9 +7 --> 9 +8 --> 9 +10["Realign reads"]@{ shape: process } +8 --> 10 +2 --> 10 +11["Call variants"]@{ shape: process } +10 --> 11 +2 --> 11 +12["Text reformatting"]@{ shape: process } +11 --> 12 +13["SnpEff eff:"]@{ shape: process } +12 --> 13 +4 --> 13 +14["SnpSift Extract Fields"]@{ shape: process } +13 --> 14 +15["Collapse Collection"]@{ shape: process } +14 --> 15 +``` diff --git a/workflows/variant-calling/variation-reporting/Generic-variation-analysis-reporting_diagrams.md b/workflows/variant-calling/variation-reporting/Generic-variation-analysis-reporting_diagrams.md new file mode 100644 index 000000000..76436228a --- /dev/null +++ b/workflows/variant-calling/variation-reporting/Generic-variation-analysis-reporting_diagrams.md @@ -0,0 +1,83 @@ +# Workflow diagrams + +## Generic variation analysis reporting + +```mermaid +graph LR +0["Variation data to report"]@{ shape: docs } +1["AF Filter"]@{ shape: lean-l } +2["DP Filter"]@{ shape: lean-l } +3["DP_ALT Filter"]@{ shape: lean-l } +4["SnpSift Filter"]@{ shape: process } +0 --> 4 +5["Compose text parameter value"]@{ shape: process } +1 --> 5 +2 --> 5 +3 --> 5 +6["Compose text parameter value"]@{ shape: process } +1 --> 6 +2 --> 6 +3 --> 6 +7["SnpSift Filter"]@{ shape: process } +5 --> 7 +6 --> 7 +4 --> 7 +8["SnpSift Extract Fields"]@{ shape: process } +7 --> 8 +9["Compute"]@{ shape: process } +8 --> 9 +10["Datamash"]@{ shape: process } +9 --> 10 +11["Replace"]@{ shape: process } +10 --> 11 +12["Replace"]@{ shape: process } +11 --> 12 +13["Replace"]@{ shape: process } +12 --> 13 +14["Collapse Collection"]@{ shape: process } +13 --> 14 +15["Compute"]@{ shape: process } +14 --> 15 +16["Compute"]@{ shape: process } +15 --> 16 +17["Replace"]@{ shape: process } +16 --> 17 +18["Datamash"]@{ shape: process } +17 --> 18 +19["Filter"]@{ shape: process } +17 --> 19 +20["Datamash"]@{ shape: process } +17 --> 20 +21["Datamash"]@{ shape: process } +19 --> 21 +22["Join"]@{ shape: process } +19 --> 22 +18 --> 22 +23["Datamash"]@{ shape: process } +19 --> 23 +24["Join"]@{ shape: process } +16 --> 24 +21 --> 24 +25["Datamash"]@{ shape: process } +22 --> 25 +26["Join"]@{ shape: process } +17 --> 26 +23 --> 26 +27["Cut"]@{ shape: process } +24 --> 27 +28["Cut"]@{ shape: process } +25 --> 28 +29["Join"]@{ shape: process } +26 --> 29 +20 --> 29 +30["Split file"]@{ shape: process } +27 --> 30 +31["Replace"]@{ shape: process } +28 --> 31 +32["Cut"]@{ shape: process } +29 --> 32 +33["Sort"]@{ shape: process } +31 --> 33 +34["Sort"]@{ shape: process } +32 --> 34 +``` diff --git a/workflows/virology/pox-virus-amplicon/pox-virus-half-genome_diagrams.md b/workflows/virology/pox-virus-amplicon/pox-virus-half-genome_diagrams.md new file mode 100644 index 000000000..2bd588942 --- /dev/null +++ b/workflows/virology/pox-virus-amplicon/pox-virus-half-genome_diagrams.md @@ -0,0 +1,109 @@ +# Workflow diagrams + +## Pox Virus Illumina Amplicon Workflow from half-genomes + +```mermaid +graph LR +0["Reference FASTA"]@{ shape: doc } +1["Primer Scheme"]@{ shape: doc } +2["PE Reads Pool1"]@{ shape: docs } +3["PE Reads Pool2"]@{ shape: docs } +4["Minimum quality score to call base"]@{ shape: lean-l } +5["Allele frequency to call SNV"]@{ shape: lean-l } +6["Allele frequency to call indel"]@{ shape: lean-l } +7["Compute sequence length"]@{ shape: process } +0 --> 7 +8["Select pool1 primers"]@{ shape: process } +1 --> 8 +9["Select pool2 primers"]@{ shape: process } +1 --> 9 +10["Extract element identifiers"]@{ shape: process } +2 --> 10 +11["fastp: Trimmed Illumina Reads Pool1"]@{ shape: process } +2 --> 11 +12["Cut"]@{ shape: process } +7 --> 12 +13["Datamash"]@{ shape: process } +8 --> 13 +14["Datamash"]@{ shape: process } +9 --> 14 +15["Split file"]@{ shape: process } +10 --> 15 +16["Sort collection"]@{ shape: process } +3 --> 16 +10 --> 16 +17["Get end position of sequence"]@{ shape: process } +12 --> 17 +18["Get end position of Pool1"]@{ shape: process } +13 --> 18 +19["Get start position of Pool2"]@{ shape: process } +14 --> 19 +20["Parse parameter value"]@{ shape: process } +15 --> 20 +21["fastp: Trimmed Illumina Reads Pool2"]@{ shape: process } +16 --> 21 +22["Compose text parameter value"]@{ shape: process } +18 --> 22 +17 --> 22 +23["Compose text parameter value"]@{ shape: process } +19 --> 23 +24["Compose text parameter value"]@{ shape: process } +20 --> 24 +25["Compose text parameter value"]@{ shape: process } +20 --> 25 +26["Mask Reference for Pool1"]@{ shape: process } +0 --> 26 +22 --> 26 +27["Mask Reference for Pool2"]@{ shape: process } +0 --> 27 +23 --> 27 +28["Map with BWA-MEM"]@{ shape: process } +11 --> 28 +26 --> 28 +24 --> 28 +29["Map with BWA-MEM"]@{ shape: process } +21 --> 29 +27 --> 29 +25 --> 29 +30["Samtools view"]@{ shape: process } +28 --> 30 +31["Samtools stats"]@{ shape: process } +28 --> 31 +32["Samtools view"]@{ shape: process } +29 --> 32 +33["Samtools stats"]@{ shape: process } +29 --> 33 +34["MultiQC"]@{ shape: process } +11 --> 34 +31 --> 34 +35["Zip collections"]@{ shape: process } +30 --> 35 +32 --> 35 +36["MultiQC"]@{ shape: process } +21 --> 36 +33 --> 36 +37["Apply rules"]@{ shape: process } +35 --> 37 +38["Samtools merge"]@{ shape: process } +37 --> 38 +39["QualiMap BamQC"]@{ shape: process } +38 --> 39 +40["ivar trim"]@{ shape: process } +38 --> 40 +1 --> 40 +41["Filter failed datasets"]@{ shape: process } +39 --> 41 +42["ivar consensus"]@{ shape: process } +40 --> 42 +5 --> 42 +6 --> 42 +4 --> 42 +43["Flatten collection"]@{ shape: process } +41 --> 43 +44["Text transformation"]@{ shape: process } +42 --> 44 +45["MultiQC"]@{ shape: process } +43 --> 45 +46["Concatenate datasets"]@{ shape: process } +44 --> 46 +```