From 87898519dac8831ff11a84896913b6c91edc2413 Mon Sep 17 00:00:00 2001 From: dianichj Date: Mon, 14 Oct 2024 21:14:09 +0200 Subject: [PATCH 01/21] Adding initial files to pseudo-bulk-edger-workflow --- .../.dockstore.yml | 15 + .../CHANGELOG.md | 4 + .../README.md | 41 + .../pseudo-bulk_edgeR-tests.yml | 54 + .../pseudo-bulk_edgeR.ga | 969 ++++++++++++++++++ .../test-data/contrasts_files.txt | 2 + 6 files changed, 1085 insertions(+) create mode 100644 workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/.dockstore.yml create mode 100644 workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/CHANGELOG.md create mode 100644 workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/README.md create mode 100644 workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml create mode 100644 workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga create mode 100644 workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/test-data/contrasts_files.txt diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/.dockstore.yml b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/.dockstore.yml new file mode 100644 index 000000000..c848e606d --- /dev/null +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/.dockstore.yml @@ -0,0 +1,15 @@ +version: 1.2 +workflows: +- name: main + subclass: Galaxy + publish: true + primaryDescriptorPath: /pseudo-bulk_edgeR.ga + testParameterFiles: + - /pseudo-bulk_edgeR-tests.yml + authors: + - name: Diana Chiang Jurado + orcid: 0000-0002-5857-1477 + - name: Pavankumar Videm + orcid: 0000-0002-5192-126X + - name: Pablo Moreno + orcid: 0000-0002-9856-1679 diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/CHANGELOG.md b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/CHANGELOG.md new file mode 100644 index 000000000..47bbdc1d6 --- /dev/null +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/CHANGELOG.md @@ -0,0 +1,4 @@ +# Changelog + +## [0.1] 2024-10-14 +First 
release. \ No newline at end of file diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/README.md b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/README.md new file mode 100644 index 000000000..b51062682 --- /dev/null +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/README.md @@ -0,0 +1,41 @@ +# Pseudobulk-edgeR workflows + + +## Inputs + +- deCoupler: Source AnnData (`h5ad`). + - Parameter: Pseudobulk: Fields to merge / optional + - Parameter: Group by column / has to be given + - Parameter: Sample key column / has to be given + - Parameter: Name your raw count layer / has to be given + - Parameter: Factor Field / has to be given +- edgeR: + - Sanitized Count Matrix + - Sanitized Factor File + - Cleaned Gene Annotations file + - Parameter: Formula for linear model / has to be given + - Contrast file / has to be given +- Volcano Plot: + - Input (`tabular`) file with genesymbol, logFC, Pvalue and FDR columns. + +## Processing + +Sanitization steps after decoupler: +- Sanitize Matrix and Factors (`tabular`): finds [ --+*^]+ and replaces it with _ +- Remove start, end, width (`tabular`): Columns that may affect EdgeR and DESeq2. +- Sanitize First Factor for leading digits (`tabular`): Finds ^([0-9])(.+) and replaces it with GG_\\1\\2 +- Get Contrast labels +- Replace text +- Split Contrasts +- Contrasts as Parameters: Plot title +- Select columns for volcano plot using (`Remove columns`) from DEG edgeR (`Table`) output. 
+ + +## Outputs + + - Pseudobulk_count_matrix (`tabular`) + - Pseudobulk Plot (`png`) + - Filtered by expression (`png`) + - Table DEG + - Results (`HTML`) File and plots for download within the output as (`png`) + - Volcano plot (`PDF`) diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml new file mode 100644 index 000000000..bfe7d713b --- /dev/null +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml @@ -0,0 +1,54 @@ +- doc: Test outline for pseudo-bulk_edgeR + job: + Source AnnData file: + class: File + location: https://zenodo.org/records/13929549/files/Source%20AnnData%20file.h5ad?download=1 + filetype: h5ad + contrasts_files: + class: File + path: test-data/contrasts_files.txt + filetype: txt + 'Pseudo-bulk: Fields to merge': null + Groupy column: cell_type + Sample key column: individual + Name Your Raw Counts Layer: counts + Factor fields: disease + Gene symbol column: gene_symbol + Formula: '~ 0 + disease ' + outputs: + 'pseudobulk_count_matrix': + has_text_matching: + expression: "ACAP2\t9.0\t18.0\t20.0\t68.0\t106.0\t122.0\t14.0\t259.0\t279.0\t184.0\t612.0\t293.0\t297.0\t46.0\t1.0\t0.0\t1.0\t12.0\t229.0\t151.0\t141.0\t309.0\t299.0\t181.0\t2.0\t2.0\t28.0\t15.0\t54.0\t210.0\t1.0\t1.0\t1.0\t11.0" + expression: "ACER3\t4.0\t25.0\t21.0\t110.0\t82.0\t91.0\t22.0\t326.0\t297.0\t211.0\t1004.0\t574.0\t370.0\t108.0\t0.0\t0.0\t2.0\t2.0\t188.0\t113.0\t135.0\t322.0\t324.0\t159.0\t7.0\t7.0\t32.0\t5.0\t33.0\t89.0\t2.0\t2.0\t8.0\t48.0" + 'Pseudobulk Plot': + element_test: + has_size: 40116 + delta: 2000 + 'Filtered by expression': + element_test: + has_size: 23490 + delta: 2000 + 'Report Results: HTML File': + element_test: + has_size: 531761 + delta: 25000 + 'Tables: DEG ': + element_tests: + edgeR_normal-COVID_19: + has_text_matching: + expression: 
"RALBP1\tENSG00000017797\tFalse\t0.518[0-9]*\t1.609[0-9]*\t0.402[0-9]*\t2\tFalse\t0.286[0-9]*\t0.552[0-9]*\t-1.967[0-9]*\t7.483[0-9]*\t12.0213[0-9]*\t0.001[0-9]*\t0.436[0-9]*" + expression: "NAPA\tENSG00000105402\tTrue\t0.342[0-9]\t1.686[0-9]\t0.846[0-9]\t4\tFalse\t0.180[0-9]\t0.440[0-9]\t-1.059[0-9]\t6.833[0-9]\t3.291[0-9]\t0.076[0-9]\t0.619[0-9]" + has_n_lines: + n: 1430 + delta: 1 + 'Tables for volcano plot': + element_tests: + edgeR_normal-COVID_19: + has_text_matching: + expression: "CPEB4\t-2.402[0-9]\t0.001[0-9]\t0.436[0-9]" + expression: "FGFR1OP2\t-2.367[0-9]\t0.004[0-9]\t0.458[0-9]" + 'Volcano Plot on input dataset(s): PDF': + element_tests: + edgeR_normal-COVID_19: + has_size: 85052 + delta: 2000 diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga new file mode 100644 index 000000000..c581952a8 --- /dev/null +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga @@ -0,0 +1,969 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "Pseudobulk", + "comments": [], + "creator": [ + { + "class": "Person", + "identifier": "0000-0002-5857-1477", + "name": "Diana Chiang Jurado" + }, + { + "class": "Person", + "identifier": "0000-0002-5192-126X", + "name": "Pavankumar Videm" + }, + { + "class": "Person", + "identifier": "0000-0002-9856-1679", + "name": "Pablo Moreno" + } + ], + "format-version": "0.1", + "license": "CC-BY-4.0", + "release": "0.1", + "name": "pseudo-bulk_edgeR", + "report": { + "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" + }, + "steps": { + "0": { + "annotation": "Ensure your AnnData object contains all necessary layers before using the decoupler tool. The raw counts should be included in AnnData. 
If they are missing, create a new layer (e.g., 'raw_counts') and copy the raw counts into it.", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "Ensure your AnnData object contains all necessary layers before using the decoupler tool. The raw counts should be included in AnnData. If they are missing, create a new layer (e.g., 'raw_counts') and copy the raw counts into it.", + "name": "Source AnnData file" + } + ], + "label": "Source AnnData file", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 1.299886068362639, + "top": 516.1706375133249 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"h5\", \"h5ad\"], \"tag\": \"\"}", + "tool_version": null, + "type": "data_input", + "uuid": "96604e3b-861d-4d7c-8408-9d68489c1e41", + "when": null, + "workflow_outputs": [] + }, + "1": { + "annotation": "Merge Obs fields before pseudo-bulk analysis to create new categories for grouping, such as 'sample,phase' or 'sample,louvain'. Ensure the fields exist in the Obs of the AnnData object. Multiple groups can be merged with a colon (':'), e.g., 'sample,phase\n,phase' creates 'sample_phase' and 'louvain_phase'.", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "Merge Obs fields before pseudo-bulk analysis to create new categories for grouping, such as 'sample,phase' or 'sample,louvain'. Ensure the fields exist in the Obs of the AnnData object. 
Multiple groups can be merged with a colon (':'), e.g., 'sample,phase\n,phase' creates 'sample_phase' and 'louvain_phase'.", + "name": "Pseudo-bulk: Fields to merge" + } + ], + "label": "Pseudo-bulk: Fields to merge", + "name": "Input parameter", + "outputs": [], + "position": { + "left": 0, + "top": 647.32421875 + }, + "tool_id": null, + "tool_state": "{\"parameter_type\": \"text\", \"optional\": true}", + "tool_version": null, + "type": "parameter_input", + "uuid": "b765c928-15de-4a9b-8bf2-96f9a9a5cdba", + "when": null, + "workflow_outputs": [] + }, + "2": { + "annotation": "Typically, the column in obs that you want to use for comparisons later (the main contrast field) should be specified here. This column will also be used for plotting the pseudo-bulk samples, showing the number of counts and cells.", + "content_id": null, + "errors": null, + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "Typically, the column in obs that you want to use for comparisons later (the main contrast field) should be specified here. 
This column will also be used for plotting the pseudo-bulk samples, showing the number of counts and cells.", + "name": "Groupy column" + } + ], + "label": "Groupy column", + "name": "Input parameter", + "outputs": [], + "position": { + "left": 7.383289960141873, + "top": 751.7249615417315 + }, + "tool_id": null, + "tool_state": "{\"parameter_type\": \"text\", \"optional\": false}", + "tool_version": null, + "type": "parameter_input", + "uuid": "fa4b1127-560a-4ce5-8ae7-65b1cbd466f1", + "when": null, + "workflow_outputs": [] + }, + "3": { + "annotation": "The field used to create the pseudo-bulk replicates is typically a combination of multiple Obs fields merged together.", + "content_id": null, + "errors": null, + "id": 3, + "input_connections": {}, + "inputs": [ + { + "description": "The field used to create the pseudo-bulk replicates is typically a combination of multiple Obs fields merged together.", + "name": "Sample key column" + } + ], + "label": "Sample key column", + "name": "Input parameter", + "outputs": [], + "position": { + "left": 10.529437950525221, + "top": 861.5770833419796 + }, + "tool_id": null, + "tool_state": "{\"parameter_type\": \"text\", \"optional\": false}", + "tool_version": null, + "type": "parameter_input", + "uuid": "5daa239e-1acc-4730-993d-d7a62ae30575", + "when": null, + "workflow_outputs": [] + }, + "4": { + "annotation": "Name of the layer containing your raw (non-normalized) counts.", + "content_id": null, + "errors": null, + "id": 4, + "input_connections": {}, + "inputs": [ + { + "description": "Name of the layer containing your raw (non-normalized) counts.", + "name": "Name Your Raw Counts Layer" + } + ], + "label": "Name Your Raw Counts Layer", + "name": "Input parameter", + "outputs": [], + "position": { + "left": 9.002804977774513, + "top": 986.7809696988667 + }, + "tool_id": null, + "tool_state": "{\"parameter_type\": \"text\", \"optional\": false}", + "tool_version": null, + "type": "parameter_input", + "uuid": 
"e2b06f23-4090-4014-b194-9afcd48e3905", + "when": null, + "workflow_outputs": [] + }, + "5": { + "annotation": "The fields from Obs to be provided to EdgeR as factors. The first field should represent the main contrast for comparisons, while the subsequent fields will be used as covariates.", + "content_id": null, + "errors": null, + "id": 5, + "input_connections": {}, + "inputs": [ + { + "description": "The fields from Obs to be provided to EdgeR as factors. The first field should represent the main contrast for comparisons, while the subsequent fields will be used as covariates.", + "name": "Factor fields" + } + ], + "label": "Factor fields", + "name": "Input parameter", + "outputs": [], + "position": { + "left": 19.748813513029425, + "top": 1113.5786527382497 + }, + "tool_id": null, + "tool_state": "{\"parameter_type\": \"text\", \"optional\": false}", + "tool_version": null, + "type": "parameter_input", + "uuid": "d7f40c82-930a-45d4-8392-ecd071162040", + "when": null, + "workflow_outputs": [] + }, + "6": { + "annotation": "Specify the name of the column containing your gene symbols. For example: gene_symbol, gene_name, x, etc.", + "content_id": null, + "errors": null, + "id": 6, + "input_connections": {}, + "inputs": [ + { + "description": "Specify the name of the column containing your gene symbols. 
For example: gene_symbol, gene_name, x, etc.", + "name": "Gene symbol column" + } + ], + "label": "Gene symbol column", + "name": "Input parameter", + "outputs": [], + "position": { + "left": 1140.710055348184, + "top": 640.9960493841977 + }, + "tool_id": null, + "tool_state": "{\"parameter_type\": \"text\", \"optional\": false}", + "tool_version": null, + "type": "parameter_input", + "uuid": "e22b8666-3c73-41e2-8d3d-e6a8f4e2ba62", + "when": null, + "workflow_outputs": [] + }, + "7": { + "annotation": "Example 1: ~ 0 + Factor_1\n(Use this formula when you only want to account for one factor, Factor_1).\n\nExample 2 (With covariate adjustment): ~ 0 + Factor_1 + Factor_2\n(Use this formula if you need to adjust for additional factors, such as Factor_2, which serves as a covariate).\n\nNote: Ensure that all factors (e.g., Factor_1, Factor_2) included in the formula are defined in your factor file.", + "content_id": null, + "errors": null, + "id": 7, + "input_connections": {}, + "inputs": [ + { + "description": "Example 1: ~ 0 + Factor_1\n(Use this formula when you only want to account for one factor, Factor_1).\n\nExample 2 (With covariate adjustment): ~ 0 + Factor_1 + Factor_2\n(Use this formula if you need to adjust for additional factors, such as Factor_2, which serves as a covariate).\n\nNote: Ensure that all factors (e.g., Factor_1, Factor_2) included in the formula are defined in your factor file.", + "name": "Formula" + } + ], + "label": "Formula", + "name": "Input parameter", + "outputs": [], + "position": { + "left": 995.3732100945864, + "top": 1273.76953125 + }, + "tool_id": null, + "tool_state": "{\"parameter_type\": \"text\", \"optional\": false}", + "tool_version": null, + "type": "parameter_input", + "uuid": "71d2cf0a-7eaa-41fc-9ffd-f0ffbdbf9657", + "when": null, + "workflow_outputs": [] + }, + "8": { + "annotation": "File with one contrast per line. 
Each contrast should be a combination of factor name and value, such as FactorA.ValueX - FactorA.ValueY", + "content_id": null, + "errors": null, + "id": 8, + "input_connections": {}, + "inputs": [ + { + "description": "File with one contrast per line. Each contrast should be a combination of factor name and value, such as FactorA.ValueX - FactorA.ValueY", + "name": "contrasts_files" + } + ], + "label": "contrasts_files", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 996.953160069549, + "top": 1362.6562035835825 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"txt\", \"tabular\"], \"tag\": \"\"}", + "tool_version": null, + "type": "data_input", + "uuid": "6506a188-624d-4607-813e-e89fa063e318", + "when": null, + "workflow_outputs": [] + }, + "9": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/ebi-gxa/decoupler_pseudobulk/decoupler_pseudobulk/1.4.0+galaxy5", + "errors": null, + "id": 9, + "input_connections": { + "adata_obs_fields_to_merge": { + "id": 1, + "output_name": "output" + }, + "factor_fields": { + "id": 5, + "output_name": "output" + }, + "groupby": { + "id": 2, + "output_name": "output" + }, + "input_file": { + "id": 0, + "output_name": "output" + }, + "layer": { + "id": 4, + "output_name": "output" + }, + "sample_key": { + "id": 3, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "Decoupler pseudo-bulk", + "outputs": [ + { + "name": "count_matrix", + "type": "tabular" + }, + { + "name": "samples_metadata", + "type": "tabular" + }, + { + "name": "genes_metadata", + "type": "tabular" + }, + { + "name": "plot_output", + "type": "png" + }, + { + "name": "filter_by_expr_plot", + "type": "png" + }, + { + "name": "genes_ignore_per_contrast_field", + "type": "tabular" + } + ], + "position": { + "left": 385.0194442492359, + "top": 850.2148583687383 + }, + "post_job_actions": { + "DeleteIntermediatesActioncount_matrix": { + "action_arguments": {}, + "action_type": 
"DeleteIntermediatesAction", + "output_name": "count_matrix" + }, + "HideDatasetActiongenes_metadata": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "genes_metadata" + }, + "HideDatasetActionsamples_metadata": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "samples_metadata" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/ebi-gxa/decoupler_pseudobulk/decoupler_pseudobulk/1.4.0+galaxy5", + "tool_shed_repository": { + "changeset_revision": "f6040492b499", + "name": "decoupler_pseudobulk", + "owner": "ebi-gxa", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"adata_obs_fields_to_merge\": {\"__class__\": \"ConnectedValue\"}, \"factor_fields\": {\"__class__\": \"ConnectedValue\"}, \"filter_expr\": true, \"filter_per_contrast\": {\"filter\": \"no\", \"__current_case__\": 1}, \"groupby\": {\"__class__\": \"ConnectedValue\"}, \"input_file\": {\"__class__\": \"ConnectedValue\"}, \"layer\": {\"__class__\": \"ConnectedValue\"}, \"min_cells\": \"10\", \"min_counts\": \"10\", \"min_counts_per_sample\": \"20\", \"min_total_counts\": \"1000\", \"mode\": \"sum\", \"plot_filtering_figsize\": \"13 13\", \"plot_samples_figsize\": \"13 13\", \"produce_anndata\": false, \"produce_plots\": true, \"sample_key\": {\"__class__\": \"ConnectedValue\"}, \"use_raw\": false, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.4.0+galaxy5", + "type": "tool", + "uuid": "12b43361-2dbc-4dd5-89c9-6a9842bb1531", + "when": null, + "workflow_outputs": [ + { + "label": "pseudobulk_count_matrix", + "output_name": "count_matrix", + "uuid": "b28fbf89-5621-459e-9321-6cd0a72261ea" + }, + { + "label": "Pseudobulk Plot", + "output_name": "plot_output", + "uuid": "6e8b4090-3ab7-4158-805a-2d757fd5e0fb" + }, + { + "label": "Filtered by expression", + "output_name": "filter_by_expr_plot", + "uuid": "93e42181-49b2-47cf-ab73-98373376caf9" + } + ] + }, + "10": { + "annotation": "", + "content_id": 
"toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_line/9.3+galaxy1", + "errors": null, + "id": 10, + "input_connections": { + "infile": { + "id": 9, + "output_name": "count_matrix" + } + }, + "inputs": [], + "label": "Sanitize matrix", + "name": "Replace Text", + "outputs": [ + { + "name": "outfile", + "type": "input" + } + ], + "position": { + "left": 688.1746137411936, + "top": 716.864770683492 + }, + "post_job_actions": { + "DeleteIntermediatesActionoutfile": { + "action_arguments": {}, + "action_type": "DeleteIntermediatesAction", + "output_name": "outfile" + }, + "HideDatasetActionoutfile": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "outfile" + }, + "RenameDatasetActionoutfile": { + "action_arguments": { + "newname": "matrix.tsv" + }, + "action_type": "RenameDatasetAction", + "output_name": "outfile" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_line/9.3+galaxy1", + "tool_shed_repository": { + "changeset_revision": "86755160afbf", + "name": "text_processing", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"infile\": {\"__class__\": \"ConnectedValue\"}, \"replacements\": [{\"__index__\": 0, \"find_pattern\": \"[ --+*^]+\", \"replace_pattern\": \"_\"}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "9.3+galaxy1", + "type": "tool", + "uuid": "15d8b67b-a232-4ea9-9639-4308a23823f1", + "when": null, + "workflow_outputs": [] + }, + "11": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_line/9.3+galaxy1", + "errors": null, + "id": 11, + "input_connections": { + "infile": { + "id": 9, + "output_name": "samples_metadata" + } + }, + "inputs": [], + "label": "Sanitize factors", + "name": "Replace Text", + "outputs": [ + { + "name": "outfile", + "type": "input" + } + ], + "position": { + "left": 688.4911915852701, + "top": 882.8932432784438 + 
}, + "post_job_actions": { + "DeleteIntermediatesActionoutfile": { + "action_arguments": {}, + "action_type": "DeleteIntermediatesAction", + "output_name": "outfile" + }, + "HideDatasetActionoutfile": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "outfile" + }, + "RenameDatasetActionoutfile": { + "action_arguments": { + "newname": "factors.tsv" + }, + "action_type": "RenameDatasetAction", + "output_name": "outfile" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_line/9.3+galaxy1", + "tool_shed_repository": { + "changeset_revision": "86755160afbf", + "name": "text_processing", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"infile\": {\"__class__\": \"ConnectedValue\"}, \"replacements\": [{\"__index__\": 0, \"find_pattern\": \"[ --+*^]+\", \"replace_pattern\": \"_\"}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "9.3+galaxy1", + "type": "tool", + "uuid": "a001701e-e3ae-491a-b5cb-8ccf5a1fe80d", + "when": null, + "workflow_outputs": [] + }, + "12": { + "annotation": "This columns offend EdgeR and DESeq2.", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/column_remove_by_header/column_remove_by_header/1.0", + "errors": null, + "id": 12, + "input_connections": { + "input_tabular": { + "id": 9, + "output_name": "genes_metadata" + } + }, + "inputs": [], + "label": "Remove start, end, width", + "name": "Remove columns", + "outputs": [ + { + "name": "output_tabular", + "type": "tabular" + } + ], + "position": { + "left": 694.3886167394281, + "top": 1069.883945552724 + }, + "post_job_actions": { + "HideDatasetActionoutput_tabular": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output_tabular" + }, + "RenameDatasetActionoutput_tabular": { + "action_arguments": { + "newname": "genes_metadata.tsv" + }, + "action_type": "RenameDatasetAction", + "output_name": "output_tabular" + } + }, + 
"tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/column_remove_by_header/column_remove_by_header/1.0", + "tool_shed_repository": { + "changeset_revision": "2040e4c2750a", + "name": "column_remove_by_header", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"headers\": [{\"__index__\": 0, \"name\": \"start\"}, {\"__index__\": 1, \"name\": \"end\"}, {\"__index__\": 2, \"name\": \"width\"}], \"input_tabular\": {\"__class__\": \"ConnectedValue\"}, \"keep_columns\": false, \"strip_characters\": \"#\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0", + "type": "tool", + "uuid": "b48e0354-6a07-4ae0-a5fe-1dbb84010a26", + "when": null, + "workflow_outputs": [] + }, + "13": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/9.3+galaxy1", + "errors": null, + "id": 13, + "input_connections": { + "infile": { + "id": 11, + "output_name": "outfile" + } + }, + "inputs": [], + "label": "Sanitise first factor for leading digits", + "name": "Replace Text", + "outputs": [ + { + "name": "outfile", + "type": "input" + } + ], + "position": { + "left": 942.8842589893927, + "top": 1022.3450832446107 + }, + "post_job_actions": { + "HideDatasetActionoutfile": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "outfile" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/9.3+galaxy1", + "tool_shed_repository": { + "changeset_revision": "86755160afbf", + "name": "text_processing", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"infile\": {\"__class__\": \"ConnectedValue\"}, \"replacements\": [{\"__index__\": 0, \"column\": \"2\", \"find_pattern\": \"^([0-9])(.+)\", \"replace_pattern\": \"GG_\\\\\\\\1\\\\\\\\2\"}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "9.3+galaxy1", + "type": "tool", + "uuid": 
"2505066d-49d0-4ed0-888d-455867e6f07d", + "when": null, + "workflow_outputs": [] + }, + "14": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/edger/edger/3.36.0+galaxy5", + "errors": null, + "id": 14, + "input_connections": { + "anno|geneanno": { + "id": 12, + "output_name": "output_tabular" + }, + "contrasts|cinfo": { + "id": 8, + "output_name": "output" + }, + "formula": { + "id": 7, + "output_name": "output" + }, + "input|counts": { + "id": 10, + "output_name": "outfile" + }, + "input|fact|finfo": { + "id": 13, + "output_name": "outfile" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool edgeR", + "name": "anno" + }, + { + "description": "runtime parameter for tool edgeR", + "name": "contrasts" + }, + { + "description": "runtime parameter for tool edgeR", + "name": "input" + } + ], + "label": null, + "name": "edgeR", + "outputs": [ + { + "name": "outTables", + "type": "input" + }, + { + "name": "outReport", + "type": "html" + } + ], + "position": { + "left": 1333.9860916368436, + "top": 1075.1709496390679 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/edger/edger/3.36.0+galaxy5", + "tool_shed_repository": { + "changeset_revision": "ae2aad0a6d50", + "name": "edger", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"adv\": {\"lfc\": \"0.0\", \"pVal\": \"0.05\", \"pAdjust\": \"BH\", \"normalisationOption\": \"TMM\", \"robOption\": true, \"lrtOption\": false}, \"anno\": {\"annoOpt\": \"yes\", \"__current_case__\": 0, \"geneanno\": {\"__class__\": \"ConnectedValue\"}}, \"contrasts\": {\"contrastOpt\": \"file\", \"__current_case__\": 1, \"cinfo\": {\"__class__\": \"ConnectedValue\"}}, \"f\": {\"filt\": {\"filt_select\": \"no\", \"__current_case__\": 1}}, \"formula\": {\"__class__\": \"ConnectedValue\"}, \"input\": {\"format\": \"matrix\", \"__current_case__\": 1, \"counts\": {\"__class__\": \"ConnectedValue\"}, \"fact\": {\"ffile\": \"yes\", 
\"__current_case__\": 0, \"finfo\": {\"__class__\": \"ConnectedValue\"}}}, \"out\": {\"normCounts\": false, \"rscript\": false, \"rdaOption\": false}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.36.0+galaxy5", + "type": "tool", + "uuid": "33e1a4fc-7001-4d1a-88c2-d492d8faf7e3", + "when": null, + "workflow_outputs": [ + { + "label": "Tables: DEG ", + "output_name": "outTables", + "uuid": "851ac5bc-9c57-4f36-b469-33a2e8dde894" + }, + { + "label": "Report Results: HTML File", + "output_name": "outReport", + "uuid": "5ad48faa-d0a7-4bb1-b307-7861f540ec29" + } + ] + }, + "15": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/collection_element_identifiers/collection_element_identifiers/0.0.2", + "errors": null, + "id": 15, + "input_connections": { + "input_collection": { + "id": 14, + "output_name": "outTables" + } + }, + "inputs": [], + "label": "Get contrast labels", + "name": "Extract element identifiers", + "outputs": [ + { + "name": "output", + "type": "txt" + } + ], + "position": { + "left": 1469.7602879692938, + "top": 372.90924868740876 + }, + "post_job_actions": { + "HideDatasetActionoutput": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/collection_element_identifiers/collection_element_identifiers/0.0.2", + "tool_shed_repository": { + "changeset_revision": "d3c07d270a50", + "name": "collection_element_identifiers", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"input_collection\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.0.2", + "type": "tool", + "uuid": "6f390fc9-b4ac-485e-867c-f338c902eacb", + "when": null, + "workflow_outputs": [] + }, + "16": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/column_remove_by_header/column_remove_by_header/1.0", + "errors": null, + "id": 16, 
+ "input_connections": { + "headers_0|name": { + "id": 6, + "output_name": "output" + }, + "input_tabular": { + "id": 14, + "output_name": "outTables" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Remove columns", + "name": "input_tabular" + } + ], + "label": "Select gene symbols, logFC, PValue and FDR", + "name": "Remove columns", + "outputs": [ + { + "name": "output_tabular", + "type": "tabular" + } + ], + "position": { + "left": 1676.8387926808718, + "top": 527.0458386379028 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/column_remove_by_header/column_remove_by_header/1.0", + "tool_shed_repository": { + "changeset_revision": "2040e4c2750a", + "name": "column_remove_by_header", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"headers\": [{\"__index__\": 0, \"name\": {\"__class__\": \"ConnectedValue\"}}, {\"__index__\": 1, \"name\": \"logFC\"}, {\"__index__\": 2, \"name\": \"PValue\"}, {\"__index__\": 3, \"name\": \"FDR\"}], \"input_tabular\": {\"__class__\": \"RuntimeValue\"}, \"keep_columns\": true, \"strip_characters\": \"#\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0", + "type": "tool", + "uuid": "9935a582-7775-4706-be28-720224f5ba9e", + "when": null, + "workflow_outputs": [ + { + "label": "Tables for volcano plot", + "output_name": "output_tabular", + "uuid": "647df402-93cb-4f9e-9bde-7afcbcffad5b" + } + ] + }, + "17": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_line/9.3+galaxy1", + "errors": null, + "id": 17, + "input_connections": { + "infile": { + "id": 15, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "Replace Text", + "outputs": [ + { + "name": "outfile", + "type": "input" + } + ], + "position": { + "left": 1736.4192938751548, + "top": 238.42686256263465 + }, + "post_job_actions": { + "HideDatasetActionoutfile": { + "action_arguments": 
{}, + "action_type": "HideDatasetAction", + "output_name": "outfile" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_line/9.3+galaxy1", + "tool_shed_repository": { + "changeset_revision": "86755160afbf", + "name": "text_processing", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"infile\": {\"__class__\": \"ConnectedValue\"}, \"replacements\": [{\"__index__\": 0, \"find_pattern\": \"edgeR_\", \"replace_pattern\": \"\"}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "9.3+galaxy1", + "type": "tool", + "uuid": "6595e60f-c1a5-4667-946a-69f5bf201d0f", + "when": null, + "workflow_outputs": [] + }, + "18": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/split_file_to_collection/split_file_to_collection/0.5.2", + "errors": null, + "id": 18, + "input_connections": { + "split_parms|input": { + "id": 17, + "output_name": "outfile" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Split file", + "name": "split_parms" + } + ], + "label": "Split contrasts", + "name": "Split file", + "outputs": [ + { + "name": "list_output_txt", + "type": "input" + } + ], + "position": { + "left": 2006.4343450573622, + "top": 148.05037289296598 + }, + "post_job_actions": { + "HideDatasetActionlist_output_txt": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "list_output_txt" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/split_file_to_collection/split_file_to_collection/0.5.2", + "tool_shed_repository": { + "changeset_revision": "2dae863c8f42", + "name": "split_file_to_collection", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"split_parms\": {\"select_ftype\": \"txt\", \"__current_case__\": 5, \"input\": {\"__class__\": \"ConnectedValue\"}, \"select_mode\": {\"mode\": \"chunk\", \"__current_case__\": 0, \"chunksize\": \"1\"}, 
\"newfilenames\": \"split_file\", \"select_allocate\": {\"allocate\": \"batch\", \"__current_case__\": 1}}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.5.2", + "type": "tool", + "uuid": "0eb6f773-5ef2-4b50-b4fa-21fdbd7671ff", + "when": null, + "workflow_outputs": [] + }, + "19": { + "annotation": "", + "content_id": "param_value_from_file", + "errors": null, + "id": 19, + "input_connections": { + "input1": { + "id": 18, + "output_name": "list_output_txt" + } + }, + "inputs": [], + "label": "Contrast as parameters", + "name": "Parse parameter value", + "outputs": [ + { + "name": "text_param", + "type": "expression.json" + } + ], + "position": { + "left": 2242.840767653825, + "top": 0 + }, + "post_job_actions": { + "HideDatasetActiontext_param": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "text_param" + } + }, + "tool_id": "param_value_from_file", + "tool_state": "{\"input1\": {\"__class__\": \"ConnectedValue\"}, \"param_type\": \"text\", \"remove_newlines\": true, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.1.0", + "type": "tool", + "uuid": "81b2bd80-00c7-4ce5-8d26-f48b0f9f7f08", + "when": null, + "workflow_outputs": [] + }, + "20": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/volcanoplot/volcanoplot/0.0.6", + "errors": null, + "id": 20, + "input_connections": { + "input": { + "id": 16, + "output_name": "output_tabular" + }, + "plot_options|title": { + "id": 19, + "output_name": "text_param" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Volcano Plot", + "name": "plot_options" + } + ], + "label": null, + "name": "Volcano Plot", + "outputs": [ + { + "name": "plot", + "type": "pdf" + } + ], + "position": { + "left": 2461.98898332815, + "top": 223.8346304244149 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/volcanoplot/volcanoplot/0.0.6", + "tool_shed_repository": { + 
"changeset_revision": "2f557f6abbfb", + "name": "volcanoplot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"fdr_col\": \"4\", \"header\": \"yes\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"label_col\": \"1\", \"labels\": {\"label_select\": \"signif\", \"__current_case__\": 0, \"top_num\": \"40\"}, \"lfc_col\": \"2\", \"lfc_thresh\": \"0.58\", \"out_options\": {\"rscript_out\": false}, \"plot_options\": {\"boxes\": false, \"title\": {\"__class__\": \"ConnectedValue\"}, \"xlab\": \"\", \"ylab\": \"\", \"xmin\": null, \"xmax\": null, \"ymax\": null, \"legend\": \"\", \"legend_labs\": \"Down,Not Sig,Up\"}, \"pval_col\": \"3\", \"signif_thresh\": \"0.05\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.0.6", + "type": "tool", + "uuid": "36e45e14-8bec-4321-9b9f-557db6d6ed97", + "when": null, + "workflow_outputs": [ + { + "label": "Volcano Plot on input dataset(s): PDF", + "output_name": "plot", + "uuid": "02cc64ea-d34d-4081-8a2b-2dc59e102c66" + } + ] + } + }, + "tags": [], + "uuid": "f91cd317-1c52-43be-baaf-3365de3ba45a", + "version": 24 +} \ No newline at end of file diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/test-data/contrasts_files.txt b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/test-data/contrasts_files.txt new file mode 100644 index 000000000..249abe781 --- /dev/null +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/test-data/contrasts_files.txt @@ -0,0 +1,2 @@ +Header +normal-COVID_19 From 7c8dbf1b0bab8364a8e2e15c0deb5290e9ffb909 Mon Sep 17 00:00:00 2001 From: diana chJ <122611454+dianichj@users.noreply.github.com> Date: Wed, 30 Oct 2024 11:49:27 +0100 Subject: [PATCH 02/21] Update README.md Added a small description to the README file to explain what the workflow does. 
--- .../scRNAseq/pseudobulk-worflow-decoupler-edger/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/README.md b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/README.md index b51062682..1878903fe 100644 --- a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/README.md +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/README.md @@ -1,5 +1,8 @@ # Pseudobulk-edgeR workflows +This workflow uses the decoupler tool in Galaxy to generate pseudobulk counts from an annotated AnnData file obtained from scRNA-seq analysis. Following the pseudobulk step, differential expression genes (DEG) are calculated +using the edgeR tool. The workflow also includes data sanitation steps to ensure smooth operation of edgeR and minimizing potential issues. Additionally, a Volcano plot tool is used to visualize the results after the DEG +analysis. ## Inputs From ed4908cfb9d2b55d54c9deb4a6a5f46402cd0a20 Mon Sep 17 00:00:00 2001 From: diana chJ <122611454+dianichj@users.noreply.github.com> Date: Wed, 30 Oct 2024 12:03:26 +0100 Subject: [PATCH 03/21] Update workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml Co-authored-by: Marius van den Beek --- .../pseudo-bulk_edgeR-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml index bfe7d713b..dc9892ca0 100644 --- a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml @@ -14,7 +14,7 @@ Name Your Raw Counts Layer: counts Factor fields: disease Gene symbol column: gene_symbol - Formula: '~ 0 + disease ' + Formula: '~ 0 + disease' outputs: 'pseudobulk_count_matrix': has_text_matching: From 4bf98e52126b38ed3d19923e2d2717dfbde066b9 Mon 
Sep 17 00:00:00 2001 From: diana chJ <122611454+dianichj@users.noreply.github.com> Date: Wed, 30 Oct 2024 12:04:54 +0100 Subject: [PATCH 04/21] Update workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml Co-authored-by: Marius van den Beek --- .../pseudo-bulk_edgeR-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml index dc9892ca0..5fb8974b9 100644 --- a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml @@ -32,7 +32,7 @@ element_test: has_size: 531761 delta: 25000 - 'Tables: DEG ': + 'Tables: DEG': element_tests: edgeR_normal-COVID_19: has_text_matching: From a6ac2a700d4e4a2b06307e50de1cf50c62dc75e0 Mon Sep 17 00:00:00 2001 From: diana chJ <122611454+dianichj@users.noreply.github.com> Date: Wed, 30 Oct 2024 14:12:39 +0100 Subject: [PATCH 05/21] Update pseudo-bulk_edgeR-tests.yml --- .../pseudo-bulk_edgeR-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml index 5fb8974b9..dc9892ca0 100644 --- a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml @@ -32,7 +32,7 @@ element_test: has_size: 531761 delta: 25000 - 'Tables: DEG': + 'Tables: DEG ': element_tests: edgeR_normal-COVID_19: has_text_matching: From dd20459491e2f41d1580033db817ca32aeaf6dbe Mon Sep 17 00:00:00 2001 From: diana chJ <122611454+dianichj@users.noreply.github.com> Date: Wed, 30 Oct 2024 14:49:41 +0100 Subject: [PATCH 06/21] Update pseudo-bulk_edgeR-tests.yml --- 
.../pseudo-bulk_edgeR-tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml index dc9892ca0..cc3ff7478 100644 --- a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml @@ -16,7 +16,7 @@ Gene symbol column: gene_symbol Formula: '~ 0 + disease' outputs: - 'pseudobulk_count_matrix': + 'Pseudobulk Count Matrix': has_text_matching: expression: "ACAP2\t9.0\t18.0\t20.0\t68.0\t106.0\t122.0\t14.0\t259.0\t279.0\t184.0\t612.0\t293.0\t297.0\t46.0\t1.0\t0.0\t1.0\t12.0\t229.0\t151.0\t141.0\t309.0\t299.0\t181.0\t2.0\t2.0\t28.0\t15.0\t54.0\t210.0\t1.0\t1.0\t1.0\t11.0" expression: "ACER3\t4.0\t25.0\t21.0\t110.0\t82.0\t91.0\t22.0\t326.0\t297.0\t211.0\t1004.0\t574.0\t370.0\t108.0\t0.0\t0.0\t2.0\t2.0\t188.0\t113.0\t135.0\t322.0\t324.0\t159.0\t7.0\t7.0\t32.0\t5.0\t33.0\t89.0\t2.0\t2.0\t8.0\t48.0" @@ -41,7 +41,7 @@ has_n_lines: n: 1430 delta: 1 - 'Tables for volcano plot': + 'Tables for Volcano Plot': element_tests: edgeR_normal-COVID_19: has_text_matching: From 7d0f73d65ae8228e67e27f1f4a103e49f2435766 Mon Sep 17 00:00:00 2001 From: diana chJ <122611454+dianichj@users.noreply.github.com> Date: Mon, 4 Nov 2024 14:32:49 +0100 Subject: [PATCH 07/21] Update pseudo-bulk_edgeR-tests.yml deleted contrast file txt input - not needed anymore --- .../pseudo-bulk_edgeR-tests.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml index cc3ff7478..3ad715f19 100644 --- a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml +++ 
b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml @@ -4,10 +4,6 @@ class: File location: https://zenodo.org/records/13929549/files/Source%20AnnData%20file.h5ad?download=1 filetype: h5ad - contrasts_files: - class: File - path: test-data/contrasts_files.txt - filetype: txt 'Pseudo-bulk: Fields to merge': null Groupy column: cell_type Sample key column: individual From 185c24c1221615084ada6c40cd29b7aa5eef4052 Mon Sep 17 00:00:00 2001 From: diana chJ <122611454+dianichj@users.noreply.github.com> Date: Mon, 4 Nov 2024 14:51:40 +0100 Subject: [PATCH 08/21] Update pseudo-bulk_edgeR-tests.yml --- .../pseudo-bulk_edgeR-tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml index 3ad715f19..e02fb744f 100644 --- a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml @@ -12,7 +12,7 @@ Gene symbol column: gene_symbol Formula: '~ 0 + disease' outputs: - 'Pseudobulk Count Matrix': + 'pseudobulk_count_matrix': has_text_matching: expression: "ACAP2\t9.0\t18.0\t20.0\t68.0\t106.0\t122.0\t14.0\t259.0\t279.0\t184.0\t612.0\t293.0\t297.0\t46.0\t1.0\t0.0\t1.0\t12.0\t229.0\t151.0\t141.0\t309.0\t299.0\t181.0\t2.0\t2.0\t28.0\t15.0\t54.0\t210.0\t1.0\t1.0\t1.0\t11.0" expression: "ACER3\t4.0\t25.0\t21.0\t110.0\t82.0\t91.0\t22.0\t326.0\t297.0\t211.0\t1004.0\t574.0\t370.0\t108.0\t0.0\t0.0\t2.0\t2.0\t188.0\t113.0\t135.0\t322.0\t324.0\t159.0\t7.0\t7.0\t32.0\t5.0\t33.0\t89.0\t2.0\t2.0\t8.0\t48.0" @@ -37,7 +37,7 @@ has_n_lines: n: 1430 delta: 1 - 'Tables for Volcano Plot': + 'Tables for volcano plot': element_tests: edgeR_normal-COVID_19: has_text_matching: From e592be3235016fb787bf86f146533eb8a4f1920d Mon Sep 17 00:00:00 2001 From: diana chJ 
<122611454+dianichj@users.noreply.github.com> Date: Mon, 4 Nov 2024 14:56:31 +0100 Subject: [PATCH 09/21] Update pseudo-bulk_edgeR.ga Editions for better automatisation of workflow. User contrast_file input not needed anymore. --- .../pseudo-bulk_edgeR.ga | 285 +++++++++++------- 1 file changed, 169 insertions(+), 116 deletions(-) diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga index c581952a8..eb9bb57d3 100644 --- a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga @@ -21,7 +21,6 @@ ], "format-version": "0.1", "license": "CC-BY-4.0", - "release": "0.1", "name": "pseudo-bulk_edgeR", "report": { "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" @@ -43,8 +42,8 @@ "name": "Input dataset", "outputs": [], "position": { - "left": 1.299886068362639, - "top": 516.1706375133249 + "left": 0, + "top": 114.82675544995811 }, "tool_id": null, "tool_state": "{\"optional\": false, \"format\": [\"h5\", \"h5ad\"], \"tag\": \"\"}", @@ -70,8 +69,8 @@ "name": "Input parameter", "outputs": [], "position": { - "left": 0, - "top": 647.32421875 + "left": 60.495251676792236, + "top": 230.35374378726198 }, "tool_id": null, "tool_state": "{\"parameter_type\": \"text\", \"optional\": true}", @@ -79,7 +78,13 @@ "type": "parameter_input", "uuid": "b765c928-15de-4a9b-8bf2-96f9a9a5cdba", "when": null, - "workflow_outputs": [] + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "c70d7068-d535-430f-ba40-199bbfed605c" + } + ] }, "2": { "annotation": "Typically, the column in obs that you want to use for comparisons later (the main contrast field) should be specified here. 
This column will also be used for plotting the pseudo-bulk samples, showing the number of counts and cells.", @@ -97,8 +102,8 @@ "name": "Input parameter", "outputs": [], "position": { - "left": 7.383289960141873, - "top": 751.7249615417315 + "left": 123.70374778716867, + "top": 341.8746744746166 }, "tool_id": null, "tool_state": "{\"parameter_type\": \"text\", \"optional\": false}", @@ -106,7 +111,13 @@ "type": "parameter_input", "uuid": "fa4b1127-560a-4ce5-8ae7-65b1cbd466f1", "when": null, - "workflow_outputs": [] + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "9b6c5134-f9fb-413a-8a65-62ca29401dd9" + } + ] }, "3": { "annotation": "The field used to create the pseudo-bulk replicates is typically a combination of multiple Obs fields merged together.", @@ -124,8 +135,8 @@ "name": "Input parameter", "outputs": [], "position": { - "left": 10.529437950525221, - "top": 861.5770833419796 + "left": 151.20372670956235, + "top": 465.91578876790373 }, "tool_id": null, "tool_state": "{\"parameter_type\": \"text\", \"optional\": false}", @@ -133,7 +144,13 @@ "type": "parameter_input", "uuid": "5daa239e-1acc-4730-993d-d7a62ae30575", "when": null, - "workflow_outputs": [] + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "e6d9cb6d-1128-4bd1-bb7e-e8d0d4dd11f6" + } + ] }, "4": { "annotation": "Name of the layer containing your raw (non-normalized) counts.", @@ -151,8 +168,8 @@ "name": "Input parameter", "outputs": [], "position": { - "left": 9.002804977774513, - "top": 986.7809696988667 + "left": 198.40796524172788, + "top": 596.2242453471495 }, "tool_id": null, "tool_state": "{\"parameter_type\": \"text\", \"optional\": false}", @@ -160,7 +177,13 @@ "type": "parameter_input", "uuid": "e2b06f23-4090-4014-b194-9afcd48e3905", "when": null, - "workflow_outputs": [] + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "ed10dd0c-7c42-4c37-a2ec-788b25900c69" + } + ] }, "5": { "annotation": 
"The fields from Obs to be provided to EdgeR as factors. The first field should represent the main contrast for comparisons, while the subsequent fields will be used as covariates.", @@ -178,8 +201,8 @@ "name": "Input parameter", "outputs": [], "position": { - "left": 19.748813513029425, - "top": 1113.5786527382497 + "left": 223.45860152251703, + "top": 713.2366686237074 }, "tool_id": null, "tool_state": "{\"parameter_type\": \"text\", \"optional\": false}", @@ -187,94 +210,85 @@ "type": "parameter_input", "uuid": "d7f40c82-930a-45d4-8392-ecd071162040", "when": null, - "workflow_outputs": [] + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "b3885b84-8cf3-47ce-9e57-51890d8f6aa8" + } + ] }, "6": { - "annotation": "Specify the name of the column containing your gene symbols. For example: gene_symbol, gene_name, x, etc.", + "annotation": "Example 1: ~ 0 + Factor_1\n(Use this formula when you only want to account for one factor, Factor_1).\n\nExample 2 (With covariate adjustment): ~ 0 + Factor_1 + Factor_2\n(Use this formula if you need to adjust for additional factors, such as Factor_2, which serves as a covariate).\n\nNote: Ensure that all factors (e.g., Factor_1, Factor_2) included in the formula are defined in your factor file.", "content_id": null, "errors": null, "id": 6, "input_connections": {}, "inputs": [ { - "description": "Specify the name of the column containing your gene symbols. 
For example: gene_symbol, gene_name, x, etc.", - "name": "Gene symbol column" + "description": "Example 1: ~ 0 + Factor_1\n(Use this formula when you only want to account for one factor, Factor_1).\n\nExample 2 (With covariate adjustment): ~ 0 + Factor_1 + Factor_2\n(Use this formula if you need to adjust for additional factors, such as Factor_2, which serves as a covariate).\n\nNote: Ensure that all factors (e.g., Factor_1, Factor_2) included in the formula are defined in your factor file.", + "name": "Formula" } ], - "label": "Gene symbol column", + "label": "Formula", "name": "Input parameter", "outputs": [], "position": { - "left": 1140.710055348184, - "top": 640.9960493841977 + "left": 1084.8910833860734, + "top": 839.4664158173962 }, "tool_id": null, "tool_state": "{\"parameter_type\": \"text\", \"optional\": false}", "tool_version": null, "type": "parameter_input", - "uuid": "e22b8666-3c73-41e2-8d3d-e6a8f4e2ba62", + "uuid": "71d2cf0a-7eaa-41fc-9ffd-f0ffbdbf9657", "when": null, - "workflow_outputs": [] + "workflow_outputs": [ + { + "label": null, + "output_name": "output", + "uuid": "e1911fda-16a7-4f48-8a17-5d9f4bd635ee" + } + ] }, "7": { - "annotation": "Example 1: ~ 0 + Factor_1\n(Use this formula when you only want to account for one factor, Factor_1).\n\nExample 2 (With covariate adjustment): ~ 0 + Factor_1 + Factor_2\n(Use this formula if you need to adjust for additional factors, such as Factor_2, which serves as a covariate).\n\nNote: Ensure that all factors (e.g., Factor_1, Factor_2) included in the formula are defined in your factor file.", + "annotation": "Specify the name of the column containing your gene symbols. 
For example: gene_symbol, gene_name, x, etc.", "content_id": null, "errors": null, "id": 7, "input_connections": {}, "inputs": [ { - "description": "Example 1: ~ 0 + Factor_1\n(Use this formula when you only want to account for one factor, Factor_1).\n\nExample 2 (With covariate adjustment): ~ 0 + Factor_1 + Factor_2\n(Use this formula if you need to adjust for additional factors, such as Factor_2, which serves as a covariate).\n\nNote: Ensure that all factors (e.g., Factor_1, Factor_2) included in the formula are defined in your factor file.", - "name": "Formula" + "description": "Specify the name of the column containing your gene symbols. For example: gene_symbol, gene_name, x, etc.", + "name": "Gene symbol column" } ], - "label": "Formula", + "label": "Gene symbol column", "name": "Input parameter", "outputs": [], "position": { - "left": 995.3732100945864, - "top": 1273.76953125 + "left": 1357.37679290235, + "top": 445.59148543814814 }, "tool_id": null, "tool_state": "{\"parameter_type\": \"text\", \"optional\": false}", "tool_version": null, "type": "parameter_input", - "uuid": "71d2cf0a-7eaa-41fc-9ffd-f0ffbdbf9657", + "uuid": "e22b8666-3c73-41e2-8d3d-e6a8f4e2ba62", "when": null, - "workflow_outputs": [] - }, - "8": { - "annotation": "File with one contrast per line. Each contrast should be a combination of factor name and value, such as FactorA.ValueX - FactorA.ValueY", - "content_id": null, - "errors": null, - "id": 8, - "input_connections": {}, - "inputs": [ + "workflow_outputs": [ { - "description": "File with one contrast per line. 
Each contrast should be a combination of factor name and value, such as FactorA.ValueX - FactorA.ValueY", - "name": "contrasts_files" + "label": null, + "output_name": "output", + "uuid": "7b3c4345-840d-4305-97ec-20ffe71a3a36" } - ], - "label": "contrasts_files", - "name": "Input dataset", - "outputs": [], - "position": { - "left": 996.953160069549, - "top": 1362.6562035835825 - }, - "tool_id": null, - "tool_state": "{\"optional\": false, \"format\": [\"txt\", \"tabular\"], \"tag\": \"\"}", - "tool_version": null, - "type": "data_input", - "uuid": "6506a188-624d-4607-813e-e89fa063e318", - "when": null, - "workflow_outputs": [] + ] }, - "9": { + "8": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/ebi-gxa/decoupler_pseudobulk/decoupler_pseudobulk/1.4.0+galaxy5", "errors": null, - "id": 9, + "id": 8, "input_connections": { "adata_obs_fields_to_merge": { "id": 1, @@ -331,8 +345,8 @@ } ], "position": { - "left": 385.0194442492359, - "top": 850.2148583687383 + "left": 516.0467803847828, + "top": 418.1072761184286 }, "post_job_actions": { "DeleteIntermediatesActioncount_matrix": { @@ -381,14 +395,14 @@ } ] }, - "10": { + "9": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_line/9.3+galaxy1", "errors": null, - "id": 10, + "id": 9, "input_connections": { "infile": { - "id": 9, + "id": 8, "output_name": "count_matrix" } }, @@ -402,8 +416,8 @@ } ], "position": { - "left": 688.1746137411936, - "top": 716.864770683492 + "left": 817.694628152607, + "top": 326.31168881503567 }, "post_job_actions": { "DeleteIntermediatesActionoutfile": { @@ -438,14 +452,14 @@ "when": null, "workflow_outputs": [] }, - "11": { + "10": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_line/9.3+galaxy1", "errors": null, - "id": 11, + "id": 10, "input_connections": { "infile": { - "id": 9, + "id": 8, "output_name": "samples_metadata" } }, @@ -459,8 +473,8 @@ } ], "position": { 
- "left": 688.4911915852701, - "top": 882.8932432784438 + "left": 818.0112059966837, + "top": 492.3401614099875 }, "post_job_actions": { "DeleteIntermediatesActionoutfile": { @@ -495,14 +509,14 @@ "when": null, "workflow_outputs": [] }, - "12": { - "annotation": "This columns offend EdgeR and DESeq2.", + "11": { + "annotation": "A column that may affect EdgeR and DESeq2.", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/column_remove_by_header/column_remove_by_header/1.0", "errors": null, - "id": 12, + "id": 11, "input_connections": { "input_tabular": { - "id": 9, + "id": 8, "output_name": "genes_metadata" } }, @@ -516,8 +530,8 @@ } ], "position": { - "left": 694.3886167394281, - "top": 1069.883945552724 + "left": 834.6675907510573, + "top": 728.5754761171477 }, "post_job_actions": { "HideDatasetActionoutput_tabular": { @@ -547,19 +561,19 @@ "when": null, "workflow_outputs": [] }, - "13": { + "12": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/9.3+galaxy1", "errors": null, - "id": 13, + "id": 12, "input_connections": { "infile": { - "id": 11, + "id": 10, "output_name": "outfile" } }, "inputs": [], - "label": "Sanitise first factor for leading digits", + "label": "Sanitize first factor for leading digits", "name": "Replace Text", "outputs": [ { @@ -568,8 +582,8 @@ } ], "position": { - "left": 942.8842589893927, - "top": 1022.3450832446107 + "left": 1073.6410833860734, + "top": 630.5406345673962 }, "post_job_actions": { "HideDatasetActionoutfile": { @@ -592,6 +606,50 @@ "when": null, "workflow_outputs": [] }, + "13": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_awk_tool/9.3+galaxy1", + "errors": null, + "id": 13, + "input_connections": { + "infile": { + "id": 12, + "output_name": "outfile" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Text reformatting", + "name": "infile" + } + ], + "label": null, + "name": "Text 
reformatting", + "outputs": [ + { + "name": "outfile", + "type": "input" + } + ], + "position": { + "left": 1364.7928022139954, + "top": 910.5816068561169 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_awk_tool/9.3+galaxy1", + "tool_shed_repository": { + "changeset_revision": "86755160afbf", + "name": "text_processing", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"code\": \"BEGIN { print \\\"header\\\" } NR > 1 { if (!seen[$2]++) words[++count]=$2 } END { for (i=1; i<=count; i++) for (j=i+1; j<=count; j++) print words[i]\\\"-\\\"words[j] }\", \"infile\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "9.3+galaxy1", + "type": "tool", + "uuid": "5701bab4-15b7-401a-9689-c127d17e5bdf", + "when": null, + "workflow_outputs": [] + }, "14": { "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/edger/edger/3.36.0+galaxy5", @@ -599,23 +657,23 @@ "id": 14, "input_connections": { "anno|geneanno": { - "id": 12, + "id": 11, "output_name": "output_tabular" }, "contrasts|cinfo": { - "id": 8, - "output_name": "output" + "id": 13, + "output_name": "outfile" }, "formula": { - "id": 7, + "id": 6, "output_name": "output" }, "input|counts": { - "id": 10, + "id": 9, "output_name": "outfile" }, "input|fact|finfo": { - "id": 13, + "id": 12, "output_name": "outfile" } }, @@ -646,8 +704,8 @@ } ], "position": { - "left": 1333.9860916368436, - "top": 1075.1709496390679 + "left": 1623.5448063037918, + "top": 655.3696689453593 }, "post_job_actions": {}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/edger/edger/3.36.0+galaxy5", @@ -696,8 +754,8 @@ } ], "position": { - "left": 1469.7602879692938, - "top": 372.90924868740876 + "left": 1855.0434149796085, + "top": 281.6915885082918 }, "post_job_actions": { "HideDatasetActionoutput": { @@ -727,7 +785,7 @@ "id": 16, "input_connections": { "headers_0|name": { - "id": 6, + "id": 7, 
"output_name": "output" }, "input_tabular": { @@ -735,12 +793,7 @@ "output_name": "outTables" } }, - "inputs": [ - { - "description": "runtime parameter for tool Remove columns", - "name": "input_tabular" - } - ], + "inputs": [], "label": "Select gene symbols, logFC, PValue and FDR", "name": "Remove columns", "outputs": [ @@ -750,8 +803,8 @@ } ], "position": { - "left": 1676.8387926808718, - "top": 527.0458386379028 + "left": 1940.8759531861979, + "top": 513.5126602860244 }, "post_job_actions": {}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/column_remove_by_header/column_remove_by_header/1.0", @@ -761,7 +814,7 @@ "owner": "iuc", "tool_shed": "toolshed.g2.bx.psu.edu" }, - "tool_state": "{\"headers\": [{\"__index__\": 0, \"name\": {\"__class__\": \"ConnectedValue\"}}, {\"__index__\": 1, \"name\": \"logFC\"}, {\"__index__\": 2, \"name\": \"PValue\"}, {\"__index__\": 3, \"name\": \"FDR\"}], \"input_tabular\": {\"__class__\": \"RuntimeValue\"}, \"keep_columns\": true, \"strip_characters\": \"#\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_state": "{\"headers\": [{\"__index__\": 0, \"name\": {\"__class__\": \"ConnectedValue\"}}, {\"__index__\": 1, \"name\": \"logFC\"}, {\"__index__\": 2, \"name\": \"PValue\"}, {\"__index__\": 3, \"name\": \"FDR\"}], \"input_tabular\": {\"__class__\": \"ConnectedValue\"}, \"keep_columns\": true, \"strip_characters\": \"#\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", "tool_version": "1.0", "type": "tool", "uuid": "9935a582-7775-4706-be28-720224f5ba9e", @@ -795,8 +848,8 @@ } ], "position": { - "left": 1736.4192938751548, - "top": 238.42686256263465 + "left": 2097.058869383114, + "top": 160.46839568505237 }, "post_job_actions": { "HideDatasetActionoutfile": { @@ -845,8 +898,8 @@ } ], "position": { - "left": 2006.4343450573622, - "top": 148.05037289296598 + "left": 2376.5570395203194, + "top": 0 }, "post_job_actions": { "HideDatasetActionlist_output_txt": { @@ -890,8 +943,8 @@ } ], "position": { - "left": 
2242.840767653825, - "top": 0 + "left": 2601.5947847657694, + "top": 142.72866520857775 }, "post_job_actions": { "HideDatasetActiontext_param": { @@ -938,8 +991,8 @@ } ], "position": { - "left": 2461.98898332815, - "top": 223.8346304244149 + "left": 2858.6197555934036, + "top": 367.5558994050945 }, "post_job_actions": {}, "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/volcanoplot/volcanoplot/0.0.6", @@ -964,6 +1017,6 @@ } }, "tags": [], - "uuid": "f91cd317-1c52-43be-baaf-3365de3ba45a", - "version": 24 -} \ No newline at end of file + "uuid": "e4e4055c-7e3f-4014-a8e6-343199fd9b01", + "version": 31 +} From a3256eba67cd41cc6ab8f6e4d678ca01c9fea9b7 Mon Sep 17 00:00:00 2001 From: diana chJ <122611454+dianichj@users.noreply.github.com> Date: Mon, 4 Nov 2024 15:01:25 +0100 Subject: [PATCH 10/21] Update pseudo-bulk_edgeR-tests.yml --- .../pseudo-bulk_edgeR-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml index e02fb744f..f17558ecf 100644 --- a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml @@ -28,7 +28,7 @@ element_test: has_size: 531761 delta: 25000 - 'Tables: DEG ': + 'Tables: DEG': element_tests: edgeR_normal-COVID_19: has_text_matching: From c1981d036a5ea5b9376c172bd1c4f1cc1e6220a6 Mon Sep 17 00:00:00 2001 From: diana chJ <122611454+dianichj@users.noreply.github.com> Date: Mon, 4 Nov 2024 15:04:53 +0100 Subject: [PATCH 11/21] Update pseudo-bulk_edgeR-tests.yml --- .../pseudo-bulk_edgeR-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml index f17558ecf..e02fb744f 100644 --- 
a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml @@ -28,7 +28,7 @@ element_test: has_size: 531761 delta: 25000 - 'Tables: DEG': + 'Tables: DEG ': element_tests: edgeR_normal-COVID_19: has_text_matching: From b16eb0eed6c73fd868c519511cc03028f84b22dd Mon Sep 17 00:00:00 2001 From: diana chJ <122611454+dianichj@users.noreply.github.com> Date: Wed, 6 Nov 2024 11:29:32 +0100 Subject: [PATCH 12/21] Update pseudo-bulk_edgeR-tests.yml --- .../pseudo-bulk_edgeR-tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml index e02fb744f..45792d76c 100644 --- a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml @@ -12,7 +12,7 @@ Gene symbol column: gene_symbol Formula: '~ 0 + disease' outputs: - 'pseudobulk_count_matrix': + 'Pseudobulk count matrix': has_text_matching: expression: "ACAP2\t9.0\t18.0\t20.0\t68.0\t106.0\t122.0\t14.0\t259.0\t279.0\t184.0\t612.0\t293.0\t297.0\t46.0\t1.0\t0.0\t1.0\t12.0\t229.0\t151.0\t141.0\t309.0\t299.0\t181.0\t2.0\t2.0\t28.0\t15.0\t54.0\t210.0\t1.0\t1.0\t1.0\t11.0" expression: "ACER3\t4.0\t25.0\t21.0\t110.0\t82.0\t91.0\t22.0\t326.0\t297.0\t211.0\t1004.0\t574.0\t370.0\t108.0\t0.0\t0.0\t2.0\t2.0\t188.0\t113.0\t135.0\t322.0\t324.0\t159.0\t7.0\t7.0\t32.0\t5.0\t33.0\t89.0\t2.0\t2.0\t8.0\t48.0" @@ -28,7 +28,7 @@ element_test: has_size: 531761 delta: 25000 - 'Tables: DEG ': + 'Tables: DEG': element_tests: edgeR_normal-COVID_19: has_text_matching: From 120799154651a4c3c9a98cac8fa793687989a2a8 Mon Sep 17 00:00:00 2001 From: diana chJ <122611454+dianichj@users.noreply.github.com> Date: Wed, 6 Nov 2024 11:30:19 +0100 Subject: [PATCH 
13/21] Update pseudo-bulk_edgeR.ga --- .../pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga index eb9bb57d3..d26b37241 100644 --- a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga @@ -379,7 +379,7 @@ "when": null, "workflow_outputs": [ { - "label": "pseudobulk_count_matrix", + "label": "Pseudobulk count matrix", "output_name": "count_matrix", "uuid": "b28fbf89-5621-459e-9321-6cd0a72261ea" }, @@ -722,7 +722,7 @@ "when": null, "workflow_outputs": [ { - "label": "Tables: DEG ", + "label": "Tables: DEG", "output_name": "outTables", "uuid": "851ac5bc-9c57-4f36-b469-33a2e8dde894" }, From e83d56c7af50f802b483f5ea68a0aba4958ab325 Mon Sep 17 00:00:00 2001 From: diana chJ <122611454+dianichj@users.noreply.github.com> Date: Wed, 6 Nov 2024 11:30:38 +0100 Subject: [PATCH 14/21] Delete workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/test-data/contrasts_files.txt File not needed anymore --- .../test-data/contrasts_files.txt | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/test-data/contrasts_files.txt diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/test-data/contrasts_files.txt b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/test-data/contrasts_files.txt deleted file mode 100644 index 249abe781..000000000 --- a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/test-data/contrasts_files.txt +++ /dev/null @@ -1,2 +0,0 @@ -Header -normal-COVID_19 From 061fb9062bb369e9dd56fa6583a30bd7185b99e2 Mon Sep 17 00:00:00 2001 From: diana chJ <122611454+dianichj@users.noreply.github.com> Date: Wed, 6 Nov 2024 14:00:33 +0100 Subject: [PATCH 15/21] Update 
workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga Co-authored-by: Amirhossein <66441226+nilchia@users.noreply.github.com> --- .../pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga | 1 + 1 file changed, 1 insertion(+) diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga index d26b37241..a7ff28d0a 100644 --- a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga @@ -21,6 +21,7 @@ ], "format-version": "0.1", "license": "CC-BY-4.0", + "release": "0.1", "name": "pseudo-bulk_edgeR", "report": { "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" From 18e2f7e73f470e19ae0b75ddbcdb3ffbbb7dc531 Mon Sep 17 00:00:00 2001 From: diana chJ <122611454+dianichj@users.noreply.github.com> Date: Wed, 6 Nov 2024 19:03:13 +0100 Subject: [PATCH 16/21] Update workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Björn Grüning --- .../pseudo-bulk_edgeR-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml index 45792d76c..412e62767 100644 --- a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml @@ -2,7 +2,7 @@ job: Source AnnData file: class: File - location: https://zenodo.org/records/13929549/files/Source%20AnnData%20file.h5ad?download=1 + location: 
https://zenodo.org/records/13929549/files/Source%20AnnData%20file.h5ad filetype: h5ad 'Pseudo-bulk: Fields to merge': null Groupy column: cell_type From b145339a0ba8b78513a1634ada3e0b662910dd3a Mon Sep 17 00:00:00 2001 From: diana chJ <122611454+dianichj@users.noreply.github.com> Date: Fri, 15 Nov 2024 22:04:34 +0100 Subject: [PATCH 17/21] Update workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml Co-authored-by: Marius van den Beek --- .../pseudo-bulk_edgeR-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml index 412e62767..9ec0fd431 100644 --- a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR-tests.yml @@ -5,7 +5,7 @@ location: https://zenodo.org/records/13929549/files/Source%20AnnData%20file.h5ad filetype: h5ad 'Pseudo-bulk: Fields to merge': null - Groupy column: cell_type + Group by column: cell_type Sample key column: individual Name Your Raw Counts Layer: counts Factor fields: disease From 1e01859a12c5065c3689eebc8bb5c01e93dbac3c Mon Sep 17 00:00:00 2001 From: diana chJ <122611454+dianichj@users.noreply.github.com> Date: Fri, 15 Nov 2024 22:05:06 +0100 Subject: [PATCH 18/21] Update workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga Co-authored-by: Marius van den Beek --- .../pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga index a7ff28d0a..6eefe3f6b 100644 --- a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga +++ 
b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga @@ -1,6 +1,6 @@ { "a_galaxy_workflow": "true", - "annotation": "Pseudobulk", + "annotation": "This workflow uses the decoupler tool in Galaxy to generate pseudobulk counts from an annotated AnnData file obtained from scRNA-seq analysis. Following the pseudobulk step, differential expression genes (DEG) are calculated using the edgeR tool. The workflow also includes data sanitation steps to ensure smooth operation of edgeR and minimizing potential issues. Additionally, a Volcano plot tool is used to visualize the results after the DEG analysis.", "comments": [], "creator": [ { From 6b55849a3fd33c3c343969746ed9c3150a055634 Mon Sep 17 00:00:00 2001 From: diana chJ <122611454+dianichj@users.noreply.github.com> Date: Fri, 15 Nov 2024 22:05:19 +0100 Subject: [PATCH 19/21] Update workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga Co-authored-by: Marius van den Beek --- .../pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga index 6eefe3f6b..b3212956f 100644 --- a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga @@ -99,7 +99,7 @@ "name": "Groupy column" } ], - "label": "Groupy column", + "label": "Group by column", "name": "Input parameter", "outputs": [], "position": { From 36b33e2864db6155977bb7dda99ff8816d39cd94 Mon Sep 17 00:00:00 2001 From: diana chJ <122611454+dianichj@users.noreply.github.com> Date: Fri, 15 Nov 2024 22:05:34 +0100 Subject: [PATCH 20/21] Update workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga Co-authored-by: Marius van den Beek --- .../pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga index b3212956f..6ce640682 100644 --- a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga @@ -22,7 +22,7 @@ "format-version": "0.1", "license": "CC-BY-4.0", "release": "0.1", - "name": "pseudo-bulk_edgeR", + "name": "Differential gene expression for single-cell data using pseudo-bulk counts with edgeR", "report": { "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" }, From 648462d613a4dd3e354d603f6aaedc5b959f030a Mon Sep 17 00:00:00 2001 From: diana chJ <122611454+dianichj@users.noreply.github.com> Date: Fri, 15 Nov 2024 22:05:45 +0100 Subject: [PATCH 21/21] Update workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga Co-authored-by: Marius van den Beek --- .../pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga index 6ce640682..af193d2ca 100644 --- a/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga +++ b/workflows/scRNAseq/pseudobulk-worflow-decoupler-edger/pseudo-bulk_edgeR.ga @@ -96,7 +96,7 @@ "inputs": [ { "description": "Typically, the column in obs that you want to use for comparisons later (the main contrast field) should be specified here. 
This column will also be used for plotting the pseudo-bulk samples, showing the number of counts and cells.", - "name": "Groupy column" + "name": "Group by column" } ], "label": "Group by column",