diff --git a/workflows/data-manipulation/split-collection/.dockstore.yml b/workflows/data-manipulation/split-collection/.dockstore.yml new file mode 100644 index 000000000..c4eb4c60f --- /dev/null +++ b/workflows/data-manipulation/split-collection/.dockstore.yml @@ -0,0 +1,29 @@ +version: 1.2 +workflows: +- name: Split-collection-by-pattern-in-identifiers + subclass: Galaxy + publish: true + primaryDescriptorPath: /Split-collection-by-pattern-in-identifiers.ga + testParameterFiles: + - /Split-collection-by-pattern-in-identifiers-tests.yml + authors: + - name: Lucille Delisle + orcid: 0000-0002-1964-4960 +- name: Split-collection-using-tabular + subclass: Galaxy + publish: true + primaryDescriptorPath: /Split-collection-using-tabular.ga + testParameterFiles: + - /Split-collection-using-tabular-tests.yml + authors: + - name: Lucille Delisle + orcid: 0000-0002-1964-4960 +- name: Split-collection-using-comma-separated-list + subclass: Galaxy + publish: true + primaryDescriptorPath: /Split-collection-using-comma-separated-list.ga + testParameterFiles: + - /Split-collection-using-comma-separated-list-tests.yml + authors: + - name: Lucille Delisle + orcid: 0000-0002-1964-4960 diff --git a/workflows/data-manipulation/split-collection/CHANGELOG.md b/workflows/data-manipulation/split-collection/CHANGELOG.md new file mode 100644 index 000000000..2954c5f21 --- /dev/null +++ b/workflows/data-manipulation/split-collection/CHANGELOG.md @@ -0,0 +1,5 @@ +# Changelog + +## [0.1] 2024-11-12 + +First release diff --git a/workflows/data-manipulation/split-collection/README.md b/workflows/data-manipulation/split-collection/README.md new file mode 100644 index 000000000..04a1f113c --- /dev/null +++ b/workflows/data-manipulation/split-collection/README.md @@ -0,0 +1,14 @@ +# Split collection + +These workflows allow you to split a collection into 2 collections using identifiers. + +The common input to all workflows is a collection of type 'list'. 
+ +The way to split the collection differs between workflows. + +- In the workflow "Split collection by pattern in identifiers", you need to specify a "pattern". This is a word that is present only in one part of your samples. This will split your collection into 2: one with the identifiers which have the 'pattern' and the other one with the identifiers which don't have it. +- In the workflow "Split collection using tabular", you need to provide a tabular file where the first column is the identifier and the second column is the group (no header). All identifiers whose second column matches the group of the first item will be grouped into a collection. Others will be in another collection. +- In the workflow "Split collection using comma separated list", you need to give the group of each item of your collection separated by commas. For example, if you have 3 items in your collection, you can put "1,1,2" to put the first 2 together and the third one apart. + +Warnings: +- If you specify more than 2 groups in the second and third workflows, it will not create 3 collections: you still get only 2 collections, one with the items of the first group and one with all the others. 
diff --git a/workflows/data-manipulation/split-collection/Split-collection-by-pattern-in-identifiers-tests.yml b/workflows/data-manipulation/split-collection/Split-collection-by-pattern-in-identifiers-tests.yml new file mode 100644 index 000000000..aa4ec7cb9 --- /dev/null +++ b/workflows/data-manipulation/split-collection/Split-collection-by-pattern-in-identifiers-tests.yml @@ -0,0 +1,51 @@ +- doc: Test outline for Split-collection-by-pattern-in-identifiers.ga + job: + Input Dataset Collection: + class: Collection + collection_type: list + elements: + - class: File + identifier: cat1_1 + path: test-data/file.txt + - class: File + identifier: cat1_2 + path: test-data/file.txt + - class: File + identifier: cat1_3 + path: test-data/file.txt + - class: File + identifier: cat2_1 + path: test-data/file.txt + - class: File + identifier: cat3_1 + path: test-data/file.txt + pattern: cat1 + outputs: + unselected_collection: + class: Collection + collection_type: list + element_tests: + cat2_1: + asserts: + has_text: + text: "whatever" + cat3_1: + asserts: + has_text: + text: "whatever" + selected_collection: + class: Collection + collection_type: list + element_tests: + cat1_1: + asserts: + has_text: + text: "whatever" + cat1_2: + asserts: + has_text: + text: "whatever" + cat1_3: + asserts: + has_text: + text: "whatever" diff --git a/workflows/data-manipulation/split-collection/Split-collection-by-pattern-in-identifiers.ga b/workflows/data-manipulation/split-collection/Split-collection-by-pattern-in-identifiers.ga new file mode 100644 index 000000000..3cecd8922 --- /dev/null +++ b/workflows/data-manipulation/split-collection/Split-collection-by-pattern-in-identifiers.ga @@ -0,0 +1,244 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "This workflow takes a collection and returns 2 collections. 
One with the items which contains the pattern, one with the items which do not contains the pattern.", + "comments": [], + "creator": [ + { + "class": "Person", + "identifier": "https://orcid.org/0000-0002-1964-4960", + "name": "Lucille Delisle" + } + ], + "format-version": "0.1", + "license": "MIT", + "release": "0.1", + "name": "Split collection by pattern in element identifiers", + "report": { + "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" + }, + "steps": { + "0": { + "annotation": "Collection you want to split into 2", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "Collection you want to split into 2", + "name": "Input Dataset Collection" + } + ], + "label": "Input Dataset Collection", + "name": "Input dataset collection", + "outputs": [], + "position": { + "left": 0, + "top": 72 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null, \"collection_type\": \"list\"}", + "tool_version": null, + "type": "data_collection_input", + "uuid": "3065a6b3-a33e-4eb3-97d7-13681b2b4e40", + "when": null, + "workflow_outputs": [] + }, + "1": { + "annotation": "pattern used to select the items in the collection", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "pattern used to select the items in the collection", + "name": "pattern" + } + ], + "label": "pattern", + "name": "Input parameter", + "outputs": [], + "position": { + "left": 43, + "top": 177.5 + }, + "tool_id": null, + "tool_state": "{\"parameter_type\": \"text\", \"optional\": false}", + "tool_version": null, + "type": "parameter_input", + "uuid": "3070ffe4-0f1a-47f4-92a2-98024da29942", + "when": null, + "workflow_outputs": [] + }, + "2": { + "annotation": "", + "content_id": 
"toolshed.g2.bx.psu.edu/repos/iuc/collection_element_identifiers/collection_element_identifiers/0.0.2", + "errors": null, + "id": 2, + "input_connections": { + "input_collection": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "Extract element identifiers", + "outputs": [ + { + "name": "output", + "type": "txt" + } + ], + "position": { + "left": 299, + "top": 0 + }, + "post_job_actions": { + "HideDatasetActionoutput": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/collection_element_identifiers/collection_element_identifiers/0.0.2", + "tool_shed_repository": { + "changeset_revision": "d3c07d270a50", + "name": "collection_element_identifiers", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"input_collection\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.0.2", + "type": "tool", + "uuid": "0abdf45b-5993-4645-ae05-a9f5f0efa1f8", + "when": null, + "workflow_outputs": [] + }, + "3": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_grep_tool/9.3+galaxy1", + "errors": null, + "id": 3, + "input_connections": { + "infile": { + "id": 2, + "output_name": "output" + }, + "url_paste": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [], + "label": "Select identifiers with pattern", + "name": "Search in textfiles", + "outputs": [ + { + "name": "output", + "type": "input" + } + ], + "position": { + "left": 577, + "top": 106 + }, + "post_job_actions": { + "HideDatasetActionoutput": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_grep_tool/9.3+galaxy1", + 
"tool_shed_repository": { + "changeset_revision": "86755160afbf", + "name": "text_processing", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"case_sensitive\": \"-i\", \"color\": \"NOCOLOR\", \"infile\": {\"__class__\": \"ConnectedValue\"}, \"invert\": \"\", \"lines_after\": \"0\", \"lines_before\": \"0\", \"regex_type\": \"-P\", \"url_paste\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "9.3+galaxy1", + "type": "tool", + "uuid": "ec998903-14ca-463e-bbde-1d0ff8c5d0f8", + "when": null, + "workflow_outputs": [] + }, + "4": { + "annotation": "", + "content_id": "__FILTER_FROM_FILE__", + "errors": null, + "id": 4, + "input_connections": { + "how|filter_source": { + "id": 3, + "output_name": "output" + }, + "input": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Filter collection", + "name": "how" + } + ], + "label": "Split collection into 2", + "name": "Filter collection", + "outputs": [ + { + "name": "output_filtered", + "type": "input" + }, + { + "name": "output_discarded", + "type": "input" + } + ], + "position": { + "left": 797, + "top": 106 + }, + "post_job_actions": { + "RenameDatasetActionoutput_discarded": { + "action_arguments": { + "newname": "not ${pattern}" + }, + "action_type": "RenameDatasetAction", + "output_name": "output_discarded" + }, + "RenameDatasetActionoutput_filtered": { + "action_arguments": { + "newname": "${pattern}" + }, + "action_type": "RenameDatasetAction", + "output_name": "output_filtered" + } + }, + "tool_id": "__FILTER_FROM_FILE__", + "tool_state": "{\"how\": {\"how_filter\": \"remove_if_absent\", \"__current_case__\": 0, \"filter_source\": {\"__class__\": \"ConnectedValue\"}}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.0", + "type": "tool", + "uuid": 
"68b2babb-f4c1-4e5f-ba3c-e00704baa798", + "when": null, + "workflow_outputs": [ + { + "label": "unselected_collection", + "output_name": "output_discarded", + "uuid": "c94a017b-41bd-41a1-9eb5-c2a26c475c53" + }, + { + "label": "selected_collection", + "output_name": "output_filtered", + "uuid": "5a70e977-f0fc-48ca-8ec1-947b2da01a67" + } + ] + } + }, + "tags": [], + "uuid": "67c71ce5-ce8e-4d09-8e6c-e45d2ced17b6", + "version": 3 +} \ No newline at end of file diff --git a/workflows/data-manipulation/split-collection/Split-collection-using-comma-separated-list-tests.yml b/workflows/data-manipulation/split-collection/Split-collection-using-comma-separated-list-tests.yml new file mode 100644 index 000000000..74632b271 --- /dev/null +++ b/workflows/data-manipulation/split-collection/Split-collection-using-comma-separated-list-tests.yml @@ -0,0 +1,51 @@ +- doc: Test outline for Split-collection-using-comma-separated-list.ga + job: + Input Dataset Collection: + class: Collection + collection_type: list + elements: + - class: File + identifier: cat1_1 + path: test-data/file.txt + - class: File + identifier: cat1_2 + path: test-data/file.txt + - class: File + identifier: cat1_3 + path: test-data/file.txt + - class: File + identifier: cat2_1 + path: test-data/file.txt + - class: File + identifier: cat3_1 + path: test-data/file.txt + Groups: 1,1,1,2,3 + outputs: + collection_other: + class: Collection + collection_type: list + element_tests: + cat2_1: + asserts: + has_text: + text: "whatever" + cat3_1: + asserts: + has_text: + text: "whatever" + collection_first_group: + class: Collection + collection_type: list + element_tests: + cat1_1: + asserts: + has_text: + text: "whatever" + cat1_2: + asserts: + has_text: + text: "whatever" + cat1_3: + asserts: + has_text: + text: "whatever" diff --git a/workflows/data-manipulation/split-collection/Split-collection-using-comma-separated-list.ga b/workflows/data-manipulation/split-collection/Split-collection-using-comma-separated-list.ga 
new file mode 100644 index 000000000..f426f794b --- /dev/null +++ b/workflows/data-manipulation/split-collection/Split-collection-using-comma-separated-list.ga @@ -0,0 +1,634 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "This workflow takes a collection and a comma separated list of groups and returns 2 collections: One with the items whose group assignment is the same as the first one and one with the others.", + "comments": [], + "creator": [ + { + "class": "Person", + "identifier": "https://orcid.org/0000-0002-1964-4960", + "name": "Lucille Delisle" + } + ], + "format-version": "0.1", + "license": "MIT", + "release": "0.1", + "name": "Split collection using comma separated list", + "report": { + "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" + }, + "steps": { + "0": { + "annotation": "Collection you want to split into 2", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "Collection you want to split into 2", + "name": "Input Dataset Collection" + } + ], + "label": "Input Dataset Collection", + "name": "Input dataset collection", + "outputs": [], + "position": { + "left": 0, + "top": 0 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null, \"collection_type\": \"list\"}", + "tool_version": null, + "type": "data_collection_input", + "uuid": "4f2d0e0f-4447-4dd1-bbe5-84d1a979e629", + "when": null, + "workflow_outputs": [] + }, + "1": { + "annotation": "groups of each element of the collection separated by comma (no space), for example \"1,1,2,2\".", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "groups of each element of the collection separated by comma (no space), for example \"1,1,2,2\".", + "name": "Groups" + } + ], + "label": "Groups", 
+ "name": "Input parameter", + "outputs": [], + "position": { + "left": 43.36489535152572, + "top": 107.76678920335333 + }, + "tool_id": null, + "tool_state": "{\"parameter_type\": \"text\", \"optional\": false}", + "tool_version": null, + "type": "parameter_input", + "uuid": "922bc93b-0f1e-40d8-bfa3-e4d4590ebe03", + "when": null, + "workflow_outputs": [] + }, + "2": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/collection_element_identifiers/collection_element_identifiers/0.0.2", + "errors": null, + "id": 2, + "input_connections": { + "input_collection": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "Extract element identifiers", + "outputs": [ + { + "name": "output", + "type": "txt" + } + ], + "position": { + "left": 369.4518201104032, + "top": 149.48743551496034 + }, + "post_job_actions": { + "HideDatasetActionoutput": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/collection_element_identifiers/collection_element_identifiers/0.0.2", + "tool_shed_repository": { + "changeset_revision": "d3c07d270a50", + "name": "collection_element_identifiers", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"input_collection\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.0.2", + "type": "tool", + "uuid": "7e1054de-eff3-4ff4-86f4-780c051fc43b", + "when": null, + "workflow_outputs": [] + }, + "3": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_text_file_with_recurring_lines/9.3+galaxy1", + "errors": null, + "id": 3, + "input_connections": { + "token_set_0|line": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [], + "label": "Create a dataset from text", + 
"name": "Create text file", + "outputs": [ + { + "name": "outfile", + "type": "txt" + } + ], + "position": { + "left": 369.4518201104032, + "top": 269.48743551496034 + }, + "post_job_actions": { + "HideDatasetActionoutfile": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "outfile" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_text_file_with_recurring_lines/9.3+galaxy1", + "tool_shed_repository": { + "changeset_revision": "86755160afbf", + "name": "text_processing", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"token_set\": [{\"__index__\": 0, \"line\": {\"__class__\": \"ConnectedValue\"}, \"repeat_select\": {\"repeat_select_opts\": \"user\", \"__current_case__\": 0, \"times\": \"1\"}}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "9.3+galaxy1", + "type": "tool", + "uuid": "2c2568c6-f100-4605-857f-294894573667", + "when": null, + "workflow_outputs": [] + }, + "4": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_find_and_replace/9.3+galaxy1", + "errors": null, + "id": 4, + "input_connections": { + "infile": { + "id": 3, + "output_name": "outfile" + } + }, + "inputs": [], + "label": "Replace comma by back to line", + "name": "Replace", + "outputs": [ + { + "name": "outfile", + "type": "input" + } + ], + "position": { + "left": 598.458507122122, + "top": 299.48743551496034 + }, + "post_job_actions": { + "HideDatasetActionoutfile": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "outfile" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_find_and_replace/9.3+galaxy1", + "tool_shed_repository": { + "changeset_revision": "86755160afbf", + "name": "text_processing", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"chromInfo\": 
\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"find_and_replace\": [{\"__index__\": 0, \"find_pattern\": \",\", \"replace_pattern\": \"\\\\n\", \"is_regex\": true, \"global\": true, \"caseinsensitive\": false, \"wholewords\": false, \"skip_first_line\": false, \"searchwhere\": {\"searchwhere_select\": \"line\", \"__current_case__\": 0}}], \"infile\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "9.3+galaxy1", + "type": "tool", + "uuid": "2502370a-2f9a-4e41-9134-686639873761", + "when": null, + "workflow_outputs": [] + }, + "5": { + "annotation": "", + "content_id": "Paste1", + "errors": null, + "id": 5, + "input_connections": { + "input1": { + "id": 2, + "output_name": "output" + }, + "input2": { + "id": 4, + "output_name": "outfile" + } + }, + "inputs": [], + "label": "Put side by side identifiers and groups", + "name": "Paste", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 809.4518201104032, + "top": 149.48743551496034 + }, + "post_job_actions": { + "ChangeDatatypeActionout_file1": { + "action_arguments": { + "newtype": "tabular" + }, + "action_type": "ChangeDatatypeAction", + "output_name": "out_file1" + }, + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "Paste1", + "tool_state": "{\"__input_ext\": \"txt\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"delimiter\": \"T\", \"input1\": {\"__class__\": \"ConnectedValue\"}, \"input2\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.0", + "type": "tool", + "uuid": "8bca840e-00ad-4b75-b2c4-16a506dc6397", + "when": null, + "workflow_outputs": [] + }, + "6": { + "annotation": "", + "id": 6, + "input_connections": { + "Input Dataset Collection": { + "id": 0, + "input_subworkflow_step_id": 0, + "output_name": 
"output" + }, + "identifier mapping": { + "id": 5, + "input_subworkflow_step_id": 1, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Split collection using tabular", + "outputs": [], + "position": { + "left": 1071, + "top": 151.5 + }, + "subworkflow": { + "a_galaxy_workflow": "true", + "annotation": "", + "comments": [], + "creator": [ + { + "class": "Person", + "identifier": "https://orcid.org/0000-0002-1964-4960", + "name": "Lucille Delisle" + } + ], + "format-version": "0.1", + "license": "MIT", + "name": "Split collection using tabular", + "report": { + "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" + }, + "steps": { + "0": { + "annotation": "Collection you want to split into 2", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "Collection you want to split into 2", + "name": "Input Dataset Collection" + } + ], + "label": "Input Dataset Collection", + "name": "Input dataset collection", + "outputs": [], + "position": { + "left": 0, + "top": 0 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null, \"collection_type\": \"list\"}", + "tool_version": null, + "type": "data_collection_input", + "uuid": "25938412-e737-4d0c-892f-497a548ce40b", + "when": null, + "workflow_outputs": [] + }, + "1": { + "annotation": "tabular where first column matches identifers of input collection and second column indicates the group", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "tabular where first column matches identifers of input collection and second column indicates the group", + "name": "identifier mapping" + } + ], + "label": "identifier mapping", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 76, + 
"top": 114.5 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "5d71aaff-1050-406f-875a-e671bd530867", + "when": null, + "workflow_outputs": [] + }, + "2": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_awk_tool/9.3+galaxy1", + "errors": null, + "id": 2, + "input_connections": { + "infile": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [], + "label": "get the first group value", + "name": "Text reformatting", + "outputs": [ + { + "name": "outfile", + "type": "input" + } + ], + "position": { + "left": 370, + "top": 334 + }, + "post_job_actions": { + "HideDatasetActionoutfile": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "outfile" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_awk_tool/9.3+galaxy1", + "tool_shed_repository": { + "changeset_revision": "86755160afbf", + "name": "text_processing", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"code\": \"NR==1{print $2}\", \"infile\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "9.3+galaxy1", + "type": "tool", + "uuid": "f2c7d369-5e51-4a06-8bbf-86fac09d3601", + "when": null, + "workflow_outputs": [] + }, + "3": { + "annotation": "", + "content_id": "param_value_from_file", + "errors": null, + "id": 3, + "input_connections": { + "input1": { + "id": 2, + "output_name": "outfile" + } + }, + "inputs": [], + "label": "convert to parameter", + "name": "Parse parameter value", + "outputs": [ + { + "name": "text_param", + "type": "expression.json" + } + ], + "position": { + "left": 638.34, + "top": 347.5 + }, + "post_job_actions": { + "HideDatasetActiontext_param": { + 
"action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "text_param" + } + }, + "tool_id": "param_value_from_file", + "tool_state": "{\"input1\": {\"__class__\": \"ConnectedValue\"}, \"param_type\": \"text\", \"remove_newlines\": true, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.1.0", + "type": "tool", + "uuid": "2d67559f-0da3-4b52-97e1-e7c7bd1ba83f", + "when": null, + "workflow_outputs": [] + }, + "4": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/compose_text_param/compose_text_param/0.1.1", + "errors": null, + "id": 4, + "input_connections": { + "components_1|param_type|component_value": { + "id": 3, + "output_name": "text_param" + } + }, + "inputs": [], + "label": "make awk program", + "name": "Compose text parameter value", + "outputs": [ + { + "name": "out1", + "type": "expression.json" + } + ], + "position": { + "left": 905.183349609375, + "top": 343.01666259765625 + }, + "post_job_actions": { + "HideDatasetActionout1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out1" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/compose_text_param/compose_text_param/0.1.1", + "tool_shed_repository": { + "changeset_revision": "e188c9826e0f", + "name": "compose_text_param", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"components\": [{\"__index__\": 0, \"param_type\": {\"select_param_type\": \"text\", \"__current_case__\": 0, \"component_value\": \"$2==\\\"\"}}, {\"__index__\": 1, \"param_type\": {\"select_param_type\": \"text\", \"__current_case__\": 0, \"component_value\": {\"__class__\": \"ConnectedValue\"}}}, {\"__index__\": 2, \"param_type\": {\"select_param_type\": \"text\", \"__current_case__\": 0, \"component_value\": \"\\\"{print $1}\"}}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.1.1", + "type": "tool", + "uuid": "72928cef-e147-45c0-866e-d3bdc5d82937", + "when": 
null, + "workflow_outputs": [] + }, + "5": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_awk_tool/9.3+galaxy1", + "errors": null, + "id": 5, + "input_connections": { + "code": { + "id": 4, + "output_name": "out1" + }, + "infile": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Text reformatting", + "name": "infile" + } + ], + "label": "get only identifiers with first group", + "name": "Text reformatting", + "outputs": [ + { + "name": "outfile", + "type": "input" + } + ], + "position": { + "left": 1224, + "top": 298.5 + }, + "post_job_actions": { + "HideDatasetActionoutfile": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "outfile" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_awk_tool/9.3+galaxy1", + "tool_shed_repository": { + "changeset_revision": "86755160afbf", + "name": "text_processing", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"code\": {\"__class__\": \"ConnectedValue\"}, \"infile\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "9.3+galaxy1", + "type": "tool", + "uuid": "904fd1c2-cbc6-45c2-b4d7-d8e72b07d5cf", + "when": null, + "workflow_outputs": [] + }, + "6": { + "annotation": "", + "content_id": "__FILTER_FROM_FILE__", + "errors": null, + "id": 6, + "input_connections": { + "how|filter_source": { + "id": 5, + "output_name": "outfile" + }, + "input": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Filter collection", + "name": "how" + } + ], + "label": "Split collection into 2", + "name": "Filter collection", + "outputs": [ + { + "name": "output_filtered", + "type": "input" + }, + { + "name": "output_discarded", + "type": "input" + } + ], + "position": { + "left": 1604.8936434420398, + "top": 
252.46106251153242 + }, + "post_job_actions": { + "RenameDatasetActionoutput_discarded": { + "action_arguments": { + "newname": "not first group" + }, + "action_type": "RenameDatasetAction", + "output_name": "output_discarded" + }, + "RenameDatasetActionoutput_filtered": { + "action_arguments": { + "newname": "first group" + }, + "action_type": "RenameDatasetAction", + "output_name": "output_filtered" + } + }, + "tool_id": "__FILTER_FROM_FILE__", + "tool_state": "{\"how\": {\"how_filter\": \"remove_if_absent\", \"__current_case__\": 0, \"filter_source\": {\"__class__\": \"ConnectedValue\"}}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.0", + "type": "tool", + "uuid": "8ff72610-8349-4862-b66e-1416e40ff95a", + "when": null, + "workflow_outputs": [ + { + "label": "collection_other", + "output_name": "output_discarded", + "uuid": "d8721c45-506c-4378-95fc-a951cd854fc5" + }, + { + "label": "collection_first_group", + "output_name": "output_filtered", + "uuid": "32af9f5d-c3f1-437a-8b08-6032212dd277" + } + ] + } + }, + "tags": [], + "uuid": "f3a5e3f5-f2ce-4dde-aa36-6d39c8d37855" + }, + "tool_id": null, + "type": "subworkflow", + "uuid": "eadd9f06-55d3-49db-b1e2-02550196540a", + "when": null, + "workflow_outputs": [ + { + "label": "collection_first_group", + "output_name": "collection_first_group", + "uuid": "cc55d416-fed3-4a69-9ce3-a33ea9ca62bf" + }, + { + "label": "collection_other", + "output_name": "collection_other", + "uuid": "f432b663-fb45-426c-b890-84062087db44" + } + ] + } + }, + "tags": [], + "uuid": "71ae6dd8-b4e7-419d-83d5-9cd25ca79d57", + "version": 2 +} \ No newline at end of file diff --git a/workflows/data-manipulation/split-collection/Split-collection-using-tabular-tests.yml b/workflows/data-manipulation/split-collection/Split-collection-using-tabular-tests.yml new file mode 100644 index 000000000..a56f5a1e0 --- /dev/null +++ 
b/workflows/data-manipulation/split-collection/Split-collection-using-tabular-tests.yml @@ -0,0 +1,54 @@ +- doc: Test outline for Split-collection-using-tabular.ga + job: + Input Dataset Collection: + class: Collection + collection_type: list + elements: + - class: File + identifier: cat1_1 + path: test-data/file.txt + - class: File + identifier: cat1_2 + path: test-data/file.txt + - class: File + identifier: cat1_3 + path: test-data/file.txt + - class: File + identifier: cat2_1 + path: test-data/file.txt + - class: File + identifier: cat3_1 + path: test-data/file.txt + identifier mapping: + class: File + path: test-data/group_asignment.txt + datatype: tabular + outputs: + collection_other: + class: Collection + collection_type: list + element_tests: + cat2_1: + asserts: + has_text: + text: "whatever" + cat3_1: + asserts: + has_text: + text: "whatever" + collection_first_group: + class: Collection + collection_type: list + element_tests: + cat1_1: + asserts: + has_text: + text: "whatever" + cat1_2: + asserts: + has_text: + text: "whatever" + cat1_3: + asserts: + has_text: + text: "whatever" diff --git a/workflows/data-manipulation/split-collection/Split-collection-using-tabular.ga b/workflows/data-manipulation/split-collection/Split-collection-using-tabular.ga new file mode 100644 index 000000000..20af85094 --- /dev/null +++ b/workflows/data-manipulation/split-collection/Split-collection-using-tabular.ga @@ -0,0 +1,333 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "This workflow takes a collection and a tabular with 2 columns (one matches the identifiers of the collection, the second indicates the group) and returns 2 collections: One with the items which were in the tabular and whose second column value was the same as the first item and one with the others.", + "comments": [], + "creator": [ + { + "class": "Person", + "identifier": "https://orcid.org/0000-0002-1964-4960", + "name": "Lucille Delisle" + } + ], + "format-version": "0.1", + "license": "MIT", + 
"release": "0.1", + "name": "Split collection using tabular", + "report": { + "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" + }, + "steps": { + "0": { + "annotation": "Collection you want to split into 2", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "Collection you want to split into 2", + "name": "Input Dataset Collection" + } + ], + "label": "Input Dataset Collection", + "name": "Input dataset collection", + "outputs": [], + "position": { + "left": 0, + "top": 0 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null, \"collection_type\": \"list\"}", + "tool_version": null, + "type": "data_collection_input", + "uuid": "25938412-e737-4d0c-892f-497a548ce40b", + "when": null, + "workflow_outputs": [] + }, + "1": { + "annotation": "tabular where first column matches identifers of input collection and second column indicates the group", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "tabular where first column matches identifers of input collection and second column indicates the group", + "name": "identifier mapping" + } + ], + "label": "identifier mapping", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 76, + "top": 114.5 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"format\": [\"tabular\"], \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "5d71aaff-1050-406f-875a-e671bd530867", + "when": null, + "workflow_outputs": [] + }, + "2": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_awk_tool/9.3+galaxy1", + "errors": null, + "id": 2, + "input_connections": { + "infile": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [], + 
"label": "get the first group value", + "name": "Text reformatting", + "outputs": [ + { + "name": "outfile", + "type": "input" + } + ], + "position": { + "left": 370, + "top": 334 + }, + "post_job_actions": { + "HideDatasetActionoutfile": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "outfile" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_awk_tool/9.3+galaxy1", + "tool_shed_repository": { + "changeset_revision": "86755160afbf", + "name": "text_processing", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"code\": \"NR==1{print $2}\", \"infile\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "9.3+galaxy1", + "type": "tool", + "uuid": "f2c7d369-5e51-4a06-8bbf-86fac09d3601", + "when": null, + "workflow_outputs": [] + }, + "3": { + "annotation": "", + "content_id": "param_value_from_file", + "errors": null, + "id": 3, + "input_connections": { + "input1": { + "id": 2, + "output_name": "outfile" + } + }, + "inputs": [], + "label": "convert to parameter", + "name": "Parse parameter value", + "outputs": [ + { + "name": "text_param", + "type": "expression.json" + } + ], + "position": { + "left": 638.34, + "top": 347.5 + }, + "post_job_actions": { + "HideDatasetActiontext_param": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "text_param" + } + }, + "tool_id": "param_value_from_file", + "tool_state": "{\"input1\": {\"__class__\": \"ConnectedValue\"}, \"param_type\": \"text\", \"remove_newlines\": true, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.1.0", + "type": "tool", + "uuid": "2d67559f-0da3-4b52-97e1-e7c7bd1ba83f", + "when": null, + "workflow_outputs": [] + }, + "4": { + "annotation": "", + "content_id": 
"toolshed.g2.bx.psu.edu/repos/iuc/compose_text_param/compose_text_param/0.1.1", + "errors": null, + "id": 4, + "input_connections": { + "components_1|param_type|component_value": { + "id": 3, + "output_name": "text_param" + } + }, + "inputs": [], + "label": "make awk program", + "name": "Compose text parameter value", + "outputs": [ + { + "name": "out1", + "type": "expression.json" + } + ], + "position": { + "left": 905.183349609375, + "top": 343.01666259765625 + }, + "post_job_actions": { + "HideDatasetActionout1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out1" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/compose_text_param/compose_text_param/0.1.1", + "tool_shed_repository": { + "changeset_revision": "e188c9826e0f", + "name": "compose_text_param", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"components\": [{\"__index__\": 0, \"param_type\": {\"select_param_type\": \"text\", \"__current_case__\": 0, \"component_value\": \"$2==\\\"\"}}, {\"__index__\": 1, \"param_type\": {\"select_param_type\": \"text\", \"__current_case__\": 0, \"component_value\": {\"__class__\": \"ConnectedValue\"}}}, {\"__index__\": 2, \"param_type\": {\"select_param_type\": \"text\", \"__current_case__\": 0, \"component_value\": \"\\\"{print $1}\"}}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.1.1", + "type": "tool", + "uuid": "72928cef-e147-45c0-866e-d3bdc5d82937", + "when": null, + "workflow_outputs": [] + }, + "5": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_awk_tool/9.3+galaxy1", + "errors": null, + "id": 5, + "input_connections": { + "code": { + "id": 4, + "output_name": "out1" + }, + "infile": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Text reformatting", + "name": "infile" + } + ], + "label": "get only identifiers with first group", + "name": 
"Text reformatting", + "outputs": [ + { + "name": "outfile", + "type": "input" + } + ], + "position": { + "left": 1224, + "top": 298.5 + }, + "post_job_actions": { + "HideDatasetActionoutfile": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "outfile" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_awk_tool/9.3+galaxy1", + "tool_shed_repository": { + "changeset_revision": "86755160afbf", + "name": "text_processing", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"code\": {\"__class__\": \"ConnectedValue\"}, \"infile\": {\"__class__\": \"RuntimeValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "9.3+galaxy1", + "type": "tool", + "uuid": "904fd1c2-cbc6-45c2-b4d7-d8e72b07d5cf", + "when": null, + "workflow_outputs": [] + }, + "6": { + "annotation": "", + "content_id": "__FILTER_FROM_FILE__", + "errors": null, + "id": 6, + "input_connections": { + "how|filter_source": { + "id": 5, + "output_name": "outfile" + }, + "input": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Filter collection", + "name": "how" + } + ], + "label": "Split collection into 2", + "name": "Filter collection", + "outputs": [ + { + "name": "output_filtered", + "type": "input" + }, + { + "name": "output_discarded", + "type": "input" + } + ], + "position": { + "left": 1604.8936434420398, + "top": 252.46106251153242 + }, + "post_job_actions": { + "RenameDatasetActionoutput_discarded": { + "action_arguments": { + "newname": "not first group" + }, + "action_type": "RenameDatasetAction", + "output_name": "output_discarded" + }, + "RenameDatasetActionoutput_filtered": { + "action_arguments": { + "newname": "first group" + }, + "action_type": "RenameDatasetAction", + "output_name": "output_filtered" + } + }, + "tool_id": "__FILTER_FROM_FILE__", + "tool_state": "{\"how\": {\"how_filter\": 
\"remove_if_absent\", \"__current_case__\": 0, \"filter_source\": {\"__class__\": \"ConnectedValue\"}}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.0", + "type": "tool", + "uuid": "8ff72610-8349-4862-b66e-1416e40ff95a", + "when": null, + "workflow_outputs": [ + { + "label": "collection_other", + "output_name": "output_discarded", + "uuid": "d8721c45-506c-4378-95fc-a951cd854fc5" + }, + { + "label": "collection_first_group", + "output_name": "output_filtered", + "uuid": "32af9f5d-c3f1-437a-8b08-6032212dd277" + } + ] + } + }, + "tags": [], + "uuid": "f3a5e3f5-f2ce-4dde-aa36-6d39c8d37855", + "version": 4 +} \ No newline at end of file diff --git a/workflows/data-manipulation/split-collection/test-data/file.txt b/workflows/data-manipulation/split-collection/test-data/file.txt new file mode 100644 index 000000000..982793c32 --- /dev/null +++ b/workflows/data-manipulation/split-collection/test-data/file.txt @@ -0,0 +1 @@ +whatever diff --git a/workflows/data-manipulation/split-collection/test-data/group_asignment.txt b/workflows/data-manipulation/split-collection/test-data/group_asignment.txt new file mode 100644 index 000000000..2676868b4 --- /dev/null +++ b/workflows/data-manipulation/split-collection/test-data/group_asignment.txt @@ -0,0 +1,5 @@ +cat1_1 1 +cat1_2 1 +cat1_3 1 +cat2_1 2 +cat3_1 3