galaxyproject · lldelisle · Nov 8, 2024 · Nov 12, 2024 · Nov 12, 2024 · Nov 12, 2024
diff --git a/workflows/data-manipulation/split-collection/.dockstore.yml b/workflows/data-manipulation/split-collection/.dockstore.yml
@@ -0,0 +1,29 @@
+version: 1.2
+workflows:
+- name: Split-collection-by-pattern-in-identifiers
+  subclass: Galaxy
+  publish: true
+  primaryDescriptorPath: /Split-collection-by-pattern-in-identifiers.ga
+  testParameterFiles:
+  - /Split-collection-by-pattern-in-identifiers-tests.yml
+  authors:
+  - name: Lucille Delisle
+    orcid: 0000-0002-1964-4960
+- name: Split-collection-using-tabular
+  subclass: Galaxy
+  publish: true
+  primaryDescriptorPath: /Split-collection-using-tabular.ga
+  testParameterFiles:
+  - /Split-collection-using-tabular-tests.yml
+  authors:
+  - name: Lucille Delisle
+    orcid: 0000-0002-1964-4960
+- name: Split-collection-using-comma-separated-list
+  subclass: Galaxy
+  publish: true
+  primaryDescriptorPath: /Split-collection-using-comma-separated-list.ga
+  testParameterFiles:
+  - /Split-collection-using-comma-separated-list-tests.yml
+  authors:
+  - name: Lucille Delisle
+    orcid: 0000-0002-1964-4960
diff --git a/workflows/data-manipulation/split-collection/CHANGELOG.md b/workflows/data-manipulation/split-collection/CHANGELOG.md
@@ -0,0 +1,5 @@
+# Changelog
+
+## [0.1] 2024-11-12
+
+First release
diff --git a/workflows/data-manipulation/split-collection/README.md b/workflows/data-manipulation/split-collection/README.md
@@ -0,0 +1,14 @@
+# Split collection
+
+These workflows allow you to split a collection into 2 using element identifiers.
+
+The common input to all workflows is a collection of type 'list'.
+
+The way to split the collection differs with the workflow.
+
+- In the workflow "Split collection by pattern in identifiers", you need to specify a "pattern". This is a word that is present in the identifiers of only one part of your samples. This will split your collection into 2: one with the elements whose identifiers contain the 'pattern' and the other one with the elements whose identifiers do not contain it.
+- In the workflow "Split collection using tabular", you need to give a tabular file where the first column is the identifier and the second column is the group (no header). All identifiers whose second column matches the first item will be grouped into one collection. The others will be placed in another collection.
+- In the workflow "Split collection using comma separated list", you need to give the group of each item of your collection, separated by commas. For example, if you have 3 items in your collection, you can put "1,1,2" to put the first 2 together and the third one apart.
+
+Warnings:
+- If you specify more than 2 groups in the second or third workflow, it will not create 3 collections: you will still get only 2 collections.
diff --git a/...s/data-manipulation/split-collection/Split-collection-by-pattern-in-identifiers-tests.yml b/...s/data-manipulation/split-collection/Split-collection-by-pattern-in-identifiers-tests.yml
@@ -0,0 +1,51 @@
+- doc: Test outline for Split-collection-by-pattern-in-identifiers.ga
+  job:
+    Input Dataset Collection:
+      class: Collection
+      collection_type: list
+      elements:
+      - class: File
+        identifier: cat1_1
+        path: test-data/file.txt
+      - class: File
+        identifier: cat1_2
+        path: test-data/file.txt
+      - class: File
+        identifier: cat1_3
+        path: test-data/file.txt
+      - class: File
+        identifier: cat2_1
+        path: test-data/file.txt
+      - class: File
+        identifier: cat3_1
+        path: test-data/file.txt
+    pattern: cat1
+  outputs:
+    unselected_collection:
+      class: Collection
+      collection_type: list
+      element_tests:
+        cat2_1:
+          asserts:
+            has_text:
+              text: "whatever"
+        cat3_1:
+          asserts:
+            has_text:
+              text: "whatever"
+    selected_collection:
+      class: Collection
+      collection_type: list
+      element_tests:
+        cat1_1:
+          asserts:
+            has_text:
+              text: "whatever"
+        cat1_2:
+          asserts:
+            has_text:
+              text: "whatever"
+        cat1_3:
+          asserts:
+            has_text:
+              text: "whatever"
diff --git a/workflows/data-manipulation/split-collection/Split-collection-by-pattern-in-identifiers.ga b/workflows/data-manipulation/split-collection/Split-collection-by-pattern-in-identifiers.ga
@@ -0,0 +1,244 @@
+{
+    "a_galaxy_workflow": "true",
+    "annotation": "This workflow takes a collection and returns 2 collections: one with the items which contain the pattern, and one with the items which do not contain the pattern.",
+    "comments": [],
+    "creator": [
+        {
+            "class": "Person",
+            "identifier": "https://orcid.org/0000-0002-1964-4960",
+            "name": "Lucille Delisle"
+        }
+    ],
+    "format-version": "0.1",
+    "license": "MIT",
+    "release": "0.1",
+    "name": "Split collection by pattern in element identifiers",
+    "report": {
+        "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n"
+    },
+    "steps": {
+        "0": {
+            "annotation": "Collection you want to split into 2",
+            "content_id": null,
+            "errors": null,
+            "id": 0,
+            "input_connections": {},
+            "inputs": [
+                {
+                    "description": "Collection you want to split into 2",
+                    "name": "Input Dataset Collection"
+                }
+            ],
+            "label": "Input Dataset Collection",
+            "name": "Input dataset collection",
+            "outputs": [],
+            "position": {
+                "left": 0,
+                "top": 72
+            },
+            "tool_id": null,
+            "tool_state": "{\"optional\": false, \"tag\": null, \"collection_type\": \"list\"}",
+            "tool_version": null,
+            "type": "data_collection_input",
+            "uuid": "3065a6b3-a33e-4eb3-97d7-13681b2b4e40",
+            "when": null,
+            "workflow_outputs": []
+        },
+        "1": {
+            "annotation": "pattern used to select the items in the collection",
+            "content_id": null,
+            "errors": null,
+            "id": 1,
+            "input_connections": {},
+            "inputs": [
+                {
+                    "description": "pattern used to select the items in the collection",
+                    "name": "pattern"
+                }
+            ],
+            "label": "pattern",
+            "name": "Input parameter",
+            "outputs": [],
+            "position": {
+                "left": 43,
+                "top": 177.5
+            },
+            "tool_id": null,
+            "tool_state": "{\"parameter_type\": \"text\", \"optional\": false}",
+            "tool_version": null,
+            "type": "parameter_input",
+            "uuid": "3070ffe4-0f1a-47f4-92a2-98024da29942",
+            "when": null,
+            "workflow_outputs": []
+        },
+        "2": {
+            "annotation": "",
+            "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/collection_element_identifiers/collection_element_identifiers/0.0.2",
+            "errors": null,
+            "id": 2,
+            "input_connections": {
+                "input_collection": {
+                    "id": 0,
+                    "output_name": "output"
+                }
+            },
+            "inputs": [],
+            "label": null,
+            "name": "Extract element identifiers",
+            "outputs": [
+                {
+                    "name": "output",
+                    "type": "txt"
+                }
+            ],
+            "position": {
+                "left": 299,
+                "top": 0
+            },
+            "post_job_actions": {
+                "HideDatasetActionoutput": {
+                    "action_arguments": {},
+                    "action_type": "HideDatasetAction",
+                    "output_name": "output"
+                }
+            },
+            "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/collection_element_identifiers/collection_element_identifiers/0.0.2",
+            "tool_shed_repository": {
+                "changeset_revision": "d3c07d270a50",
+                "name": "collection_element_identifiers",
+                "owner": "iuc",
+                "tool_shed": "toolshed.g2.bx.psu.edu"
+            },
+            "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"input_collection\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
+            "tool_version": "0.0.2",
+            "type": "tool",
+            "uuid": "0abdf45b-5993-4645-ae05-a9f5f0efa1f8",
+            "when": null,
+            "workflow_outputs": []
+        },
+        "3": {
+            "annotation": "",
+            "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_grep_tool/9.3+galaxy1",
+            "errors": null,
+            "id": 3,
+            "input_connections": {
+                "infile": {
+                    "id": 2,
+                    "output_name": "output"
+                },
+                "url_paste": {
+                    "id": 1,
+                    "output_name": "output"
+                }
+            },
+            "inputs": [],
+            "label": "Select identifiers with pattern",
+            "name": "Search in textfiles",
+            "outputs": [
+                {
+                    "name": "output",
+                    "type": "input"
+                }
+            ],
+            "position": {
+                "left": 577,
+                "top": 106
+            },
+            "post_job_actions": {
+                "HideDatasetActionoutput": {
+                    "action_arguments": {},
+                    "action_type": "HideDatasetAction",
+                    "output_name": "output"
+                }
+            },
+            "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_grep_tool/9.3+galaxy1",
+            "tool_shed_repository": {
+                "changeset_revision": "86755160afbf",
+                "name": "text_processing",
+                "owner": "bgruening",
+                "tool_shed": "toolshed.g2.bx.psu.edu"
+            },
+            "tool_state": "{\"case_sensitive\": \"-i\", \"color\": \"NOCOLOR\", \"infile\": {\"__class__\": \"ConnectedValue\"}, \"invert\": \"\", \"lines_after\": \"0\", \"lines_before\": \"0\", \"regex_type\": \"-P\", \"url_paste\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
+            "tool_version": "9.3+galaxy1",
+            "type": "tool",
+            "uuid": "ec998903-14ca-463e-bbde-1d0ff8c5d0f8",
+            "when": null,
+            "workflow_outputs": []
+        },
+        "4": {
+            "annotation": "",
+            "content_id": "__FILTER_FROM_FILE__",
+            "errors": null,
+            "id": 4,
+            "input_connections": {
+                "how|filter_source": {
+                    "id": 3,
+                    "output_name": "output"
+                },
+                "input": {
+                    "id": 0,
+                    "output_name": "output"
+                }
+            },
+            "inputs": [
+                {
+                    "description": "runtime parameter for tool Filter collection",
+                    "name": "how"
+                }
+            ],
+            "label": "Split collection into 2",
+            "name": "Filter collection",
+            "outputs": [
+                {
+                    "name": "output_filtered",
+                    "type": "input"
+                },
+                {
+                    "name": "output_discarded",
+                    "type": "input"
+                }
+            ],
+            "position": {
+                "left": 797,
+                "top": 106
+            },
+            "post_job_actions": {
+                "RenameDatasetActionoutput_discarded": {
+                    "action_arguments": {
+                        "newname": "not ${pattern}"
+                    },
+                    "action_type": "RenameDatasetAction",
+                    "output_name": "output_discarded"
+                },
+                "RenameDatasetActionoutput_filtered": {
+                    "action_arguments": {
+                        "newname": "${pattern}"
+                    },
+                    "action_type": "RenameDatasetAction",
+                    "output_name": "output_filtered"
+                }
+            },
+            "tool_id": "__FILTER_FROM_FILE__",
+            "tool_state": "{\"how\": {\"how_filter\": \"remove_if_absent\", \"__current_case__\": 0, \"filter_source\": {\"__class__\": \"ConnectedValue\"}}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}",
+            "tool_version": "1.0.0",
+            "type": "tool",
+            "uuid": "68b2babb-f4c1-4e5f-ba3c-e00704baa798",
+            "when": null,
+            "workflow_outputs": [
+                {
+                    "label": "unselected_collection",
+                    "output_name": "output_discarded",
+                    "uuid": "c94a017b-41bd-41a1-9eb5-c2a26c475c53"
+                },
+                {
+                    "label": "selected_collection",
+                    "output_name": "output_filtered",
+                    "uuid": "5a70e977-f0fc-48ca-8ec1-947b2da01a67"
+                }
+            ]
+        }
+    },
+    "tags": [],
+    "uuid": "67c71ce5-ce8e-4d09-8e6c-e45d2ced17b6",
+    "version": 3
+}