-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement some basic workflow linting.
- Loading branch information
Showing
5 changed files
with
241 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
import sys | ||
|
||
from gxformat2._yaml import ordered_load | ||
|
||
# Process exit codes returned by the lint entry points in this module.
EXIT_CODE_SUCCESS = 0
EXIT_CODE_LINT_FAILED = 1
EXIT_CODE_FORMAT_ERROR = 2
EXIT_CODE_FILE_PARSE_FAILED = 3


def lint_ga(workflow_dict):
    """Lint a native (``.ga``-style) Galaxy workflow dictionary.

    :param workflow_dict: mapping loaded from the workflow file.
    :returns: ``EXIT_CODE_FORMAT_ERROR`` when the ``format-version`` /
        ``a_galaxy_workflow`` markers are missing or wrong, or when ``steps``
        is not a mapping; ``EXIT_CODE_LINT_FAILED`` when no step declares a
        workflow output or when any declared output has no label;
        ``EXIT_CODE_SUCCESS`` otherwise.
    """
    if workflow_dict.get("format-version") != "0.1":
        return EXIT_CODE_FORMAT_ERROR
    if workflow_dict.get("a_galaxy_workflow") != "true":
        return EXIT_CODE_FORMAT_ERROR

    native_steps = workflow_dict.get("steps")
    # A missing or non-mapping "steps" entry used to crash on ``.values()``;
    # report it as a malformed workflow instead.
    if not isinstance(native_steps, dict):
        return EXIT_CODE_FORMAT_ERROR

    found_outputs = False
    found_output_without_label = False

    for step in native_steps.values():
        # ``or []`` also covers an explicit null "workflow_outputs" value.
        for workflow_output in step.get("workflow_outputs") or []:
            found_outputs = True
            if not workflow_output.get("label"):
                found_output_without_label = True

    if not found_outputs or found_output_without_label:
        return EXIT_CODE_LINT_FAILED

    return EXIT_CODE_SUCCESS
|
||
|
||
def lint_format2(workflow_dict):
    """Lint a Format 2 workflow dictionary.

    The only check performed is that a non-empty ``outputs`` entry exists.

    :param workflow_dict: mapping loaded from the workflow file.
    :returns: ``EXIT_CODE_SUCCESS`` when ``outputs`` is present and truthy,
        ``EXIT_CODE_LINT_FAILED`` otherwise.
    """
    declared_outputs = workflow_dict.get("outputs")
    if declared_outputs:
        return EXIT_CODE_SUCCESS
    return EXIT_CODE_LINT_FAILED
|
||
|
||
def main(argv):
    """Lint the workflow file named by ``argv[1]`` and return an exit code.

    :param argv: argument vector; ``argv[1]`` is the path of the workflow
        file to lint.
    :returns: ``EXIT_CODE_FILE_PARSE_FAILED`` when the file cannot be parsed,
        ``EXIT_CODE_FORMAT_ERROR`` when the parsed document is not a mapping,
        otherwise the result of ``lint_format2`` (for documents whose
        ``class`` is ``GalaxyWorkflow``) or ``lint_ga`` (everything else).
    """
    path = argv[1]
    with open(path, "r") as f:
        try:
            workflow_dict = ordered_load(f)
        except Exception:
            # Deliberately broad: at this command-line boundary any loader
            # failure is reported as a parse failure rather than a traceback.
            return EXIT_CODE_FILE_PARSE_FAILED

    # A document can parse successfully without being a mapping (an empty
    # file, a bare list or scalar); calling ``.get`` on it would crash.
    if not isinstance(workflow_dict, dict):
        return EXIT_CODE_FORMAT_ERROR

    if workflow_dict.get("class") == "GalaxyWorkflow":
        return lint_format2(workflow_dict)
    return lint_ga(workflow_dict)


if __name__ == "__main__":
    sys.exit(main(sys.argv))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
from gxformat2.converter import yaml_to_workflow | ||
from gxformat2.interface import ImporterGalaxyInterface | ||
|
||
|
||
# Placeholder workflow text: a single newline.
TEST_GOOD_WORKFLOW = "\n"
|
||
|
||
def to_native(has_yaml, **kwds):
    """Run ``yaml_to_workflow`` on ``has_yaml`` with a mock Galaxy interface.

    A fresh ``MockGalaxyInterface`` is supplied as the second argument and
    ``None`` as the third; extra keyword arguments are forwarded unchanged.
    """
    galaxy_interface = MockGalaxyInterface()
    return yaml_to_workflow(has_yaml, galaxy_interface, None, **kwds)
|
||
|
||
def assert_valid_native(as_dict_native):
    """Assert that ``as_dict_native`` looks like a native workflow dictionary.

    Checks the ``a_galaxy_workflow`` and ``format-version`` markers, that
    ``steps`` is present, that step keys are the strings "0", "1", ... in
    iteration order, and that every step has a recognised ``type``.

    :raises AssertionError: when any of those checks fails.
    """
    assert as_dict_native["a_galaxy_workflow"] == "true"
    assert as_dict_native["format-version"] == "0.1"
    assert "steps" in as_dict_native

    known_step_types = ["data_input", "data_collection_input", "tool", "subworkflow"]
    for expected_index, (step_key, step) in enumerate(as_dict_native["steps"].items()):
        assert step_key == str(expected_index)
        assert "type" in step
        assert step["type"] in known_step_types
|
||
|
||
class MockGalaxyInterface(ImporterGalaxyInterface):
    """Galaxy interface stand-in whose ``import_workflow`` does nothing."""

    def import_workflow(self, workflow, **kwds):
        """Accept the workflow and keyword arguments, ignore them, return ``None``."""
        return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
import copy | ||
import json | ||
import os | ||
import tempfile | ||
|
||
from gxformat2.lint import main | ||
from gxformat2._yaml import ordered_dump, ordered_load | ||
from ._helpers import assert_valid_native, to_native | ||
|
||
|
||
# Absolute directory of this test module, and the "examples" directory under it.
_MODULE_DIR = os.path.dirname(__file__)
TEST_PATH = os.path.abspath(_MODULE_DIR)
TEST_EXAMPLES = os.path.join(TEST_PATH, "examples")
|
||
# Format 2 workflow with one File input, one tool step and one declared output.
# YAML nesting is significant: each child key is indented under its parent.
BASIC_WORKFLOW = """
class: GalaxyWorkflow
doc: |
  Simple workflow that no-op cats a file and then selects 10 random lines.
inputs:
  the_input:
    type: File
    doc: input doc
outputs:
  the_output:
    outputSource: cat/out_file1
steps:
  cat:
    tool_id: cat1
    doc: cat doc
    in:
      input1: the_input
"""
|
||
# Format 2 workflow whose single step wires the same input into repeat
# parameters (``queries_0|input2`` and ``queries_1|input2``).
WORKFLOW_WITH_REPEAT = """
class: GalaxyWorkflow
inputs:
  input1: data
outputs:
  out1:
    outputSource: first_cat/out_file1
steps:
  first_cat:
    tool_id: cat
    in:
      input1: input1
      queries_0|input2: input1
      queries_1|input2: input1
"""
|
||
|
||
# Format 2 workflow that feeds a collection through the apply-rules tool and
# then into a random-lines step; tool state is given inline under ``state``.
RULES_TOOL = """
class: GalaxyWorkflow
inputs:
  input_c: collection
outputs:
  out1:
    outputSource: random_lines/out_file1
steps:
  apply:
    tool_id: __APPLY_RULES__
    state:
      input:
        $link: input_c
      rules:
        rules:
          - type: add_column_metadata
            value: identifier0
          - type: add_column_metadata
            value: identifier0
        mapping:
          - type: list_identifiers
            columns: [0, 1]
  random_lines:
    tool_id: random_lines1
    state:
      num_lines: 1
      input:
        $link: apply#output
      seed_source:
        seed_source_selector: set_seed
        seed: asdf
"""
|
||
|
||
def setup_module(module):
    """Populate the examples directory with workflows named by expected exit code.

    Each example is written as ``<exit code>_<description>.yml`` so that
    ``test_lint_examples`` (and, per the original note, the same tests in
    Java) can derive the expected lint result from the file name alone.
    """
    green_format2 = ordered_load(BASIC_WORKFLOW)
    _dump_with_exit_code(green_format2, 0, "basic_format2")
    green_native = to_native(BASIC_WORKFLOW)
    assert_valid_native(green_native)
    _dump_with_exit_code(green_native, 0, "basic_native")

    # Without "class" the document is linted as native and fails the
    # format markers check.
    invalid_format2_no_format_dict = copy.deepcopy(green_format2)
    del invalid_format2_no_format_dict["class"]
    _dump_with_exit_code(invalid_format2_no_format_dict, 2, "format2_no_class")

    invalid_ga_no_format_dict = copy.deepcopy(green_native)
    del invalid_ga_no_format_dict["a_galaxy_workflow"]
    _dump_with_exit_code(invalid_ga_no_format_dict, 2, "native_no_class")

    red_ga_no_outputs = copy.deepcopy(green_native)
    red_ga_no_outputs_steps = red_ga_no_outputs.get("steps")
    for step in red_ga_no_outputs_steps.values():
        step.pop("workflow_outputs", None)
    _dump_with_exit_code(red_ga_no_outputs, 1, "native_no_outputs")

    red_ga_no_output_labels = copy.deepcopy(green_native)
    red_ga_no_output_labels_steps = red_ga_no_output_labels.get("steps")
    for step in red_ga_no_output_labels_steps.values():
        for workflow_output in step.get("workflow_outputs", []):
            workflow_output["label"] = None
    # Dump the workflow whose labels were just cleared; previously the
    # no-outputs workflow was written here, so the unlabeled-output case
    # was never actually exercised.
    _dump_with_exit_code(red_ga_no_output_labels, 1, "native_no_output_labels")

    red_format2_no_outputs = copy.deepcopy(green_format2)
    del red_format2_no_outputs["outputs"]
    _dump_with_exit_code(red_format2_no_outputs, 1, "format2_no_output")

    green_format2_rules = ordered_load(RULES_TOOL)
    _dump_with_exit_code(green_format2_rules, 0, "format2_rules")

    green_native_rules = to_native(RULES_TOOL)
    _dump_with_exit_code(green_native_rules, 0, "native_format")

    green_format2_repeat = ordered_load(WORKFLOW_WITH_REPEAT)
    _dump_with_exit_code(green_format2_repeat, 0, "format2_repeat")
    green_native_repeat = to_native(WORKFLOW_WITH_REPEAT)
    _dump_with_exit_code(green_native_repeat, 0, "native_repeat")
|
||
|
||
def test_lint_ga_basic():
    """The bundled native workflow declares no outputs, so linting exits with 1."""
    workflow_path = os.path.join(TEST_PATH, "wf3-shed-tools-raw.ga")
    exit_code = main(["lint", workflow_path])
    assert exit_code == 1  # no outputs
|
||
|
||
def test_lint_examples():
    """Lint every file in the examples directory and compare exit codes.

    The expected exit code is the first character of each file name.

    :raises AssertionError: on the first file whose lint result differs from
        its expected code; the message includes the file contents.
    """
    for file_name in os.listdir(TEST_EXAMPLES):
        file_path = os.path.join(TEST_EXAMPLES, file_name)
        expected_exit_code = int(file_name[0])
        actual_exit_code = main(["lint", file_path])
        if actual_exit_code != expected_exit_code:
            # Read through a context manager so the handle is closed even
            # though an exception is raised immediately afterwards.
            with open(file_path, "r") as example_file:
                contents = example_file.read()
            template = "File [%s] didn't lint properly - expected exit code [%d], got [%d]. Contents:\n%s"
            raise AssertionError(template % (file_name, expected_exit_code, actual_exit_code, contents))
|
||
|
||
def _dump_with_exit_code(as_dict, exit_code, description):
    """Write ``as_dict`` to ``<exit_code>_<description>.yml`` in the examples directory.

    The examples directory is created first when it does not exist yet.
    """
    if not os.path.exists(TEST_EXAMPLES):
        os.makedirs(TEST_EXAMPLES)

    example_name = "%d_%s.yml" % (exit_code, description)
    example_path = os.path.join(TEST_EXAMPLES, example_name)
    with open(example_path, "w") as fd:
        ordered_dump(as_dict, fd)
        fd.flush()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"uuid": "eb7fc9c5-992b-495a-9baa-4a9bb67d6262", "tags": [], "format-version": "0.1", "name": "Workflow constructed from history 'Cell paper'", "steps": {"0": {"tool_id": null, "tool_version": null, "outputs": [], "workflow_outputs": [], "input_connections": {}, "tool_state": "{\"name\": \"ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE37nnn/GSE37268/suppl/GSE37268%5Fmof3%2Eout%2Ehpeak%2Etxt%2E\"}", "id": 0, "uuid": "4216f74b-f994-432a-b5be-2a6b981ac16a", "errors": null, "name": "Input dataset", "label": "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE37nnn/GSE37268/suppl/GSE37268%5Fmof3%2Eout%2Ehpeak%2Etxt%2E", "inputs": [{"name": "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE37nnn/GSE37268/suppl/GSE37268%5Fmof3%2Eout%2Ehpeak%2Etxt%2E", "description": ""}], "position": {"top": 335, "left": 200}, "annotation": "", "content_id": null, "type": "data_input"}, "1": {"tool_id": null, "tool_version": null, "outputs": [], "workflow_outputs": [], "input_connections": {}, "tool_state": "{\"name\": \"Genes\"}", "id": 1, "uuid": "0d2e344b-fd58-462c-bd21-c60acd045d9d", "errors": null, "name": "Input dataset", "label": "Genes", "inputs": [{"name": "Genes", "description": ""}], "position": {"top": 455, "left": 200}, "annotation": "", "content_id": null, "type": "data_input"}, "2": {"tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.0", "tool_version": "1.1.0", "outputs": [{"type": "input", "name": "outfile"}], "workflow_outputs": [], "input_connections": {"infile": {"output_name": "output", "id": 0}}, "tool_state": "{\"__page__\": null, \"find_pattern\": \"\\\"[0-9]+\\\"\", \"column\": \"\\\"1\\\"\", \"chromInfo\": \"\\\"/usr/local/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null, \"replace_pattern\": \"\\\"chr&\\\"\", \"infile\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\"}", "id": 2, "tool_shed_repository": {"owner": "bgruening", "changeset_revision": "20344ce0c811", "name": "text_processing", "tool_shed": 
"toolshed.g2.bx.psu.edu"}, "uuid": "d40b3d69-2a99-4b00-bf75-7ded995dfbd7", "errors": null, "name": "Replace Text", "post_job_actions": {}, "label": null, "inputs": [{"name": "infile", "description": "runtime parameter for tool Replace Text"}], "position": {"top": 335, "left": 420}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.0", "type": "tool"}, "3": {"tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/get_flanks/get_flanks1/1.0.0", "tool_version": "1.0.0", "outputs": [{"type": "input", "name": "out_file1"}], "workflow_outputs": [], "input_connections": {"input": {"output_name": "output", "id": 1}}, "tool_state": "{\"__page__\": null, \"direction\": \"\\\"Upstream\\\"\", \"region\": \"\\\"start\\\"\", \"offset\": \"\\\"10000\\\"\", \"input\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"chromInfo\": \"\\\"/usr/local/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/mm9.len\\\"\", \"__rerun_remap_job_id__\": null, \"size\": \"\\\"12000\\\"\"}", "id": 3, "tool_shed_repository": {"owner": "devteam", "changeset_revision": "077f404ae1bb", "name": "get_flanks", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "652af7e8-710b-4476-952f-484d6a2e4a7a", "errors": null, "name": "Get flanks", "post_job_actions": {}, "label": null, "inputs": [{"name": "input", "description": "runtime parameter for tool Get flanks"}], "position": {"top": 475, "left": 415}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/get_flanks/get_flanks1/1.0.0", "type": "tool"}, "4": {"tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.0", "tool_version": "1.1.0", "outputs": [{"type": "input", "name": "outfile"}], "workflow_outputs": [], "input_connections": {"infile": {"output_name": "outfile", "id": 2}}, "tool_state": "{\"__page__\": null, \"find_pattern\": \"\\\"chr20\\\"\", \"column\": \"\\\"1\\\"\", \"chromInfo\": 
\"\\\"/usr/local/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null, \"replace_pattern\": \"\\\"chrX\\\"\", \"infile\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\"}", "id": 4, "tool_shed_repository": {"owner": "bgruening", "changeset_revision": "20344ce0c811", "name": "text_processing", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "37f15259-3b03-4461-8da3-6e8ee34fd2f5", "errors": null, "name": "Replace Text", "post_job_actions": {}, "label": null, "inputs": [{"name": "infile", "description": "runtime parameter for tool Replace Text"}], "position": {"top": 335, "left": 640}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.0", "type": "tool"}, "5": {"tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.0", "tool_version": "1.1.0", "outputs": [{"type": "input", "name": "outfile"}], "workflow_outputs": [], "input_connections": {"infile": {"output_name": "outfile", "id": 4}}, "tool_state": "{\"__page__\": null, \"find_pattern\": \"\\\"chr21\\\"\", \"column\": \"\\\"1\\\"\", \"chromInfo\": \"\\\"/usr/local/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null, \"replace_pattern\": \"\\\"chrY\\\"\", \"infile\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\"}", "id": 5, "tool_shed_repository": {"owner": "bgruening", "changeset_revision": "20344ce0c811", "name": "text_processing", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "8ce00200-94b7-4daf-a0e5-e3b8d6d87eeb", "errors": null, "name": "Replace Text", "post_job_actions": {}, "label": null, "inputs": [{"name": "infile", "description": "runtime parameter for tool Replace Text"}], "position": {"top": 335, "left": 860}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.0", "type": "tool"}, "6": {"tool_id": 
"toolshed.g2.bx.psu.edu/repos/iuc/bedtools/bedtools_intersectbed/2.27.0.1", "tool_version": "2.27.0.1", "outputs": [{"type": "input", "name": "output"}], "workflow_outputs": [], "input_connections": {"inputA": {"output_name": "out_file1", "id": 3}, "reduce_or_iterate|inputB": {"output_name": "outfile", "id": 5}}, "tool_state": "{\"count\": \"\\\"false\\\"\", \"__page__\": null, \"reciprocal\": \"\\\"false\\\"\", \"overlap_mode\": \"\\\"-wa\\\"\", \"invert\": \"\\\"false\\\"\", \"header\": \"\\\"false\\\"\", \"inputA\": \"{\\\"values\\\": [{\\\"src\\\": \\\"hda\\\", \\\"id\\\": 6112359}]}\", \"reduce_or_iterate\": \"{\\\"inputB\\\": {\\\"values\\\": [{\\\"src\\\": \\\"hda\\\", \\\"id\\\": 6112359}]}, \\\"reduce_or_iterate_selector\\\": \\\"iterate\\\", \\\"__current_case__\\\": 0}\", \"split\": \"\\\"false\\\"\", \"fraction\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"strand\": \"\\\"\\\"\", \"once\": \"\\\"false\\\"\"}", "id": 6, "tool_shed_repository": {"owner": "iuc", "changeset_revision": "33c3ddea63c5", "name": "bedtools", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "c65fd1f5-9fc2-4acf-96c6-94ea61906042", "errors": null, "name": "Intersect intervals", "post_job_actions": {}, "label": null, "inputs": [], "position": {"top": 655, "left": 1022}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/bedtools/bedtools_intersectbed/2.27.0.1", "type": "tool"}, "7": {"tool_id": "Grouping1", "tool_version": "2.1.1", "outputs": [{"type": "tabular", "name": "out_file1"}], "workflow_outputs": [], "input_connections": {"input1": {"output_name": "output", "id": 6}}, "tool_state": "{\"operations\": \"[{\\\"opcol\\\": \\\"1\\\", \\\"__index__\\\": 0, \\\"optype\\\": \\\"length\\\", \\\"opround\\\": \\\"no\\\"}]\", \"__page__\": null, \"input1\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"ignorelines\": \"null\", \"groupcol\": \"\\\"1\\\"\", \"__rerun_remap_job_id__\": null, \"ignorecase\": \"\\\"false\\\"\", \"chromInfo\": 
\"\\\"/usr/local/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/mm9.len\\\"\"}", "id": 7, "uuid": "be264526-e498-4050-b6cd-f7a842268d4a", "errors": null, "name": "Group", "post_job_actions": {}, "label": null, "inputs": [{"name": "input1", "description": "runtime parameter for tool Group"}], "position": {"top": 335, "left": 1300}, "annotation": "", "content_id": "Grouping1", "type": "tool"}}, "annotation": "", "a_galaxy_workflow": "true"} |