diff --git a/gxformat2/lint.py b/gxformat2/lint.py
new file mode 100644
index 0000000..8aea660
--- /dev/null
+++ b/gxformat2/lint.py
@@ -0,0 +1,88 @@
+"""Lint native and Format 2 Galaxy workflows, reporting an exit code."""
+import sys
+
+from gxformat2._yaml import ordered_load
+
+EXIT_CODE_SUCCESS = 0
+EXIT_CODE_LINT_FAILED = 1
+EXIT_CODE_FORMAT_ERROR = 2
+EXIT_CODE_FILE_PARSE_FAILED = 3
+
+
+def lint_ga(workflow_dict):
+    """Lint a native workflow dictionary and return an exit code.
+
+    Returns ``EXIT_CODE_FORMAT_ERROR`` when the native format markers or the
+    ``steps`` mapping are missing, ``EXIT_CODE_LINT_FAILED`` when no workflow
+    outputs are declared or a declared output has no label, and
+    ``EXIT_CODE_SUCCESS`` otherwise.
+    """
+    if workflow_dict.get("format-version") != "0.1":
+        return EXIT_CODE_FORMAT_ERROR
+    if workflow_dict.get("a_galaxy_workflow") != "true":
+        return EXIT_CODE_FORMAT_ERROR
+
+    native_steps = workflow_dict.get("steps")
+    if not isinstance(native_steps, dict):
+        # Without a steps mapping there is nothing to inspect; report a format
+        # error rather than failing on ``native_steps.values()`` below.
+        return EXIT_CODE_FORMAT_ERROR
+
+    found_outputs = False
+    found_output_without_label = False
+
+    for step in native_steps.values():
+        # ``workflow_outputs`` may be absent or explicitly null.
+        for workflow_output in step.get("workflow_outputs") or []:
+            found_outputs = True
+
+            if not workflow_output.get("label"):
+                found_output_without_label = True
+
+    if not found_outputs:
+        return EXIT_CODE_LINT_FAILED
+
+    if found_output_without_label:
+        return EXIT_CODE_LINT_FAILED
+
+    return EXIT_CODE_SUCCESS
+
+
+def lint_format2(workflow_dict):
+    """Lint a Format 2 workflow dictionary and return an exit code.
+
+    The only check performed is that ``outputs`` is present and non-empty.
+    """
+    # Lint for outputs...
+    if not workflow_dict.get("outputs", None):
+        return EXIT_CODE_LINT_FAILED
+    return EXIT_CODE_SUCCESS
+
+
+def main(argv):
+    """Lint the workflow file at ``argv[1]`` and return the exit code.
+
+    Documents whose ``class`` is ``GalaxyWorkflow`` are linted as Format 2;
+    everything else is linted as a native workflow.
+    """
+    path = argv[1]
+    try:
+        with open(path, "r") as f:
+            workflow_dict = ordered_load(f)
+    except Exception:
+        # Covers an unreadable file as well as content the loader rejects.
+        return EXIT_CODE_FILE_PARSE_FAILED
+    if not isinstance(workflow_dict, dict):
+        # A document that is not a mapping (e.g. a top-level list) cannot be
+        # dispatched on "class" and is not a workflow in either format.
+        return EXIT_CODE_FORMAT_ERROR
+    workflow_class = workflow_dict.get("class")
+    if workflow_class == "GalaxyWorkflow":
+        exit_code = lint_format2(workflow_dict)
+    else:
+        exit_code = lint_ga(workflow_dict)
+    return exit_code
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
diff --git a/tests/_helpers.py b/tests/_helpers.py
new file mode 100644
index 0000000..f1e5304
--- /dev/null
+++ b/tests/_helpers.py
@@ -0,0 +1,31 @@
+from gxformat2.converter import yaml_to_workflow
+from gxformat2.interface import ImporterGalaxyInterface
+
+
+TEST_GOOD_WORKFLOW = """
+"""
+
+
+def to_native(has_yaml, **kwds):
+    """Run ``yaml_to_workflow`` on ``has_yaml`` with a no-op Galaxy interface."""
+    return yaml_to_workflow(has_yaml, MockGalaxyInterface(), None, **kwds)
+
+
+def assert_valid_native(as_dict_native):
+    """Assert the dict has the native markers and sequentially keyed, typed steps."""
+    assert as_dict_native["a_galaxy_workflow"] == "true"
+    assert as_dict_native["format-version"] == "0.1"
+    assert "steps" in as_dict_native
+    step_count = 0
+    for key, value in as_dict_native["steps"].items():
+        assert key == str(step_count)
+        step_count += 1
+        assert "type" in value
+        assert value["type"] in ["data_input", "data_collection_input", "tool", "subworkflow"]
+
+
+class MockGalaxyInterface(ImporterGalaxyInterface):
+    """Galaxy interface stub whose ``import_workflow`` does nothing."""
+
+    def import_workflow(self, workflow, **kwds):
+        pass
diff --git a/tests/test_basic.py b/tests/test_basic.py
index e9167a2..77fa59f 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -1,6 +1,6 @@
-from gxformat2.converter import ImportOptions, yaml_to_workflow
+from gxformat2.converter import ImportOptions
 from gxformat2.export import from_galaxy_native
-from gxformat2.interface import ImporterGalaxyInterface
+from ._helpers import to_native, assert_valid_native
 
 
 def test_import_export():
@@ -199,28 +199,6 @@ def from_native(native_as_dict):
     return
from_galaxy_native(native_as_dict, None)
 
 
-def to_native(has_yaml, **kwds):
-    return yaml_to_workflow(has_yaml, MockGalaxyInterface(), None, **kwds)
-
-
 def assert_valid_format2(as_dict_format2):
     assert as_dict_format2["class"] == "GalaxyWorkflow"
     assert "steps" in as_dict_format2
-
-
-def assert_valid_native(as_dict_native):
-    assert as_dict_native["a_galaxy_workflow"] == "true"
-    assert as_dict_native["format-version"] == "0.1"
-    assert "steps" in as_dict_native
-    step_count = 0
-    for key, value in as_dict_native["steps"].items():
-        assert key == str(step_count)
-        step_count += 1
-        assert "type" in value
-        assert value["type"] in ["data_input", "data_collection_input", "tool", "subworkflow"]
-
-
-class MockGalaxyInterface(ImporterGalaxyInterface):
-
-    def import_workflow(self, workflow, **kwds):
-        pass
diff --git a/tests/test_lint.py b/tests/test_lint.py
new file mode 100644
index 0000000..8169bf7
--- /dev/null
+++ b/tests/test_lint.py
@@ -0,0 +1,155 @@
+import copy
+import json
+import os
+import tempfile
+
+from gxformat2.lint import main
+from gxformat2._yaml import ordered_dump, ordered_load
+from ._helpers import assert_valid_native, to_native
+
+
+TEST_PATH = os.path.abspath(os.path.dirname(__file__))
+TEST_EXAMPLES = os.path.join(TEST_PATH, "examples")
+
+BASIC_WORKFLOW = """
+class: GalaxyWorkflow
+doc: |
+  Simple workflow that no-op cats a file and then selects 10 random lines.
+inputs:
+  the_input:
+    type: File
+    doc: input doc
+outputs:
+  the_output:
+    outputSource: cat/out_file1
+steps:
+  cat:
+    tool_id: cat1
+    doc: cat doc
+    in:
+      input1: the_input
+"""
+
+WORKFLOW_WITH_REPEAT = """
+class: GalaxyWorkflow
+inputs:
+  input1: data
+outputs:
+  out1:
+    outputSource: first_cat/out_file1
+steps:
+  first_cat:
+    tool_id: cat
+    in:
+      input1: input1
+      queries_0|input2: input1
+      queries_1|input2: input1
+"""
+
+
+RULES_TOOL = """
+class: GalaxyWorkflow
+inputs:
+  input_c: collection
+outputs:
+  out1:
+    outputSource: random_lines/out_file1
+steps:
+  apply:
+    tool_id: __APPLY_RULES__
+    state:
+      input:
+        $link: input_c
+      rules:
+        rules:
+          - type: add_column_metadata
+            value: identifier0
+          - type: add_column_metadata
+            value: identifier0
+        mapping:
+          - type: list_identifiers
+            columns: [0, 1]
+  random_lines:
+    tool_id: random_lines1
+    state:
+      num_lines: 1
+      input:
+        $link: apply#output
+      seed_source:
+        seed_source_selector: set_seed
+        seed: asdf
+"""
+
+
+def setup_module(module):
+    """Write example workflows named ``<expected exit code>_<description>.yml``."""
+    # Setup an examples directory with examples we want to correspond to what exit codes,
+    # do this so we can run same tests in Java.
+    green_format2 = ordered_load(BASIC_WORKFLOW)
+    _dump_with_exit_code(green_format2, 0, "basic_format2")
+    green_native = to_native(BASIC_WORKFLOW)
+    assert_valid_native(green_native)
+    _dump_with_exit_code(green_native, 0, "basic_native")
+
+    invalid_format2_no_format_dict = copy.deepcopy(green_format2)
+    del invalid_format2_no_format_dict["class"]
+    _dump_with_exit_code(invalid_format2_no_format_dict, 2, "format2_no_class")
+
+    invalid_ga_no_format_dict = copy.deepcopy(green_native)
+    del invalid_ga_no_format_dict["a_galaxy_workflow"]
+    _dump_with_exit_code(invalid_ga_no_format_dict, 2, "native_no_class")
+
+    red_ga_no_outputs = copy.deepcopy(green_native)
+    red_ga_no_outputs_steps = red_ga_no_outputs.get("steps")
+    for step in red_ga_no_outputs_steps.values():
+        step.pop("workflow_outputs", None)
+    _dump_with_exit_code(red_ga_no_outputs, 1, "native_no_outputs")
+
+    red_ga_no_output_labels = copy.deepcopy(green_native)
+    red_ga_no_output_labels_steps = red_ga_no_output_labels.get("steps")
+    for step in red_ga_no_output_labels_steps.values():
+        for workflow_output in step.get("workflow_outputs", []):
+            workflow_output["label"] = None
+    _dump_with_exit_code(red_ga_no_output_labels, 1, "native_no_output_labels")
+
+    red_format2_no_outputs = copy.deepcopy(green_format2)
+    del red_format2_no_outputs["outputs"]
+    _dump_with_exit_code(red_format2_no_outputs, 1, "format2_no_output")
+
+    green_format2_rules = ordered_load(RULES_TOOL)
+    _dump_with_exit_code(green_format2_rules, 0, "format2_rules")
+
+    green_native_rules = to_native(RULES_TOOL)
+    _dump_with_exit_code(green_native_rules, 0, "native_format")
+
+    green_format2_repeat = ordered_load(WORKFLOW_WITH_REPEAT)
+    _dump_with_exit_code(green_format2_repeat, 0, "format2_repeat")
+    green_native_repeat = to_native(WORKFLOW_WITH_REPEAT)
+    _dump_with_exit_code(green_native_repeat, 0, "native_repeat")
+
+
+def test_lint_ga_basic():
+    """A native workflow that declares no workflow outputs fails lint."""
+    assert main(["lint", os.path.join(TEST_PATH, "wf3-shed-tools-raw.ga")]) == 1  # no outputs
+
+
+def test_lint_examples():
+    """Lint each example file and compare with the exit code in its name."""
+    for file_name in os.listdir(TEST_EXAMPLES):
+        file_path = os.path.join(TEST_EXAMPLES, file_name)
+        expected_exit_code = int(file_name[0])
+        actual_exit_code = main(["lint", file_path])
+        if actual_exit_code != expected_exit_code:
+            with open(file_path, "r") as f:
+                contents = f.read()
+            template = "File [%s] didn't lint properly - expected exit code [%d], got [%d]. Contents:\n%s"
+            raise AssertionError(template % (file_name, expected_exit_code, actual_exit_code, contents))
+
+
+def _dump_with_exit_code(as_dict, exit_code, description):
+    """Dump ``as_dict`` to ``examples/<exit_code>_<description>.yml``."""
+    if not os.path.exists(TEST_EXAMPLES):
+        os.makedirs(TEST_EXAMPLES)
+    with open(os.path.join(TEST_EXAMPLES, "%d_%s.yml" % (exit_code, description)), "w") as fd:
+        ordered_dump(as_dict, fd)
+        fd.flush()
diff --git a/tests/wf3-shed-tools-raw.ga b/tests/wf3-shed-tools-raw.ga
new file mode 100644
index 0000000..0c0ef8d
--- /dev/null
+++ b/tests/wf3-shed-tools-raw.ga
@@ -0,0 +1 @@
+{"uuid": "eb7fc9c5-992b-495a-9baa-4a9bb67d6262", "tags": [], "format-version": "0.1", "name": "Workflow constructed from history 'Cell paper'", "steps": {"0": {"tool_id": null, "tool_version": null, "outputs": [], "workflow_outputs": [], "input_connections": {}, "tool_state": "{\"name\": \"ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE37nnn/GSE37268/suppl/GSE37268%5Fmof3%2Eout%2Ehpeak%2Etxt%2E\"}", "id": 0, "uuid": "4216f74b-f994-432a-b5be-2a6b981ac16a", "errors": null, "name": "Input dataset", "label": "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE37nnn/GSE37268/suppl/GSE37268%5Fmof3%2Eout%2Ehpeak%2Etxt%2E", "inputs": [{"name": "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE37nnn/GSE37268/suppl/GSE37268%5Fmof3%2Eout%2Ehpeak%2Etxt%2E", "description": ""}], "position": {"top": 335, "left": 200}, "annotation": "", "content_id": null, "type": "data_input"}, "1": {"tool_id": null, "tool_version": null, "outputs": [], "workflow_outputs": [], "input_connections": {}, "tool_state": "{\"name\": \"Genes\"}", "id": 1, "uuid": "0d2e344b-fd58-462c-bd21-c60acd045d9d", "errors":
null, "name": "Input dataset", "label": "Genes", "inputs": [{"name": "Genes", "description": ""}], "position": {"top": 455, "left": 200}, "annotation": "", "content_id": null, "type": "data_input"}, "2": {"tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.0", "tool_version": "1.1.0", "outputs": [{"type": "input", "name": "outfile"}], "workflow_outputs": [], "input_connections": {"infile": {"output_name": "output", "id": 0}}, "tool_state": "{\"__page__\": null, \"find_pattern\": \"\\\"[0-9]+\\\"\", \"column\": \"\\\"1\\\"\", \"chromInfo\": \"\\\"/usr/local/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null, \"replace_pattern\": \"\\\"chr&\\\"\", \"infile\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\"}", "id": 2, "tool_shed_repository": {"owner": "bgruening", "changeset_revision": "20344ce0c811", "name": "text_processing", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "d40b3d69-2a99-4b00-bf75-7ded995dfbd7", "errors": null, "name": "Replace Text", "post_job_actions": {}, "label": null, "inputs": [{"name": "infile", "description": "runtime parameter for tool Replace Text"}], "position": {"top": 335, "left": 420}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.0", "type": "tool"}, "3": {"tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/get_flanks/get_flanks1/1.0.0", "tool_version": "1.0.0", "outputs": [{"type": "input", "name": "out_file1"}], "workflow_outputs": [], "input_connections": {"input": {"output_name": "output", "id": 1}}, "tool_state": "{\"__page__\": null, \"direction\": \"\\\"Upstream\\\"\", \"region\": \"\\\"start\\\"\", \"offset\": \"\\\"10000\\\"\", \"input\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"chromInfo\": \"\\\"/usr/local/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/mm9.len\\\"\", \"__rerun_remap_job_id__\": null, \"size\": \"\\\"12000\\\"\"}", "id": 3, "tool_shed_repository": {"owner": 
"devteam", "changeset_revision": "077f404ae1bb", "name": "get_flanks", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "652af7e8-710b-4476-952f-484d6a2e4a7a", "errors": null, "name": "Get flanks", "post_job_actions": {}, "label": null, "inputs": [{"name": "input", "description": "runtime parameter for tool Get flanks"}], "position": {"top": 475, "left": 415}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/get_flanks/get_flanks1/1.0.0", "type": "tool"}, "4": {"tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.0", "tool_version": "1.1.0", "outputs": [{"type": "input", "name": "outfile"}], "workflow_outputs": [], "input_connections": {"infile": {"output_name": "outfile", "id": 2}}, "tool_state": "{\"__page__\": null, \"find_pattern\": \"\\\"chr20\\\"\", \"column\": \"\\\"1\\\"\", \"chromInfo\": \"\\\"/usr/local/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null, \"replace_pattern\": \"\\\"chrX\\\"\", \"infile\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\"}", "id": 4, "tool_shed_repository": {"owner": "bgruening", "changeset_revision": "20344ce0c811", "name": "text_processing", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "37f15259-3b03-4461-8da3-6e8ee34fd2f5", "errors": null, "name": "Replace Text", "post_job_actions": {}, "label": null, "inputs": [{"name": "infile", "description": "runtime parameter for tool Replace Text"}], "position": {"top": 335, "left": 640}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.0", "type": "tool"}, "5": {"tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.0", "tool_version": "1.1.0", "outputs": [{"type": "input", "name": "outfile"}], "workflow_outputs": [], "input_connections": {"infile": {"output_name": "outfile", "id": 4}}, "tool_state": "{\"__page__\": null, \"find_pattern\": \"\\\"chr21\\\"\", \"column\": 
\"\\\"1\\\"\", \"chromInfo\": \"\\\"/usr/local/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null, \"replace_pattern\": \"\\\"chrY\\\"\", \"infile\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\"}", "id": 5, "tool_shed_repository": {"owner": "bgruening", "changeset_revision": "20344ce0c811", "name": "text_processing", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "8ce00200-94b7-4daf-a0e5-e3b8d6d87eeb", "errors": null, "name": "Replace Text", "post_job_actions": {}, "label": null, "inputs": [{"name": "infile", "description": "runtime parameter for tool Replace Text"}], "position": {"top": 335, "left": 860}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.0", "type": "tool"}, "6": {"tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/bedtools/bedtools_intersectbed/2.27.0.1", "tool_version": "2.27.0.1", "outputs": [{"type": "input", "name": "output"}], "workflow_outputs": [], "input_connections": {"inputA": {"output_name": "out_file1", "id": 3}, "reduce_or_iterate|inputB": {"output_name": "outfile", "id": 5}}, "tool_state": "{\"count\": \"\\\"false\\\"\", \"__page__\": null, \"reciprocal\": \"\\\"false\\\"\", \"overlap_mode\": \"\\\"-wa\\\"\", \"invert\": \"\\\"false\\\"\", \"header\": \"\\\"false\\\"\", \"inputA\": \"{\\\"values\\\": [{\\\"src\\\": \\\"hda\\\", \\\"id\\\": 6112359}]}\", \"reduce_or_iterate\": \"{\\\"inputB\\\": {\\\"values\\\": [{\\\"src\\\": \\\"hda\\\", \\\"id\\\": 6112359}]}, \\\"reduce_or_iterate_selector\\\": \\\"iterate\\\", \\\"__current_case__\\\": 0}\", \"split\": \"\\\"false\\\"\", \"fraction\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"strand\": \"\\\"\\\"\", \"once\": \"\\\"false\\\"\"}", "id": 6, "tool_shed_repository": {"owner": "iuc", "changeset_revision": "33c3ddea63c5", "name": "bedtools", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "c65fd1f5-9fc2-4acf-96c6-94ea61906042", "errors": null, "name": "Intersect intervals", 
"post_job_actions": {}, "label": null, "inputs": [], "position": {"top": 655, "left": 1022}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/bedtools/bedtools_intersectbed/2.27.0.1", "type": "tool"}, "7": {"tool_id": "Grouping1", "tool_version": "2.1.1", "outputs": [{"type": "tabular", "name": "out_file1"}], "workflow_outputs": [], "input_connections": {"input1": {"output_name": "output", "id": 6}}, "tool_state": "{\"operations\": \"[{\\\"opcol\\\": \\\"1\\\", \\\"__index__\\\": 0, \\\"optype\\\": \\\"length\\\", \\\"opround\\\": \\\"no\\\"}]\", \"__page__\": null, \"input1\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"ignorelines\": \"null\", \"groupcol\": \"\\\"1\\\"\", \"__rerun_remap_job_id__\": null, \"ignorecase\": \"\\\"false\\\"\", \"chromInfo\": \"\\\"/usr/local/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/mm9.len\\\"\"}", "id": 7, "uuid": "be264526-e498-4050-b6cd-f7a842268d4a", "errors": null, "name": "Group", "post_job_actions": {}, "label": null, "inputs": [{"name": "input1", "description": "runtime parameter for tool Group"}], "position": {"top": 335, "left": 1300}, "annotation": "", "content_id": "Grouping1", "type": "tool"}}, "annotation": "", "a_galaxy_workflow": "true"} \ No newline at end of file