Skip to content

Commit

Permalink
Implement some basic workflow linting.
Browse files Browse the repository at this point in the history
  • Loading branch information
jmchilton committed Dec 1, 2019
1 parent aef38c3 commit c79b4ec
Show file tree
Hide file tree
Showing 5 changed files with 241 additions and 24 deletions.
60 changes: 60 additions & 0 deletions gxformat2/lint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import sys

from gxformat2._yaml import ordered_load

# Exit codes returned by main() and the lint_* functions in this module.
# Linting passed.
EXIT_CODE_SUCCESS = 0
# The workflow was loaded but failed a lint rule (no outputs, or an unlabeled output).
EXIT_CODE_LINT_FAILED = 1
# A native workflow is missing the expected "format-version" / "a_galaxy_workflow" markers.
EXIT_CODE_FORMAT_ERROR = 2
# ordered_load raised while reading the workflow file.
EXIT_CODE_FILE_PARSE_FAILED = 3


def lint_ga(workflow_dict):
    """Lint a native Galaxy workflow that has already been loaded into a dict.

    Checks performed, in order:

    * ``format-version`` must be ``"0.1"`` and ``a_galaxy_workflow`` must be
      ``"true"``; ``steps`` must be a mapping. Otherwise the document is not a
      usable native workflow.
    * At least one step must declare a workflow output.
    * Every declared workflow output must carry a non-empty ``label``.

    :param workflow_dict: mapping loaded from the workflow file.
    :returns: ``EXIT_CODE_FORMAT_ERROR`` when the document is not a native
        workflow, ``EXIT_CODE_LINT_FAILED`` when an output rule is violated,
        ``EXIT_CODE_SUCCESS`` otherwise.
    """
    if workflow_dict.get("format-version") != "0.1":
        return EXIT_CODE_FORMAT_ERROR
    if workflow_dict.get("a_galaxy_workflow") != "true":
        return EXIT_CODE_FORMAT_ERROR

    native_steps = workflow_dict.get("steps")
    if not hasattr(native_steps, "values"):
        # A missing (or non-mapping) "steps" entry used to crash with
        # AttributeError on ``None.values()``; report it as a format problem.
        return EXIT_CODE_FORMAT_ERROR

    found_outputs = False
    for step in native_steps.values():
        # ``or []`` also covers an explicit ``"workflow_outputs": null``,
        # which ``.get(key, [])`` would hand back as None.
        for workflow_output in step.get("workflow_outputs") or []:
            found_outputs = True
            if not workflow_output.get("label"):
                # One unlabeled output is enough to fail the lint.
                return EXIT_CODE_LINT_FAILED

    if not found_outputs:
        return EXIT_CODE_LINT_FAILED

    return EXIT_CODE_SUCCESS


def lint_format2(workflow_dict):
    """Lint a Format 2 workflow dict.

    The only rule applied is that the workflow declares a non-empty
    ``outputs`` entry.

    :param workflow_dict: mapping loaded from the workflow file.
    :returns: ``EXIT_CODE_SUCCESS`` when outputs are declared,
        ``EXIT_CODE_LINT_FAILED`` otherwise.
    """
    declared_outputs = workflow_dict.get("outputs")
    if declared_outputs:
        return EXIT_CODE_SUCCESS
    return EXIT_CODE_LINT_FAILED


def main(argv):
    """Lint the workflow file named by ``argv[1]`` and return an exit code.

    The file is loaded with ``ordered_load``. Documents whose ``class`` is
    ``"GalaxyWorkflow"`` are linted as Format 2; everything else is linted as
    a native workflow.

    :param argv: argument vector; ``argv[1]`` is the path of the workflow file.
    :returns: one of the ``EXIT_CODE_*`` constants.
    :raises IndexError: if ``argv`` has no element at index 1.

    Errors raised by ``open`` (for example a missing file) are not caught here
    and propagate to the caller.
    """
    path = argv[1]
    with open(path, "r") as f:
        try:
            workflow_dict = ordered_load(f)
        except Exception:
            # Deliberately broad: any loader failure is reported to the
            # caller as a parse failure rather than a traceback.
            return EXIT_CODE_FILE_PARSE_FAILED

    # An empty file loads as None and a top-level list/scalar has no ``get``;
    # neither can be a workflow, so report them instead of crashing below.
    if not callable(getattr(workflow_dict, "get", None)):
        return EXIT_CODE_FILE_PARSE_FAILED

    if workflow_dict.get("class") == "GalaxyWorkflow":
        return lint_format2(workflow_dict)
    return lint_ga(workflow_dict)


if __name__ == "__main__":
    # When run as a script, use the lint result as the process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)
28 changes: 28 additions & 0 deletions tests/_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from gxformat2.converter import yaml_to_workflow
from gxformat2.interface import ImporterGalaxyInterface


# Placeholder workflow text: currently just a newline, and not referenced by
# the helpers defined in this module.
TEST_GOOD_WORKFLOW = """
"""


def to_native(has_yaml, **kwds):
    """Run ``has_yaml`` through ``yaml_to_workflow`` using a stub Galaxy interface.

    :param has_yaml: Format 2 workflow input, forwarded unchanged.
    :param kwds: extra keyword arguments forwarded to ``yaml_to_workflow``.
    :returns: whatever ``yaml_to_workflow`` returns.
    """
    galaxy_interface = MockGalaxyInterface()
    return yaml_to_workflow(has_yaml, galaxy_interface, None, **kwds)


def assert_valid_native(as_dict_native):
    """Assert that ``as_dict_native`` has the shape of a native Galaxy workflow.

    Checks the ``a_galaxy_workflow`` and ``format-version`` markers, that
    ``steps`` is present, that step keys are the strings ``"0"``, ``"1"``, ...
    in iteration order, and that every step has a recognised ``type``.

    :param as_dict_native: mapping holding the native workflow.
    :raises AssertionError: if any check fails, including when a marker key
        is missing altogether.
    """
    # ``.get`` keeps a missing marker an assertion failure instead of a KeyError.
    assert as_dict_native.get("a_galaxy_workflow") == "true", \
        "a_galaxy_workflow must be 'true'"
    assert as_dict_native.get("format-version") == "0.1", \
        "format-version must be '0.1'"
    assert "steps" in as_dict_native, "workflow has no steps entry"

    valid_types = ("data_input", "data_collection_input", "tool", "subworkflow")
    for step_index, (key, value) in enumerate(as_dict_native["steps"].items()):
        assert key == str(step_index), \
            "expected step key %r, got %r" % (str(step_index), key)
        assert "type" in value, "step %s has no type" % key
        assert value["type"] in valid_types, \
            "step %s has unknown type %r" % (key, value["type"])


class MockGalaxyInterface(ImporterGalaxyInterface):
    """Stub Galaxy interface for tests; importing a workflow does nothing."""

    def import_workflow(self, workflow, **kwds):
        """Accept ``workflow`` and any keyword arguments, and return None."""
        return None
26 changes: 2 additions & 24 deletions tests/test_basic.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from gxformat2.converter import ImportOptions, yaml_to_workflow
from gxformat2.converter import ImportOptions
from gxformat2.export import from_galaxy_native
from gxformat2.interface import ImporterGalaxyInterface
from ._helpers import to_native, assert_valid_native


def test_import_export():
Expand Down Expand Up @@ -199,28 +199,6 @@ def from_native(native_as_dict):
return from_galaxy_native(native_as_dict, None)


def to_native(has_yaml, **kwds):
    """Run ``has_yaml`` through ``yaml_to_workflow`` using a stub Galaxy interface.

    :param has_yaml: Format 2 workflow input, forwarded unchanged.
    :param kwds: extra keyword arguments forwarded to ``yaml_to_workflow``.
    :returns: whatever ``yaml_to_workflow`` returns.
    """
    galaxy_interface = MockGalaxyInterface()
    return yaml_to_workflow(has_yaml, galaxy_interface, None, **kwds)


def assert_valid_format2(as_dict_format2):
    """Assert that ``as_dict_format2`` has the shape of a Format 2 workflow.

    :param as_dict_format2: mapping holding the Format 2 workflow.
    :raises AssertionError: if ``class`` is missing or is not
        ``"GalaxyWorkflow"``, or if there is no ``steps`` entry.
    """
    # ``.get`` keeps a missing "class" an assertion failure instead of a KeyError.
    assert as_dict_format2.get("class") == "GalaxyWorkflow", \
        "class must be 'GalaxyWorkflow'"
    assert "steps" in as_dict_format2, "workflow has no steps entry"


def assert_valid_native(as_dict_native):
    """Assert that ``as_dict_native`` has the shape of a native Galaxy workflow.

    Checks the ``a_galaxy_workflow`` and ``format-version`` markers, that
    ``steps`` is present, that step keys are the strings ``"0"``, ``"1"``, ...
    in iteration order, and that every step has a recognised ``type``.

    :param as_dict_native: mapping holding the native workflow.
    :raises AssertionError: if any check fails, including when a marker key
        is missing altogether.
    """
    # ``.get`` keeps a missing marker an assertion failure instead of a KeyError.
    assert as_dict_native.get("a_galaxy_workflow") == "true", \
        "a_galaxy_workflow must be 'true'"
    assert as_dict_native.get("format-version") == "0.1", \
        "format-version must be '0.1'"
    assert "steps" in as_dict_native, "workflow has no steps entry"

    valid_types = ("data_input", "data_collection_input", "tool", "subworkflow")
    for step_index, (key, value) in enumerate(as_dict_native["steps"].items()):
        assert key == str(step_index), \
            "expected step key %r, got %r" % (str(step_index), key)
        assert "type" in value, "step %s has no type" % key
        assert value["type"] in valid_types, \
            "step %s has unknown type %r" % (key, value["type"])


class MockGalaxyInterface(ImporterGalaxyInterface):
    """Stub Galaxy interface for tests; importing a workflow does nothing."""

    def import_workflow(self, workflow, **kwds):
        """Accept ``workflow`` and any keyword arguments, and return None."""
        return None
150 changes: 150 additions & 0 deletions tests/test_lint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
import copy
import json
import os
import tempfile

from gxformat2.lint import main
from gxformat2._yaml import ordered_dump, ordered_load
from ._helpers import assert_valid_native, to_native


# Absolute path of the directory containing this test module.
TEST_PATH = os.path.abspath(os.path.dirname(__file__))
# Directory of generated lint examples: written by _dump_with_exit_code and
# read back by test_lint_examples.
TEST_EXAMPLES = os.path.join(TEST_PATH, "examples")

# Minimal Format 2 workflow: one File input, one tool step, one output.
# YAML nesting is significant here: the body of ``doc: |`` and every nested
# mapping must be indented under its parent key.
BASIC_WORKFLOW = """
class: GalaxyWorkflow
doc: |
  Simple workflow that no-op cats a file and then selects 10 random lines.
inputs:
  the_input:
    type: File
    doc: input doc
outputs:
  the_output:
    outputSource: cat/out_file1
steps:
  cat:
    tool_id: cat1
    doc: cat doc
    in:
      input1: the_input
"""

# Format 2 workflow whose single step wires the same input into repeat
# parameters (``queries_N|input2``). Nested mappings must stay indented under
# their parent keys for the YAML to load as intended.
WORKFLOW_WITH_REPEAT = """
class: GalaxyWorkflow
inputs:
  input1: data
outputs:
  out1:
    outputSource: first_cat/out_file1
steps:
  first_cat:
    tool_id: cat
    in:
      input1: input1
      queries_0|input2: input1
      queries_1|input2: input1
"""


# Format 2 workflow using the __APPLY_RULES__ tool followed by random_lines1.
# The ``state`` trees are nested YAML; list items and their keys must stay
# indented under ``rules`` / ``mapping`` for the document to load as intended.
RULES_TOOL = """
class: GalaxyWorkflow
inputs:
  input_c: collection
outputs:
  out1:
    outputSource: random_lines/out_file1
steps:
  apply:
    tool_id: __APPLY_RULES__
    state:
      input:
        $link: input_c
      rules:
        rules:
          - type: add_column_metadata
            value: identifier0
          - type: add_column_metadata
            value: identifier0
        mapping:
          - type: list_identifiers
            columns: [0, 1]
  random_lines:
    tool_id: random_lines1
    state:
      num_lines: 1
      input:
        $link: apply#output
      seed_source:
        seed_source_selector: set_seed
        seed: asdf
"""


def setup_module(module):
    """Generate the example workflow files consumed by ``test_lint_examples``.

    Each file is written through ``_dump_with_exit_code``, which encodes the
    exit code the linter is expected to return in the file name.

    :param module: the test module object passed in by the test runner (unused).
    """
    # Setup an examples directory with examples we want to correspond to what exit codes,
    # do this so we can run same tests in Java.
    green_format2 = ordered_load(BASIC_WORKFLOW)
    _dump_with_exit_code(green_format2, 0, "basic_format2")
    green_native = to_native(BASIC_WORKFLOW)
    assert_valid_native(green_native)
    _dump_with_exit_code(green_native, 0, "basic_native")

    # Without "class" the document is linted as native and fails the format check.
    invalid_format2_no_format_dict = copy.deepcopy(green_format2)
    del invalid_format2_no_format_dict["class"]
    _dump_with_exit_code(invalid_format2_no_format_dict, 2, "format2_no_class")

    invalid_ga_no_format_dict = copy.deepcopy(green_native)
    del invalid_ga_no_format_dict["a_galaxy_workflow"]
    _dump_with_exit_code(invalid_ga_no_format_dict, 2, "native_no_class")

    red_ga_no_outputs = copy.deepcopy(green_native)
    for step in red_ga_no_outputs.get("steps").values():
        step.pop("workflow_outputs", None)
    _dump_with_exit_code(red_ga_no_outputs, 1, "native_no_outputs")

    red_ga_no_output_labels = copy.deepcopy(green_native)
    for step in red_ga_no_output_labels.get("steps").values():
        for workflow_output in step.get("workflow_outputs", []):
            workflow_output["label"] = None
    # Dump the label-stripped copy; dumping the no-outputs workflow again here
    # would leave the unlabeled-output lint rule untested.
    _dump_with_exit_code(red_ga_no_output_labels, 1, "native_no_output_labels")

    red_format2_no_outputs = copy.deepcopy(green_format2)
    del red_format2_no_outputs["outputs"]
    _dump_with_exit_code(red_format2_no_outputs, 1, "format2_no_output")

    green_format2_rules = ordered_load(RULES_TOOL)
    _dump_with_exit_code(green_format2_rules, 0, "format2_rules")

    green_native_rules = to_native(RULES_TOOL)
    # NOTE(review): "native_format" holds the rules workflow; possibly meant to
    # be "native_rules". Left unchanged in case the file name is relied on elsewhere.
    _dump_with_exit_code(green_native_rules, 0, "native_format")

    green_format2_repeat = ordered_load(WORKFLOW_WITH_REPEAT)
    _dump_with_exit_code(green_format2_repeat, 0, "format2_repeat")
    green_native_repeat = to_native(WORKFLOW_WITH_REPEAT)
    _dump_with_exit_code(green_native_repeat, 0, "native_repeat")


def test_lint_ga_basic():
    """Linting the checked-in native workflow must fail with exit code 1."""
    workflow_path = os.path.join(TEST_PATH, "wf3-shed-tools-raw.ga")
    exit_code = main(["lint", workflow_path])
    assert exit_code == 1  # no outputs


def test_lint_examples():
    """Lint every generated example and compare against its expected exit code.

    The expected exit code is the first character of each file name, as
    written by ``_dump_with_exit_code``.

    :raises AssertionError: for the first file whose lint result differs; the
        message includes the file contents to ease debugging.
    """
    # Sorted so that the first reported failure is the same on every run.
    for file_name in sorted(os.listdir(TEST_EXAMPLES)):
        file_path = os.path.join(TEST_EXAMPLES, file_name)
        expected_exit_code = int(file_name[0])
        actual_exit_code = main(["lint", file_path])
        if actual_exit_code != expected_exit_code:
            # Read through a context manager so the handle is closed promptly.
            with open(file_path, "r") as example_file:
                contents = example_file.read()
            template = "File [%s] didn't lint properly - expected exit code [%d], got [%d]. Contents:\n%s"
            raise AssertionError(template % (file_name, expected_exit_code, actual_exit_code, contents))


def _dump_with_exit_code(as_dict, exit_code, description):
    """Write ``as_dict`` to ``<TEST_EXAMPLES>/<exit_code>_<description>.yml``.

    The examples directory is created first if it does not exist.

    :param as_dict: workflow mapping to serialise with ``ordered_dump``.
    :param exit_code: integer exit code the linter is expected to return.
    :param description: short name used for the rest of the file name.
    """
    if not os.path.exists(TEST_EXAMPLES):
        os.makedirs(TEST_EXAMPLES)
    example_name = "%d_%s.yml" % (exit_code, description)
    example_path = os.path.join(TEST_EXAMPLES, example_name)
    with open(example_path, "w") as fd:
        ordered_dump(as_dict, fd)
        fd.flush()
1 change: 1 addition & 0 deletions tests/wf3-shed-tools-raw.ga
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"uuid": "eb7fc9c5-992b-495a-9baa-4a9bb67d6262", "tags": [], "format-version": "0.1", "name": "Workflow constructed from history 'Cell paper'", "steps": {"0": {"tool_id": null, "tool_version": null, "outputs": [], "workflow_outputs": [], "input_connections": {}, "tool_state": "{\"name\": \"ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE37nnn/GSE37268/suppl/GSE37268%5Fmof3%2Eout%2Ehpeak%2Etxt%2E\"}", "id": 0, "uuid": "4216f74b-f994-432a-b5be-2a6b981ac16a", "errors": null, "name": "Input dataset", "label": "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE37nnn/GSE37268/suppl/GSE37268%5Fmof3%2Eout%2Ehpeak%2Etxt%2E", "inputs": [{"name": "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE37nnn/GSE37268/suppl/GSE37268%5Fmof3%2Eout%2Ehpeak%2Etxt%2E", "description": ""}], "position": {"top": 335, "left": 200}, "annotation": "", "content_id": null, "type": "data_input"}, "1": {"tool_id": null, "tool_version": null, "outputs": [], "workflow_outputs": [], "input_connections": {}, "tool_state": "{\"name\": \"Genes\"}", "id": 1, "uuid": "0d2e344b-fd58-462c-bd21-c60acd045d9d", "errors": null, "name": "Input dataset", "label": "Genes", "inputs": [{"name": "Genes", "description": ""}], "position": {"top": 455, "left": 200}, "annotation": "", "content_id": null, "type": "data_input"}, "2": {"tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.0", "tool_version": "1.1.0", "outputs": [{"type": "input", "name": "outfile"}], "workflow_outputs": [], "input_connections": {"infile": {"output_name": "output", "id": 0}}, "tool_state": "{\"__page__\": null, \"find_pattern\": \"\\\"[0-9]+\\\"\", \"column\": \"\\\"1\\\"\", \"chromInfo\": \"\\\"/usr/local/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null, \"replace_pattern\": \"\\\"chr&\\\"\", \"infile\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\"}", "id": 2, "tool_shed_repository": {"owner": "bgruening", "changeset_revision": "20344ce0c811", "name": "text_processing", "tool_shed": 
"toolshed.g2.bx.psu.edu"}, "uuid": "d40b3d69-2a99-4b00-bf75-7ded995dfbd7", "errors": null, "name": "Replace Text", "post_job_actions": {}, "label": null, "inputs": [{"name": "infile", "description": "runtime parameter for tool Replace Text"}], "position": {"top": 335, "left": 420}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.0", "type": "tool"}, "3": {"tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/get_flanks/get_flanks1/1.0.0", "tool_version": "1.0.0", "outputs": [{"type": "input", "name": "out_file1"}], "workflow_outputs": [], "input_connections": {"input": {"output_name": "output", "id": 1}}, "tool_state": "{\"__page__\": null, \"direction\": \"\\\"Upstream\\\"\", \"region\": \"\\\"start\\\"\", \"offset\": \"\\\"10000\\\"\", \"input\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"chromInfo\": \"\\\"/usr/local/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/mm9.len\\\"\", \"__rerun_remap_job_id__\": null, \"size\": \"\\\"12000\\\"\"}", "id": 3, "tool_shed_repository": {"owner": "devteam", "changeset_revision": "077f404ae1bb", "name": "get_flanks", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "652af7e8-710b-4476-952f-484d6a2e4a7a", "errors": null, "name": "Get flanks", "post_job_actions": {}, "label": null, "inputs": [{"name": "input", "description": "runtime parameter for tool Get flanks"}], "position": {"top": 475, "left": 415}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/get_flanks/get_flanks1/1.0.0", "type": "tool"}, "4": {"tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.0", "tool_version": "1.1.0", "outputs": [{"type": "input", "name": "outfile"}], "workflow_outputs": [], "input_connections": {"infile": {"output_name": "outfile", "id": 2}}, "tool_state": "{\"__page__\": null, \"find_pattern\": \"\\\"chr20\\\"\", \"column\": \"\\\"1\\\"\", \"chromInfo\": 
\"\\\"/usr/local/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null, \"replace_pattern\": \"\\\"chrX\\\"\", \"infile\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\"}", "id": 4, "tool_shed_repository": {"owner": "bgruening", "changeset_revision": "20344ce0c811", "name": "text_processing", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "37f15259-3b03-4461-8da3-6e8ee34fd2f5", "errors": null, "name": "Replace Text", "post_job_actions": {}, "label": null, "inputs": [{"name": "infile", "description": "runtime parameter for tool Replace Text"}], "position": {"top": 335, "left": 640}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.0", "type": "tool"}, "5": {"tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.0", "tool_version": "1.1.0", "outputs": [{"type": "input", "name": "outfile"}], "workflow_outputs": [], "input_connections": {"infile": {"output_name": "outfile", "id": 4}}, "tool_state": "{\"__page__\": null, \"find_pattern\": \"\\\"chr21\\\"\", \"column\": \"\\\"1\\\"\", \"chromInfo\": \"\\\"/usr/local/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null, \"replace_pattern\": \"\\\"chrY\\\"\", \"infile\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\"}", "id": 5, "tool_shed_repository": {"owner": "bgruening", "changeset_revision": "20344ce0c811", "name": "text_processing", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "8ce00200-94b7-4daf-a0e5-e3b8d6d87eeb", "errors": null, "name": "Replace Text", "post_job_actions": {}, "label": null, "inputs": [{"name": "infile", "description": "runtime parameter for tool Replace Text"}], "position": {"top": 335, "left": 860}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/1.1.0", "type": "tool"}, "6": {"tool_id": 
"toolshed.g2.bx.psu.edu/repos/iuc/bedtools/bedtools_intersectbed/2.27.0.1", "tool_version": "2.27.0.1", "outputs": [{"type": "input", "name": "output"}], "workflow_outputs": [], "input_connections": {"inputA": {"output_name": "out_file1", "id": 3}, "reduce_or_iterate|inputB": {"output_name": "outfile", "id": 5}}, "tool_state": "{\"count\": \"\\\"false\\\"\", \"__page__\": null, \"reciprocal\": \"\\\"false\\\"\", \"overlap_mode\": \"\\\"-wa\\\"\", \"invert\": \"\\\"false\\\"\", \"header\": \"\\\"false\\\"\", \"inputA\": \"{\\\"values\\\": [{\\\"src\\\": \\\"hda\\\", \\\"id\\\": 6112359}]}\", \"reduce_or_iterate\": \"{\\\"inputB\\\": {\\\"values\\\": [{\\\"src\\\": \\\"hda\\\", \\\"id\\\": 6112359}]}, \\\"reduce_or_iterate_selector\\\": \\\"iterate\\\", \\\"__current_case__\\\": 0}\", \"split\": \"\\\"false\\\"\", \"fraction\": \"\\\"\\\"\", \"__rerun_remap_job_id__\": null, \"strand\": \"\\\"\\\"\", \"once\": \"\\\"false\\\"\"}", "id": 6, "tool_shed_repository": {"owner": "iuc", "changeset_revision": "33c3ddea63c5", "name": "bedtools", "tool_shed": "toolshed.g2.bx.psu.edu"}, "uuid": "c65fd1f5-9fc2-4acf-96c6-94ea61906042", "errors": null, "name": "Intersect intervals", "post_job_actions": {}, "label": null, "inputs": [], "position": {"top": 655, "left": 1022}, "annotation": "", "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/bedtools/bedtools_intersectbed/2.27.0.1", "type": "tool"}, "7": {"tool_id": "Grouping1", "tool_version": "2.1.1", "outputs": [{"type": "tabular", "name": "out_file1"}], "workflow_outputs": [], "input_connections": {"input1": {"output_name": "output", "id": 6}}, "tool_state": "{\"operations\": \"[{\\\"opcol\\\": \\\"1\\\", \\\"__index__\\\": 0, \\\"optype\\\": \\\"length\\\", \\\"opround\\\": \\\"no\\\"}]\", \"__page__\": null, \"input1\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"ignorelines\": \"null\", \"groupcol\": \"\\\"1\\\"\", \"__rerun_remap_job_id__\": null, \"ignorecase\": \"\\\"false\\\"\", \"chromInfo\": 
\"\\\"/usr/local/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/mm9.len\\\"\"}", "id": 7, "uuid": "be264526-e498-4050-b6cd-f7a842268d4a", "errors": null, "name": "Group", "post_job_actions": {}, "label": null, "inputs": [{"name": "input1", "description": "runtime parameter for tool Group"}], "position": {"top": 335, "left": 1300}, "annotation": "", "content_id": "Grouping1", "type": "tool"}}, "annotation": "", "a_galaxy_workflow": "true"}

0 comments on commit c79b4ec

Please sign in to comment.