diff --git a/scripts/workflow_manifest.py b/scripts/workflow_manifest.py
index 5b7e8e412..4f5069136 100644
--- a/scripts/workflow_manifest.py
+++ b/scripts/workflow_manifest.py
@@ -2,6 +2,8 @@
 import json
 import yaml
 import datetime
+import requests
+from urllib.parse import quote_plus
 
 from create_mermaid import walk_directory
 
@@ -15,6 +17,41 @@ def read_contents(path: str):
         print(f"Error reading file {path}: {e}")
 
 
+def get_dockstore_details(trsID):
+    hash_stripped = trsID.replace("#workflow/", "", 1)
+    encoded_id = quote_plus(hash_stripped)
+
+    # Query the top-level details of the workflow
+    url_details = f"https://dockstore.org/api/workflows/path/workflow/{encoded_id}/published?include=validations%2Cauthors%2Cmetrics&subclass=BIOWORKFLOW&versionName=main"
+    response = requests.get(url_details)
+
+    details = None
+    categories = []
+
+    if response.status_code == 200:
+        details = response.json()
+
+        entry_id = details.get("id")
+        if entry_id:
+            # With the ID, request categories
+            url_categories = f"https://dockstore.org/api/entries/{entry_id}/categories"
+            cat_response = requests.get(url_categories)
+
+            if cat_response.status_code == 200:
+                categories_data = cat_response.json()
+                for category in categories_data:
+                    categories.append(category["displayName"])
+            else:
+                print(
+                    f"Failed to get categories. Status code: {cat_response.status_code}"
+                )
+        else:
+            print("No 'id' field found in the top-level data.")
+    else:
+        print(f"Failed to retrieve details. Status code: {response.status_code}")
+    return details, categories
+
+
 def find_and_load_compliant_workflows(directory):
     """
     Find all .dockstore.yml files in the given directory and its subdirectories.
@@ -25,85 +62,81 @@ def find_and_load_compliant_workflows(directory):
     workflow_data = []
     for root, _, files in os.walk(directory):
         if ".dockstore.yml" in files:
-            try:
-                dockstore_path = os.path.join(root, ".dockstore.yml")
-                with open(dockstore_path) as f:
-                    workflow_details = yaml.safe_load(f)
-                workflow_details["path"] = root
-                workflow_data.append(workflow_details)
-
-                # Now inspect the details which are something like this:
-                # version: 1.2
-                # workflows:
-                # - name: Velocyto-on10X-from-bundled
-                #   subclass: Galaxy
-                #   publish: true
-                #   primaryDescriptorPath: /Velocyto-on10X-from-bundled.ga
-                #   testParameterFiles:
-                #   - /Velocyto-on10X-from-bundled-tests.yml
-                #   authors:
-                #   - name: Lucille Delisle
-                #     orcid: 0000-0002-1964-4960
-                # - name: Velocyto-on10X-filtered-barcodes
-                #   subclass: Galaxy
-                #   publish: true
-                #   primaryDescriptorPath: /Velocyto-on10X-filtered-barcodes.ga
-                #   testParameterFiles:
-                #   - /Velocyto-on10X-filtered-barcodes-tests.yml
-                #   authors:
-                #   - name: Lucille Delisle
-                #     orcid: 0000-0002-1964-4960
-
-                for workflow in workflow_details["workflows"]:
-                    # For each listed workflow, load the primaryDescriptorPath
-                    # file, which is the actual galaxy workflow.
-                    # strip leading slash from primaryDescriptorPath if present -- these are relative.
-                    workflow_path = os.path.join(
-                        root, workflow["primaryDescriptorPath"].lstrip("/")
-                    )
-                    try:
-                        with open(workflow_path) as f:
-                            workflow["definition"] = json.load(f)
-                    except Exception as e:
-                        print(
-                            f"No workflow file: {os.path.join(root, workflow['primaryDescriptorPath'])}: {e}"
-                        )
-
-                    # Get workflow file update time and add it to the data as
-                    # isoformat -- most accurate version of the latest 'update'
-                    # to the workflow? 🤷
-                    updated_timestamp = os.path.getmtime(workflow_path)
-                    updated_datetime = datetime.datetime.fromtimestamp(
-                        updated_timestamp
-                    )
-                    updated_isoformat = updated_datetime.isoformat()
-                    workflow["updated"] = updated_isoformat
-
-                    # load readme, changelog and diagrams
-                    workflow["readme"] = read_contents(os.path.join(root, "README.md"))
-                    workflow["changelog"] = read_contents(
-                        os.path.join(root, "CHANGELOG.md")
-                    )
-                    workflow["diagrams"] = read_contents(
-                        f"{os.path.splitext(workflow_path)[0]}_diagrams.md"
-                    )
-                    dirname = os.path.dirname(workflow_path).split("/")[-1]
-                    workflow["trsID"] = (
-                        f"#workflow/github.com/iwc-workflows/{dirname}/{workflow['name'] or 'main'}"
+            dockstore_path = os.path.join(root, ".dockstore.yml")
+            with open(dockstore_path) as f:
+                workflow_details = yaml.safe_load(f)
+            workflow_details["path"] = root
+            workflow_data.append(workflow_details)
+
+            # Now inspect the details which are something like this:
+            # version: 1.2
+            # workflows:
+            # - name: Velocyto-on10X-from-bundled
+            #   subclass: Galaxy
+            #   publish: true
+            #   primaryDescriptorPath: /Velocyto-on10X-from-bundled.ga
+            #   testParameterFiles:
+            #   - /Velocyto-on10X-from-bundled-tests.yml
+            #   authors:
+            #   - name: Lucille Delisle
+            #     orcid: 0000-0002-1964-4960
+            # - name: Velocyto-on10X-filtered-barcodes
+            #   subclass: Galaxy
+            #   publish: true
+            #   primaryDescriptorPath: /Velocyto-on10X-filtered-barcodes.ga
+            #   testParameterFiles:
+            #   - /Velocyto-on10X-filtered-barcodes-tests.yml
+            #   authors:
+            #   - name: Lucille Delisle
+            #     orcid: 0000-0002-1964-4960
+
+            for workflow in workflow_details["workflows"]:
+                # For each listed workflow, load the primaryDescriptorPath
+                # file, which is the actual galaxy workflow.
+                # strip leading slash from primaryDescriptorPath if present -- these are relative.
+                workflow_path = os.path.join(
+                    root, workflow["primaryDescriptorPath"].lstrip("/")
+                )
+                try:
+                    with open(workflow_path) as f:
+                        workflow["definition"] = json.load(f)
+                except Exception as e:
+                    print(
+                        f"No workflow file: {os.path.join(root, workflow['primaryDescriptorPath'])}: {e}"
                     )
-                    workflow_test_path = (
-                        f"{workflow_path.rsplit('.ga', 1)[0]}-tests.yml"
-                    )
-                    if os.path.exists(workflow_test_path):
-                        with open(workflow_test_path) as f:
-                            tests = yaml.safe_load(f)
-                        workflow["tests"] = tests
-                    else:
-                        print(f"no test for {workflow_test_path}")
-
-            except Exception as e:
-                print(f"Error reading file {os.path.join(root, '.dockstore.yml')}: {e}")
+                # Get workflow file update time and add it to the data as
+                # isoformat -- most accurate version of the latest 'update'
+                # to the workflow? 🤷
+                updated_timestamp = os.path.getmtime(workflow_path)
+                updated_datetime = datetime.datetime.fromtimestamp(updated_timestamp)
+                updated_isoformat = updated_datetime.isoformat()
+                workflow["updated"] = updated_isoformat
+
+                # load readme, changelog and diagrams
+                workflow["readme"] = read_contents(os.path.join(root, "README.md"))
+                workflow["changelog"] = read_contents(
+                    os.path.join(root, "CHANGELOG.md")
+                )
+                workflow["diagrams"] = read_contents(
+                    f"{os.path.splitext(workflow_path)[0]}_diagrams.md"
+                )
+                dirname = os.path.dirname(workflow_path).split("/")[-1]
+                trsID = f"#workflow/github.com/iwc-workflows/{dirname}/{workflow['name'] or 'main'}"
+                workflow["trsID"] = trsID
+
+                dockstore_details, categories = get_dockstore_details(trsID)
+
+                workflow["dockstore_id"] = dockstore_details["id"]
+                workflow["categories"] = categories
+
+                workflow_test_path = f"{workflow_path.rsplit('.ga', 1)[0]}-tests.yml"
+                if os.path.exists(workflow_test_path):
+                    with open(workflow_test_path) as f:
+                        tests = yaml.safe_load(f)
+                    workflow["tests"] = tests
+                else:
+                    print(f"no test for {workflow_test_path}")
 
     return workflow_data
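A minimal sketch of how the new get_dockstore_details helper could be exercised on its own, and how a caller might guard against a failed Dockstore lookup: as written in the patch, workflow["dockstore_id"] = dockstore_details["id"] would raise a TypeError whenever Dockstore answers with a non-200 status and details stays None. The import path and the example TRS ID below are assumptions for illustration, not part of the patch.

    # Illustrative sketch only -- not part of the patch.
    # Assumes scripts/ is on PYTHONPATH so the module imports as workflow_manifest,
    # and uses an example TRS ID in the same #workflow/github.com/iwc-workflows/... form.
    from workflow_manifest import get_dockstore_details

    example_trs_id = "#workflow/github.com/iwc-workflows/parallel-accession-download/main"
    details, categories = get_dockstore_details(example_trs_id)

    # get_dockstore_details returns (None, []) when the first request fails,
    # so check before indexing into the response dict.
    if details is not None:
        print("Dockstore entry id:", details["id"])
        print("Categories:", categories)
    else:
        print("Dockstore lookup failed; dockstore_id/categories left unset")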