Commit

Update manifest to query category and other details -- so far we only care about adding the category to the manifest.
dannon committed Dec 16, 2024
1 parent 4f112d6 commit ae088ee
Showing 1 changed file with 110 additions and 77 deletions.
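For context, a minimal sketch of the two-step Dockstore lookup that the new get_dockstore_details helper performs: resolve the TRS ID to a published workflow, then use the returned entry id to fetch its categories. The TRS ID below is a hypothetical example and error handling is omitted; the full version is in the diff.

# Sketch only (not part of the diff): two-step Dockstore lookup using the
# endpoints shown in the commit below; the TRS ID is a made-up example.
import requests
from urllib.parse import quote_plus

trs_id = "#workflow/github.com/iwc-workflows/some-workflow/main"
encoded = quote_plus(trs_id.replace("#workflow/", "", 1))

# Step 1: top-level details of the published workflow.
details = requests.get(
    "https://dockstore.org/api/workflows/path/workflow/"
    f"{encoded}/published?include=validations%2Cauthors%2Cmetrics"
    "&subclass=BIOWORKFLOW&versionName=main"
).json()

# Step 2: categories for the resolved entry id.
categories = requests.get(
    f"https://dockstore.org/api/entries/{details['id']}/categories"
).json()

print([category["displayName"] for category in categories])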
scripts/workflow_manifest.py (187 changes: 110 additions & 77 deletions)
@@ -2,6 +2,8 @@
import json
import yaml
import datetime
import requests
from urllib.parse import quote_plus
from create_mermaid import walk_directory


@@ -15,6 +17,41 @@ def read_contents(path: str):
print(f"Error reading file {path}: {e}")


def get_dockstore_details(trsID):
hash_stripped = trsID.replace("#workflow/", "", 1)
encoded_id = quote_plus(hash_stripped)

# Query the top-level details of the workflow
url_details = f"https://dockstore.org/api/workflows/path/workflow/{encoded_id}/published?include=validations%2Cauthors%2Cmetrics&subclass=BIOWORKFLOW&versionName=main"
response = requests.get(url_details)

details = None
categories = []

if response.status_code == 200:
details = response.json()

entry_id = details.get("id")
if entry_id:
# With the ID, request categories
url_categories = f"https://dockstore.org/api/entries/{entry_id}/categories"
cat_response = requests.get(url_categories)

if cat_response.status_code == 200:
categories_data = cat_response.json()
for category in categories_data:
categories.append(category["displayName"])
else:
print(
f"Failed to get categories. Status code: {cat_response.status_code}"
)
else:
print("No 'id' field found in the top-level data.")
else:
print(f"Failed to retrieve details. Status code: {response.status_code}")
return details, categories


def find_and_load_compliant_workflows(directory):
"""
Find all .dockstore.yml files in the given directory and its subdirectories.
@@ -25,85 +62,81 @@ def find_and_load_compliant_workflows(directory):
workflow_data = []
for root, _, files in os.walk(directory):
if ".dockstore.yml" in files:
try:
dockstore_path = os.path.join(root, ".dockstore.yml")
with open(dockstore_path) as f:
workflow_details = yaml.safe_load(f)
workflow_details["path"] = root
workflow_data.append(workflow_details)

# Now inspect the details which are something like this:
# version: 1.2
# workflows:
# - name: Velocyto-on10X-from-bundled
# subclass: Galaxy
# publish: true
# primaryDescriptorPath: /Velocyto-on10X-from-bundled.ga
# testParameterFiles:
# - /Velocyto-on10X-from-bundled-tests.yml
# authors:
# - name: Lucille Delisle
# orcid: 0000-0002-1964-4960
# - name: Velocyto-on10X-filtered-barcodes
# subclass: Galaxy
# publish: true
# primaryDescriptorPath: /Velocyto-on10X-filtered-barcodes.ga
# testParameterFiles:
# - /Velocyto-on10X-filtered-barcodes-tests.yml
# authors:
# - name: Lucille Delisle
# orcid: 0000-0002-1964-4960

for workflow in workflow_details["workflows"]:
# For each listed workflow, load the primaryDescriptorPath
# file, which is the actual galaxy workflow.
# strip leading slash from primaryDescriptorPath if present -- these are relative.
workflow_path = os.path.join(
root, workflow["primaryDescriptorPath"].lstrip("/")
)
try:
with open(workflow_path) as f:
workflow["definition"] = json.load(f)
except Exception as e:
print(
f"No workflow file: {os.path.join(root, workflow['primaryDescriptorPath'])}: {e}"
)

# Get workflow file update time and add it to the data as
# isoformat -- most accurate version of the latest 'update'
# to the workflow?
updated_timestamp = os.path.getmtime(workflow_path)
updated_datetime = datetime.datetime.fromtimestamp(
updated_timestamp
)
updated_isoformat = updated_datetime.isoformat()
workflow["updated"] = updated_isoformat

# load readme, changelog and diagrams
workflow["readme"] = read_contents(os.path.join(root, "README.md"))
workflow["changelog"] = read_contents(
os.path.join(root, "CHANGELOG.md")
)
workflow["diagrams"] = read_contents(
f"{os.path.splitext(workflow_path)[0]}_diagrams.md"
)
dirname = os.path.dirname(workflow_path).split("/")[-1]
workflow["trsID"] = (
f"#workflow/github.com/iwc-workflows/{dirname}/{workflow['name'] or 'main'}"
dockstore_path = os.path.join(root, ".dockstore.yml")
with open(dockstore_path) as f:
workflow_details = yaml.safe_load(f)
workflow_details["path"] = root
workflow_data.append(workflow_details)

# Now inspect the details which are something like this:
# version: 1.2
# workflows:
# - name: Velocyto-on10X-from-bundled
# subclass: Galaxy
# publish: true
# primaryDescriptorPath: /Velocyto-on10X-from-bundled.ga
# testParameterFiles:
# - /Velocyto-on10X-from-bundled-tests.yml
# authors:
# - name: Lucille Delisle
# orcid: 0000-0002-1964-4960
# - name: Velocyto-on10X-filtered-barcodes
# subclass: Galaxy
# publish: true
# primaryDescriptorPath: /Velocyto-on10X-filtered-barcodes.ga
# testParameterFiles:
# - /Velocyto-on10X-filtered-barcodes-tests.yml
# authors:
# - name: Lucille Delisle
# orcid: 0000-0002-1964-4960

for workflow in workflow_details["workflows"]:
# For each listed workflow, load the primaryDescriptorPath
# file, which is the actual galaxy workflow.
# strip leading slash from primaryDescriptorPath if present -- these are relative.
workflow_path = os.path.join(
root, workflow["primaryDescriptorPath"].lstrip("/")
)
try:
with open(workflow_path) as f:
workflow["definition"] = json.load(f)
except Exception as e:
print(
f"No workflow file: {os.path.join(root, workflow['primaryDescriptorPath'])}: {e}"
)

workflow_test_path = (
f"{workflow_path.rsplit('.ga', 1)[0]}-tests.yml"
)
if os.path.exists(workflow_test_path):
with open(workflow_test_path) as f:
tests = yaml.safe_load(f)
workflow["tests"] = tests
else:
print(f"no test for {workflow_test_path}")

except Exception as e:
print(f"Error reading file {os.path.join(root, '.dockstore.yml')}: {e}")
# Get workflow file update time and add it to the data as
# isoformat -- most accurate version of the latest 'update'
# to the workflow?
updated_timestamp = os.path.getmtime(workflow_path)
updated_datetime = datetime.datetime.fromtimestamp(updated_timestamp)
updated_isoformat = updated_datetime.isoformat()
workflow["updated"] = updated_isoformat

# load readme, changelog and diagrams
workflow["readme"] = read_contents(os.path.join(root, "README.md"))
workflow["changelog"] = read_contents(
os.path.join(root, "CHANGELOG.md")
)
workflow["diagrams"] = read_contents(
f"{os.path.splitext(workflow_path)[0]}_diagrams.md"
)
dirname = os.path.dirname(workflow_path).split("/")[-1]
trsID = f"#workflow/github.com/iwc-workflows/{dirname}/{workflow['name'] or 'main'}"
workflow["trsID"] = trsID

dockstore_details, categories = get_dockstore_details(trsID)

workflow["dockstore_id"] = dockstore_details["id"]
workflow["categories"] = categories

workflow_test_path = f"{workflow_path.rsplit('.ga', 1)[0]}-tests.yml"
if os.path.exists(workflow_test_path):
with open(workflow_test_path) as f:
tests = yaml.safe_load(f)
workflow["tests"] = tests
else:
print(f"no test for {workflow_test_path}")

return workflow_data

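With this change, each workflow entry in the generated manifest gains Dockstore metadata alongside the fields it already carried. A hypothetical fragment of one entry, for illustration only (names and values are made up; the real ones come from the repository contents and the Dockstore API):

# Hypothetical manifest entry fragment after this commit (illustrative values only).
{
    "name": "Velocyto-on10X-from-bundled",
    "trsID": "#workflow/github.com/iwc-workflows/velocyto-on-10x/Velocyto-on10X-from-bundled",
    "updated": "2024-12-16T12:00:00",
    "dockstore_id": 12345,
    "categories": ["Single-cell RNA-seq"],
}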
