Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Not really a PR #2

Open
wants to merge 10 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
.idea/
*.pyc
__pycache__
*.swp
.nfs*
*.sqlite
*.touch
.coverage

tmp/
dist/
.snakemake
_*_venv*/
doc/*.ps
doc/*.pdf
reports/*.pdf
/toolbox*/wheels/

12 changes: 12 additions & 0 deletions example_CITATION.cff
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
cff-version: 1.2.0
message: "If you use this software, please cite it as below."
authors:
- family-names: Druskat
given-names: Stephan
orcid: https://orcid.org/1234-5678-9101-1121
title: "My Research Software"
version: 2.0.4
identifiers:
- type: doi
value: 10.5281/zenodo.1234
date-released: 2021-08-11
15 changes: 15 additions & 0 deletions snakemake-report-plugin-wrroc/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# WRite an R0-Crate with a Snakemake reporter plugin

## Trying out the code.

Easiest way to get this working is to:

1) Create a conda env with the latest Snakemake
2) Make sure that env is active, and that `which pip` looks appropriate
3) `pip install -e ./snakemake-report-plugin-wrroc/`

That's it. No need to mess around with Poetry just to test/hack the code.

## Building and releasing the code

Something something Poetry. Something something distutils.
Binary file not shown.
Binary file not shown.
1 change: 1 addition & 0 deletions snakemake-report-plugin-wrroc/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ documentation = "https://snakemake.github.io/snakemake-plugin-catalog/plugins/re
python = "^3.12"
snakemake-interface-common = "^1.17.2"
snakemake-interface-report-plugins = "^1.0.0"
rocrate = "^0.11.0"
253 changes: 218 additions & 35 deletions snakemake-report-plugin-wrroc/snakemake_report_plugin_wrroc/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,18 @@
"""RO-Crate exporter plugin for Snakemake.

Activate this reporter while running a test run of your workflow to generate
???.zip

Maybe at some point there will be an upload to WorkflowHub via the API included?
"""

from dataclasses import dataclass, field
from typing import Optional

import snakemake
from snakemake.logging import logger
import os
from subprocess import run, CalledProcessError

from snakemake_interface_common.exceptions import WorkflowError # noqa: F401
from snakemake_interface_report_plugins.reporter import ReporterBase
Expand All @@ -11,18 +21,18 @@
from rocrate.rocrate import ROCrate
from rocrate.model import ContextEntity, Person

# Optional:
# Define additional settings for your reporter.
# They will occur in the Snakemake CLI as --report-<reporter-name>-<param-name>
# Omit this class if you don't need any.
# Make sure that all defined fields are Optional (or bool) and specify a default value
# of None (or False) or anything else that makes sense in your case.
@dataclass
class ReportSettings(ReportSettingsBase):
myparam: Optional[int] = field(
"""Additional settings for the RO-Crate reporter.
They will occur in the Snakemake CLI as --report-wrroc-<param-name>
Make sure that any further defined fields are Optional (or bool) and specify a default
value of None (or False) or else Snakemake will demand these settings even when the
reporter is not in use. Use the "required" flag below for required options.
"""
exclude: Optional[str] = field(
default=None,
metadata={
"help": "Some help text",
"help": "Comma-separed list of extra files to exclude",
# Optionally request that setting is also available for specification
# via an environment variable. The variable will be named automatically as
# SNAKEMAKE_REPORT_<reporter-name>_<param-name>, all upper case.
Expand All @@ -42,11 +52,19 @@ class ReportSettings(ReportSettingsBase):
},
)

force: bool = field(
default=False,
metadata={
"help": "Continue even if there are non-conformities.",
"env_var": False,
"required": False,
},
)

# Required:
# Implementation of your reporter
class Reporter(ReporterBase):
def __post_init__(self, excludelist = [".snakemake", ".git", ".github", ".test", ".gitignore"]):
def __post_init__(self, excludelist = (".snakemake", ".git", ".github", ".test", ".gitignore")):
# initialize additional attributes
# Do not overwrite the __init__ method as this is kept in control of the base
# class in order to simplify the update process.
Expand All @@ -55,54 +73,220 @@ def __post_init__(self, excludelist = [".snakemake", ".git", ".github", ".test",
# In particular, the settings of above ReportSettings class are accessible via
# self.settings.
self.outdir = "ro-crate_out"
self.excludelist = excludelist
self.excludelist = list(excludelist)
self.excludelist.append(self.outdir)
# Load the existing Workflow RO-Crate
self.crate = ROCrate(source='./', exclude=self.excludelist)

# Add any exclude items specified by the user
if self.settings.exclude:
self.excludelist.extend(self.settings.exclude.split(','))

self.conformance_force = self.settings.force

# Decide if we are in dry-run mode. Oh, apparently the reporter always runs off
# --dry-run mode. Right.

# Load the existing Workflow RO-Crate...
try:
self.crate = ROCrate(source='./', exclude=self.excludelist)
except ValueError:
# ...or make a fresh one
self.crate = ROCrate(exclude=self.excludelist)
self.crate.add_directory(".")

def check_essential_files(self):
"""Check for the presence of essential files.

We want to scan/report everything even if there is an error.

See https://docs.google.com/document/d/1KozjchVFqrctBGooRR-OWpifyI0TRuoSWIUUGIDWNyI/edit?tab=t.0
"""
errors = []

# A license
if not os.path.exists("LICENSE.md") or not os.path.exists("LICENSE.txt"):
errors.append("No LICENSE.md or LICENSE.txt found.")

# TODO - do we need CODE_OF_CONDUCT.md and CONTRIBUTING.md too? I'm making these
# warnings just now.

# Evidence of Git repo. This is probably not the best way to check but we'd like to know
# the remote URL
if not os.path.exists(".git/config"):
if os.path.exists("../.git/config"):
# We are within a GIT repo but not at the top level, so:
if not os.path.exists("workflowhub.yml"):
errors.append("Since your workflow is in a subdirectory of your GIT repo,"
" you must include a 'workflowhub.yml' file.")
else:
errors.append("No .git/config file found. Is the code under source control?")

# We need a README.md
if not os.path.exists("README.md"):
errors.append("Add a README.md file to introduce your worflow.")

# The main workflow should be called "workflow/Snakefile"
main_snakefile = os.path.relpath(self.dag.workflow.main_snakefile)
if main_snakefile != "workflow/Snakefile":
# FIXME - this is returning nonsense in Snakemake 8. Does the regular html
# reporter get a meaningful value? Nope.
errors.append("Your main Snakefile needs to be called 'workflow/Snakefile'."
f" Please rename {main_snakefile}.")

# And we want a config.yaml file.
if not os.path.exists("config/config.yaml"):
if os.path.exists("config/config.json"):
# For anything that needs to be hand-edited and not read by JS, YAML >> JSON.
errors.append("Please convert your config/config.json file to YAML format using"
" <insert suggested converter tool here>.")
else:
errors.append("Please supply a default/sample configuration file for the workflow"
" under 'config/config.yaml'.")

# GitHub wants a "CITATION.cff"
# It looks like we should be able to pull the info from this into the metadata -
# see https://www.researchobject.org/ro-crate/specification/1.1/contextual-entities.html#publications-via-citation-property
# but I'm not sure how useful this is or where that code would live. Given that the
# cffconvert tool (and library) already supports CFF to schema.org conversion I'd imagine
# this is already done in other tools.
if not os.path.exists("CITATION.cff"):
errors.append("You must include a 'CITATION.cff' file. If you are not requesting a"
" specific citation for use of the workflow, please <link instrux here"
" for making a minimal CFF, or maybe create a template>")

return errors

def check_desirable_files(self):
"""Things that *should* be in the submission but are not essential.
"""
errors = []

# WorkflowHub says the tests should be under "tests" but Snakemake says they should
# be under ".tests". Can we be opinionated about it?
if not os.path.isdir("tests"):
if os.path.isdir(".tests"):
errors.append("You have a '.tests' directory. Please rename it as 'tests' or else"
" make a symlink called 'tests'.")
else:
errors.append("Please add a 'tests' directory with 'unit' and 'integration'"
" subdirectories.")
elif not(os.path.isdir("tests/integration") and os.path.isdir("tests/unit")):
errors.append("Please create 'unit' and 'integration' subdirectories under 'tests'.")


if not(os.path.exists("CODE_OF_CONDUCT.md") and os.path.exists("CONTRIBUTING.md")):
errors.append("Please add CODE_OF_CONDUCT.md and CONTRIBUTING.md files."
" You may be happy copying the versions from <suggest something here>")

return errors

def conformance_check(self):
"""Ensure that some expected files are found. The rocrate module does
not scan the files until the crate is exported, so we have to look for the
files here.
"""
essential_problems = self.check_essential_files()
if essential_problems:
for prob in essential_problems:
logger.error(f"Conformance error: {prob}")
msg = f"Exiting due to {len(essential_problems)} conformance issues."
if self.conformance_force:
logger.warning(f"Continuing despite {len(essential_problems)} conformance issues.")
else:
raise RuntimeError(f"Exiting due to {len(essential_problems)} conformance issues.")

desirable_problems = self.check_desirable_files()
if desirable_problems:
for prob in desirable_problems:
logger.warning(f"Conformance warning: {prob}")
logger.warning(f"Continuing despite len(desirable_problems) warnings.")

# images/rulegraph.svg should be something we can auto-generate. self.dag has methods dot()
# and rule_dot() which can make the graph for us, but it still needs converting to SVG.
if not os.path.exists("image/rulegraph.svg"):

logger.warning("Auto generating 'image/rulegraph.svg'")
try:
os.makedirs("image", exist_ok=True)
with open("image/rulegraph.dot", "x") as dotfh:
print(self.dag.rule_dot(), file=dotfh)
except FileExistsError:
# Never mind, use the one we have. Maybe the user edited it.
logger.info("Using existing 'image/rulegraph.dot'")

# For converting .dot to .svg I don't see a better way than calling the graphviz
# program directly.
try:
run(['dot', '-Tsvg', 'image/rulegraph.dot', '-o', 'image/rulegraph.svg'],
check = True,
capture_output = True,
text = True)
except CalledProcessError as e:
logger.error(str(e.stderr).rstrip())
logger.error("The 'dot' program returned the above error attempting to convert the rulegraph.")
except FileNotFoundError as e:
logger.error(str(e))
logger.error("The 'dot' program was not found. Unable to auto-convert the rulegraph.")




def render(self):
# Render the report, using attributes of the base class.
try:
self.try_render()
except Exception as e:
# Catch all exceptions and turn them into error messages.
logger.error(e)

def try_render(self):
"""Generate the crate, using the ROCrate library.
"""
logger.info(f"Excludelist: {self.excludelist}")

crate = self.crate

# Remove any publication date from the root dataset of the original RO-Crate
if 'datePublished' in self.crate.root_dataset:
self.crate.root_dataset.__delitem__('datePublished')
if 'datePublished' in crate.root_dataset:
crate.root_dataset.__delitem__('datePublished')

self.conformance_check()

# Provenance Crate - add snakemake version
for entity in self.crate.contextual_entities:
for entity in crate.contextual_entities:
if entity.type == 'ComputerLanguage' and 'snakemake' in entity.id.lower():
entity['version'] = snakemake.__version__.split("+")[0]

# Provenance Crate - record execution of workflow as a CreateAction object
instruments = {}
for entity in self.crate.data_entities:
for entity in crate.data_entities:
if 'ComputationalWorkflow' in entity.type:
instruments["@id"] = entity.id
workflow_run_properties = {
#"@id":"FIXME-a",
#"@id":"FIXME-add-workflow-run-properties-id",
"@type":"CreateAction",
"name":"Snakemake workflow run (FIXME)",
"endTime":"FIXME date",
"instrument":instruments,
#"instrument":instruments,
#"subjectOf":{"@id":"FIXME creative work (workflow?)"},
"object":["FIXME inputs"],
"result":["FIXME outputs"]
}
print(workflow_run_properties)
workflow_run = self.crate.add(
ContextEntity(self.crate, properties=workflow_run_properties)
if '@id' in instruments:
workflow_run_properties['instruments'] = instruments
logger.info(workflow_run_properties)
workflow_run = crate.add(
ContextEntity(crate, properties=workflow_run_properties)
)

# Provenance Run Crate (individual step information)

# print basic information (start/end) of each job
for rulename, rule in self.rules.items():
print(f"rule: {rulename}")
logger.info(f"rule: {rulename}")
#print(rule)
#print("rule: " + rec.rule)
#print("starttime: " + str(rec.starttime))
#print("endtime: " + str(rec.endtime))
#print("ROCrate date published: " + str(self.crate.datePublished.date()))
#print("ROCrate date published: " + str(crate.datePublished.date()))

# Add Person running workflow (agent)
person_properties = {
Expand All @@ -111,25 +295,24 @@ def render(self):
"familyName": "FIXME",
"affiliation": "FIXME"
}
agent = self.crate.add(Person(self.crate,
"FIXME-ORCID?",
properties=person_properties))
agent = crate.add(Person(crate,
"FIXME-ORCID?",
properties=person_properties))
workflow_run.append_to( "agent", [agent] )



# Reference CreateAction in the root Dataset
self.crate.root_dataset.append_to(
crate.root_dataset.append_to(
"mentions" , [{"@id": workflow_run.id}]
)

# Set the conformsTo statement for the root Dataset.
# Note that this will replace any pre-existing conformsTo information
self.crate.root_dataset["conformsTo"] = [
crate.root_dataset["conformsTo"] = [
{"@id": "https://w3id.org/ro/wfrun/process/0.1"},
{"@id": "https://w3id.org/ro/wfrun/workflow/0.5"},
{"@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0"}
]


self.crate.write(self.outdir)

crate.write(self.outdir)
crate.write_zip(self.outdir + ".zip")