Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Workflow Run RO-crate format #19

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ prov {
}
```

Finally, run your Nextflow pipeline. You do not need to modify your pipeline script in order to use the `nf-prov` plugin. The plugin will automatically generate a JSON file with provenance information.
Finally, run your Nextflow pipeline. You do not need to modify your pipeline script in order to use the `nf-prov` plugin. The plugin will automatically produce the specified provenance reports at the end of the workflow run.

## Configuration

Expand All @@ -48,6 +48,8 @@ Configuration scope for the desired output formats. The following formats are av

- `legacy`: Render the legacy format originally defined in this plugin (default). Supports the `file` and `overwrite` options.

- `wrroc`: Render a [Workflow Run RO-Crate](https://www.researchobject.org/workflow-run-crate/). Includes all three profiles (Process, Workflow, and Provenance).

Any number of formats can be specified, for example:

```groovy
Expand Down
4 changes: 4 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,9 @@ prov {
file = "${params.outdir}/manifest.json"
overwrite = true
}
wrroc {
file = "${params.outdir}/ro-crate-metadata.json"
overwrite = true
}
}
}
5 changes: 4 additions & 1 deletion plugins/nf-prov/src/main/nextflow/prov/ProvObserver.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ import nextflow.trace.TraceRecord
@CompileStatic
class ProvObserver implements TraceObserver {

public static final List<String> VALID_FORMATS = ['bco', 'dag', 'legacy']
public static final List<String> VALID_FORMATS = ['bco', 'dag', 'legacy', 'wrroc']

private Session session

Expand Down Expand Up @@ -67,6 +67,9 @@ class ProvObserver implements TraceObserver {
if( name == 'legacy' )
return new LegacyRenderer(opts)

if( name == 'wrroc' )
return new WrrocRenderer(opts)

throw new IllegalArgumentException("Invalid provenance format -- valid formats are ${VALID_FORMATS.join(', ')}")
}

Expand Down
261 changes: 261 additions & 0 deletions plugins/nf-prov/src/main/nextflow/prov/WrrocRenderer.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,261 @@
/*
* Copyright 2023, Seqera Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package nextflow.prov

import java.nio.file.Files
import java.nio.file.Path
import java.time.format.DateTimeFormatter

import groovy.json.JsonOutput
import groovy.transform.CompileStatic
import nextflow.Session
import nextflow.exception.AbortOperationException
import nextflow.processor.TaskRun

/**
 * Renderer for the Workflow Run RO-Crate (WRROC) format.
 *
 * The generated metadata document declares conformance to the Process,
 * Workflow, and Provenance Run Crate profiles and is written as a single
 * JSON-LD file to the configured output path.
 *
 * @author Ben Sherman <[email protected]>
 */
@CompileStatic
class WrrocRenderer implements Renderer {

    /** Destination of the rendered RO-Crate metadata file. */
    private Path path

    /** Whether an existing file at {@code path} may be overwritten. */
    private boolean overwrite

    // Created in render() once the workflow metadata is available; the
    // normalizePath() calls below are presumably resolved through this
    // delegate -- verify against PathNormalizer.
    @Delegate
    private PathNormalizer normalizer

    /**
     * Create a renderer from the format options in the `prov` config scope.
     *
     * @param opts options map; reads `file` (output path) and `overwrite`
     */
    WrrocRenderer(Map opts) {
        path = opts.file as Path
        overwrite = opts.overwrite as Boolean

        ProvHelper.checkFileOverwrite(path, overwrite)
    }

    /**
     * Build the RO-Crate JSON-LD graph for a workflow run and write it to
     * the configured output file.
     *
     * @param session          the Nextflow session of the run
     * @param tasks            the tasks collected for the run
     * @param workflowOutputs  map of workflow output files (source to target)
     */
    @Override
    void render(Session session, Set<TaskRun> tasks, Map<Path,Path> workflowOutputs) {
        // get workflow inputs
        final taskLookup = ProvHelper.getTaskLookup(tasks)
        final workflowInputs = ProvHelper.getWorkflowInputs(tasks, taskLookup)

        // get workflow metadata
        final metadata = session.workflowMetadata
        this.normalizer = new PathNormalizer(metadata)

        final manifest = metadata.manifest
        final nextflowMeta = metadata.nextflow

        final formatter = DateTimeFormatter.ISO_OFFSET_DATE_TIME
        final dateStarted = formatter.format(metadata.start)
        final dateCompleted = formatter.format(metadata.complete)
        final nextflowVersion = nextflowMeta.version.toString()
        final params = session.config.params as Map

        // create manifest
        final softwareApplicationId = UUID.randomUUID()
        final organizeActionId = UUID.randomUUID()

        // one Person entity per comma-separated name in the manifest author
        // field; ids carry a leading '#' like every other local contextual
        // entity in the graph, so they are not read as file paths in the crate
        final authors = (manifest.author ?: '')
            .tokenize(',')
            .withIndex()
            .collect { String name, int i -> [
                "@id": "#author-${i + 1}",
                "@type": "Person",
                "name": name.trim()
            ] }

        // one FormalParameter per (flattened) pipeline parameter
        final formalParameters = params
            .toConfigObject()
            .flatten()
            .collect { name, value -> [
                "@id": "#${name}",
                "@type": "FormalParameter",
                // TODO: infer type from value at runtime
                // "additionalType": "File",
                // "defaultValue": "",
                "conformsTo": ["@id": "https://bioschemas.org/profiles/FormalParameter/1.0-RELEASE"],
                "description": "",
                // TODO: apply only if type is Path
                // "encodingFormat": "text/plain",
                // TODO: match to output if type is Path
                // "workExample": ["@id": outputId],
                "name": name,
                // "valueRequired": "True"
            ] }

        final inputFiles = workflowInputs
            .collect { source -> [
                "@id": normalizePath(source),
                "@type": "File",
                "description": "",
                // probeContentType() returns null for unknown types
                "encodingFormat": Files.probeContentType(source) ?: "",
                // TODO: apply if matching param is found
                // "exampleOfWork": ["@id": paramId]
            ] }

        // TODO: create PropertyValue for each non-file FormalParameter output
        final propertyValues = [:]
            .collect { name, value -> [
                "@id": "#${name}",
                "@type": "PropertyValue",
                // TODO: match to param
                // "exampleOfWork": ["@id": "#verbose-param"],
                "name": name,
                "value": value
            ] }

        final outputFiles = workflowOutputs
            .collect { source, target -> [
                "@id": normalizePath(source),
                "@type": "File",
                "name": source.name,
                "description": "",
                "encodingFormat": Files.probeContentType(source) ?: "",
                // TODO: create FormalParameter for each output file?
                // "exampleOfWork": {"@id": "#reversed"}
            ] }

        final wrroc = [
            "@context": "https://w3id.org/ro/crate/1.1/context",
            "@graph": [
                // metadata file descriptor
                [
                    "@id": path.name,
                    "@type": "CreativeWork",
                    "about": ["@id": "./"],
                    "conformsTo": [
                        ["@id": "https://w3id.org/ro/crate/1.1"],
                        ["@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0"]
                    ]
                ],
                // root dataset
                [
                    "@id": "./",
                    "@type": "Dataset",
                    "conformsTo": [
                        ["@id": "https://w3id.org/ro/wfrun/process/0.1"],
                        ["@id": "https://w3id.org/ro/wfrun/workflow/0.1"],
                        ["@id": "https://w3id.org/ro/wfrun/provenance/0.1"],
                        ["@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0"]
                    ],
                    "name": "Workflow run of ${metadata.projectName}",
                    "description": manifest.description ?: "",
                    "hasPart": [
                        ["@id": metadata.projectName],
                        *inputFiles.collect( file -> ["@id": file["@id"]] ),
                        *outputFiles.collect( file -> ["@id": file["@id"]] )
                    ],
                    "mainEntity": ["@id": metadata.projectName],
                    "mentions": ["@id": "#${session.uniqueId}"]
                ],
                // profile entities referenced by the root dataset
                [
                    "@id": "https://w3id.org/ro/wfrun/process/0.1",
                    "@type": "CreativeWork",
                    "name": "Process Run Crate",
                    "version": "0.1"
                ],
                [
                    "@id": "https://w3id.org/ro/wfrun/workflow/0.1",
                    "@type": "CreativeWork",
                    "name": "Workflow Run Crate",
                    "version": "0.1"
                ],
                [
                    "@id": "https://w3id.org/ro/wfrun/provenance/0.1",
                    "@type": "CreativeWork",
                    "name": "Provenance Run Crate",
                    "version": "0.1"
                ],
                [
                    "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0",
                    "@type": "CreativeWork",
                    "name": "Workflow RO-Crate",
                    "version": "1.0"
                ],
                // the workflow itself
                [
                    "@id": metadata.projectName,
                    "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow", "HowTo"],
                    "name": metadata.projectName,
                    "programmingLanguage": ["@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow"],
                    "hasPart": [
                        // TODO: module files? processes?
                    ],
                    "input": formalParameters.collect( fp ->
                        ["@id": fp["@id"]]
                    ),
                    "output": [
                        // TODO: id of FormalParameter for each output file
                    ],
                    "step": [
                        // TODO: processes?
                    ]
                ],
                [
                    "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow",
                    "@type": "ComputerLanguage",
                    "name": "Nextflow",
                    "identifier": "https://www.nextflow.io/",
                    "url": "https://www.nextflow.io/",
                    "version": nextflowVersion
                ],
                // TODO: SoftwareApplication for each process w/ formal parameters
                *formalParameters,
                [
                    "@id": "#${softwareApplicationId}",
                    "@type": "SoftwareApplication",
                    "name": "Nextflow ${nextflowVersion}"
                ],
                [
                    "@id": "#${organizeActionId}",
                    "@type": "OrganizeAction",
                    "agent": authors ? ["@id": "#author-1"] : null,
                    "instrument": ["@id": "#${softwareApplicationId}"],
                    "name": "Run of Nextflow ${nextflowVersion}",
                    "object": [
                        // TODO: ids of the ControlAction for each step;
                        // left empty until those entities are generated so
                        // that the crate contains no dangling references
                    ],
                    "result": ["@id": "#${session.uniqueId}"],
                    "startTime": dateStarted
                ],
                *authors,
                // the workflow run
                [
                    "@id": "#${session.uniqueId}",
                    "@type": "CreateAction",
                    "name": "Nextflow workflow run ${session.uniqueId}",
                    "startTime": dateStarted,
                    "endTime": dateCompleted,
                    "instrument": ["@id": metadata.projectName],
                    "object": [
                        *inputFiles.collect( file -> ["@id": file["@id"]] ),
                        // must be a map entry ("@id": ...), not a two-element list
                        *propertyValues.collect( pv -> ["@id": pv["@id"]] )
                    ],
                    "result": outputFiles.collect( file ->
                        ["@id": file["@id"]]
                    )
                ],
                *inputFiles,
                *propertyValues,
                *outputFiles
            ]
        ]

        // render manifest to JSON file
        path.text = JsonOutput.prettyPrint(JsonOutput.toJson(wrroc))
    }

}
Loading