Skip to content

Commit

Permalink
FEAT: asynchronous publishing for the nextflow runner. (#736)
Browse files Browse the repository at this point in the history
  • Loading branch information
DriesSchaumont authored Dec 9, 2024
1 parent 381b0c0 commit 3e33053
Show file tree
Hide file tree
Showing 8 changed files with 426 additions and 66 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

TODO add summary

## NEW FEATURES

* `Nextflow` runner: allow emitting multiple output channels (PR #736).

## MINOR CHANGES

* `viash-hub`: Change the url for viash-hub Git access to packages.viash-hub.com (PR #774).
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,5 @@
Map _processOutputValues(Map outputs, Map config, String id, String key) {
Map _checkValidOutputArgument(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}

outputs = outputs.collectEntries { name, value ->
def par = config.allArguments.find { it.plainName == name && it.direction == "output" }
assert par != null : "Error in module '${key}' id '${id}': '${name}' is not a valid output argument"
Expand All @@ -18,3 +11,14 @@ Map _processOutputValues(Map outputs, Map config, String id, String key) {
}
return outputs
}

void _checkAllRequiredOuputsPresent(Map outputs, Map config, String id, String key) {
if (!workflow.stubRun) {
config.allArguments.each { arg ->
if (arg.direction == "output" && arg.required) {
assert outputs.containsKey(arg.plainName) && outputs.get(arg.plainName) != null :
"Error in module '${key}' id '${id}': required output argument '${arg.plainName}' is missing"
}
}
}
}
154 changes: 154 additions & 0 deletions src/main/resources/io/viash/runners/nextflow/states/publishFiles.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
def publishFiles(Map args) {
def key_ = args.get("key")

assert key_ != null : "publishFiles: key must be specified"

workflow publishFilesWf {
take: input_ch
main:
input_ch
| map { tup ->
def id_ = tup[0]
def state_ = tup[1]

// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]

[id_, inputFiles_, outputFilenames_]
}
| publishFilesProc
emit: input_ch
}
return publishFilesWf
}

process publishFilesProc {
// todo: check publishpath?
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
output:
tuple val(id), path{outputFiles}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
}


// this assumes that the state contains no other values other than those specified in the config
def publishFilesByConfig(Map args) {
def config = args.get("config")
assert config != null : "publishFilesByConfig: config must be specified"

def key_ = args.get("key", config.name)
assert key_ != null : "publishFilesByConfig: key must be specified"

workflow publishFilesSimpleWf {
take: input_ch
main:
input_ch
| map { tup ->
def id_ = tup[0]
def state_ = tup[1] // e.g. [output: new File("myoutput.h5ad"), k: 10]
def origState_ = tup[2] // e.g. [output: '$id.$key.foo.h5ad']


// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
.collectMany { par ->
def plainName_ = par.plainName
// if the state does not contain the key, it's an
// optional argument for which the component did
// not generate any output OR multiple channels were emitted
// and the output was just not added to using the channel
// that is now being parsed
if (!state_.containsKey(plainName_)) {
return []
}
def value = state_[plainName_]
// if the parameter is not a file, it should be stored
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[inputPath: [], outputFilename: []]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
// that it should not be returned as a state
if (!origState_.containsKey(plainName_)) {
return []
}
def filenameTemplate = origState_[plainName_]
// if the pararameter is multiple: true, fetch the template
if (par.multiple && filenameTemplate instanceof List) {
filenameTemplate = filenameTemplate[0]
}
// instantiate the template
def filename = filenameTemplate
.replaceAll('\\$id', id_)
.replaceAll('\\$\\{id\\}', id_)
.replaceAll('\\$key', key_)
.replaceAll('\\$\\{key\\}', key_)
if (par.multiple) {
// if the parameter is multiple: true, the filename
// should contain a wildcard '*' that is replaced with
// the index of the file
assert filename.contains("*") : "Module '${key_}' id '${id_}': Multiple output files specified, but no wildcard '*' in the filename: ${filename}"
def outputPerFile = value.withIndex().collect{ val, ix ->
def filename_ix = filename.replace("*", ix.toString())
def inputPath = val instanceof File ? val.toPath() : val
[inputPath: inputPath, outputFilename: filename_ix]
}
def transposedOutputs = ["inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
}
return [[key: plainName_] + transposedOutputs]
} else {
def value_ = java.nio.file.Paths.get(filename)
def inputPath = value instanceof File ? value.toPath() : value
return [[inputPath: [inputPath], outputFilename: [filename]]]
}
}

def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}


[id_, inputPaths, outputFilenames]
}
| publishFilesProc
emit: input_ch
}
return publishFilesSimpleWf
}



Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,6 @@ def publishStates(Map args) {

// the input files and the target output filenames
def inputoutputFilenames_ = collectInputOutputPaths(state_, id_ + "." + key_).transpose()
def inputFiles_ = inputoutputFilenames_[0]
def outputFilenames_ = inputoutputFilenames_[1]

def yamlFilename = yamlTemplate_
.replaceAll('\\$id', id_)
Expand All @@ -68,7 +66,7 @@ def publishStates(Map args) {
// convert state to yaml blob
def yamlBlob_ = toRelativeTaggedYamlBlob([id: id_] + state_, java.nio.file.Paths.get(yamlFilename))

[id_, yamlBlob_, yamlFilename, inputFiles_, outputFilenames_]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
Expand All @@ -80,33 +78,17 @@ process publishStatesProc {
publishDir path: "${getPublishDir()}/", mode: "copy"
tag "$id"
input:
tuple val(id), val(yamlBlob), val(yamlFile), path(inputFiles, stageAs: "_inputfile?/*"), val(outputFiles)
tuple val(id), val(yamlBlob), val(yamlFile)
output:
tuple val(id), path{[yamlFile] + outputFiles}
tuple val(id), path{[yamlFile]}
script:
def copyCommands = [
inputFiles instanceof List ? inputFiles : [inputFiles],
outputFiles instanceof List ? outputFiles : [outputFiles]
]
.transpose()
.collectMany{infile, outfile ->
if (infile.toString() != outfile.toString()) {
[
"[ -d \"\$(dirname '${outfile.toString()}')\" ] || mkdir -p \"\$(dirname '${outfile.toString()}')\"",
"cp -r '${infile.toString()}' '${outfile.toString()}'"
]
} else {
// no need to copy if infile is the same as outfile
[]
}
}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
echo '${yamlBlob}' > '${yamlFile}'
echo "Copying output files to destination folder"
${copyCommands.join("\n ")}
"""
mkdir -p "\$(dirname '${yamlFile}')"
echo "Storing state as yaml"
cat > '${yamlFile}' << HERE
${yamlBlob}
HERE
"""
}


Expand Down Expand Up @@ -137,13 +119,10 @@ def publishStatesByConfig(Map args) {
.replaceAll('\\$\\{key\\}', key_)
def yamlDir = java.nio.file.Paths.get(yamlFilename).getParent()

// the processed state is a list of [key, value, inputPath, outputFilename] tuples, where
// the processed state is a list of [key, value] tuples, where
// - key is a String
// - value is any object that can be serialized to a Yaml (so a String/Integer/Long/Double/Boolean, a List, a Map, or a Path)
// - inputPath is a List[Path]
// - outputFilename is a List[String]
// - (key, value) are the tuples that will be saved to the state.yaml file
// - (inputPath, outputFilename) are the files that will be copied from src to dest (relative to the state.yaml)
def processedState =
config.allArguments
.findAll { it.direction == "output" }
Expand All @@ -160,7 +139,7 @@ def publishStatesByConfig(Map args) {
// in the state as-is, but is not something that needs
// to be copied from the source path to the dest path
if (par.type != "file") {
return [[key: plainName_, value: value, inputPath: [], outputFilename: []]]
return [[key: plainName_, value: value]]
}
// if the orig state does not contain this filename,
// it's an optional argument for which the user specified
Expand Down Expand Up @@ -191,32 +170,27 @@ def publishStatesByConfig(Map args) {
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = val instanceof File ? val.toPath() : val
[value: value_, inputPath: inputPath, outputFilename: filename_ix]
}
def transposedOutputs = ["value", "inputPath", "outputFilename"].collectEntries{ key ->
[key, outputPerFile.collect{dic -> dic[key]}]
return value_
}
return [[key: plainName_] + transposedOutputs]
return [["key": plainName_, "value": outputPerFile]]
} else {
def value_ = java.nio.file.Paths.get(filename)
// if id contains a slash
if (yamlDir != null) {
value_ = yamlDir.relativize(value_)
}
def inputPath = value instanceof File ? value.toPath() : value
return [[key: plainName_, value: value_, inputPath: [inputPath], outputFilename: [filename]]]
return [["key": plainName_, value: value_]]
}
}


def updatedState_ = processedState.collectEntries{[it.key, it.value]}
def inputPaths = processedState.collectMany{it.inputPath}
def outputFilenames = processedState.collectMany{it.outputFilename}

// convert state to yaml blob
def yamlBlob_ = toTaggedYamlBlob([id: id_] + updatedState_)

[id_, yamlBlob_, yamlFilename, inputPaths, outputFilenames]
[id_, yamlBlob_, yamlFilename]
}
| publishStatesProc
emit: input_ch
Expand Down
Loading

0 comments on commit 3e33053

Please sign in to comment.