-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Snakemake SLURM Functionality #77
Changes from all commits
f9c3b1c
a1bb558
3e1e7ca
636795b
a887b4b
69f1a9d
7012106
8bf5787
cb719a0
6e430e9
abdc59c
98b2801
4484216
0965558
c5c955a
79eb332
d944213
ce581ba
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
from abc import ABC, abstractmethod | ||
from dataclasses import dataclass | ||
from typing import Callable, List | ||
from typing import Callable, List, Optional | ||
|
||
|
||
class Rule(ABC): | ||
|
@@ -39,7 +39,8 @@ def _build_rule(self) -> str: | |
rule all: | ||
input: | ||
final_output={self.target_files}, | ||
validation='{self.validation}'""" | ||
validation='{self.validation}' | ||
message: 'Grabbing final output' """ | ||
|
||
|
||
@dataclass | ||
|
@@ -48,37 +49,55 @@ class ImplementedRule(Rule): | |
A rule that defines the execution of an implementation | ||
|
||
Parameters: | ||
name: Name | ||
step_name: Name of step | ||
implementation_name: Name of implementation | ||
execution_input: List of file paths required by implementation | ||
validation: name of file created by InputValidationRule to check for compatible input | ||
output: List of file paths created by implementation | ||
resources: Computational resources used by executor (e.g. SLURM) | ||
envvars: Dictionary of environment variables to set | ||
diagnostics_dir: Directory for diagnostic files | ||
image_path: Path to Singularity image | ||
script_cmd: Command to execute | ||
""" | ||
|
||
name: str | ||
step_name: str | ||
implementation_name: str | ||
execution_input: List[str] | ||
validation: str | ||
output: List[str] | ||
resources: Optional[dict] | ||
envvars: dict | ||
diagnostics_dir: str | ||
image_path: str | ||
script_cmd: str | ||
|
||
def _build_rule(self) -> str: | ||
return ( | ||
f""" | ||
return self._build_io() + self._build_resources() + self._build_shell_command() | ||
|
||
def _build_io(self) -> str: | ||
return f""" | ||
rule: | ||
name: "{self.name}" | ||
name: "{self.implementation_name}" | ||
message: "Running {self.step_name} implementation: {self.implementation_name}" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. message is snakemake's way of logging to stdout? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah it adds to the snakemake logging--example in the stdout posted above |
||
input: | ||
implementation_inputs={self.execution_input}, | ||
validation="{self.validation}" | ||
output: {self.output} | ||
log: "{self.diagnostics_dir}/{self.implementation_name}-output.log" | ||
container: "{self.image_path}" """ | ||
+ self._build_shell_command() | ||
) | ||
|
||
def _build_resources(self) -> str: | ||
if not self.resources: | ||
return "" | ||
return f""" | ||
resources: | ||
slurm_partition='{self.resources['partition']}', | ||
mem={self.resources['memory']}, | ||
runtime={self.resources['time_limit']}, | ||
nodes={self.resources['cpus']}, | ||
slurm_extra="--output '{self.diagnostics_dir}/{self.implementation_name}-slurm-%j.log'" | ||
""" | ||
|
||
def _build_shell_command(self) -> str: | ||
shell_cmd = f""" | ||
|
@@ -90,8 +109,9 @@ def _build_shell_command(self) -> str: | |
for var_name, var_value in self.envvars.items(): | ||
shell_cmd += f""" | ||
export {var_name}={var_value}""" | ||
# Log stdout/stderr to diagnostics directory | ||
shell_cmd += f""" | ||
{self.script_cmd} | ||
{self.script_cmd} > {{log}} 2>&1 | ||
patricktnast marked this conversation as resolved.
Show resolved
Hide resolved
|
||
'''""" | ||
|
||
return shell_cmd | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,7 @@ | |
from linker.pipeline import Pipeline | ||
from linker.utilities.data_utils import copy_configuration_files_to_results_directory | ||
from linker.utilities.paths import LINKER_TEMP | ||
from linker.utilities.slurm_utils import is_on_slurm | ||
|
||
|
||
def main( | ||
|
@@ -24,7 +25,7 @@ def main( | |
copy_configuration_files_to_results_directory(config, results_dir) | ||
snakefile = pipeline.build_snakefile(results_dir) | ||
environment_args = get_environment_args(config, results_dir) | ||
singularity_args = get_singularity_args(config.input_data, results_dir) | ||
singularity_args = get_singularity_args(config, results_dir) | ||
# We need to set a dummy environment variable to avoid logging a wall of text. | ||
# TODO [MIC-4920]: Remove when https://github.com/snakemake/snakemake-interface-executor-plugins/issues/55 merges | ||
os.environ["foo"] = "bar" | ||
|
@@ -38,6 +39,9 @@ def main( | |
## See above | ||
"--envvars", | ||
"foo", | ||
## Suppress some of the snakemake output | ||
"--quiet", | ||
"progress", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this saying to suppross some output ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. "progress" is the argument to "--quiet", it's suppressing some logging about the progress of execution |
||
"--use-singularity", | ||
"--singularity-args", | ||
singularity_args, | ||
|
@@ -47,11 +51,12 @@ def main( | |
snake_main(argv) | ||
|
||
|
||
def get_singularity_args(input_data: List[Path], results_dir: Path) -> str: | ||
input_file_paths = ",".join(file.as_posix() for file in input_data) | ||
def get_singularity_args(config: Config, results_dir: Path) -> str: | ||
input_file_paths = ",".join(file.as_posix() for file in config.input_data) | ||
singularity_args = "--no-home --containall" | ||
LINKER_TEMP.mkdir(parents=True, exist_ok=True) | ||
singularity_args += f" -B {LINKER_TEMP}:/tmp,{results_dir},{input_file_paths}" | ||
linker_tmp_dir = LINKER_TEMP[config.computing_environment] | ||
patricktnast marked this conversation as resolved.
Show resolved
Hide resolved
|
||
linker_tmp_dir.mkdir(parents=True, exist_ok=True) | ||
singularity_args += f" -B {linker_tmp_dir}:/tmp,{results_dir},{input_file_paths}" | ||
return singularity_args | ||
|
||
|
||
|
@@ -62,44 +67,24 @@ def get_environment_args(config: Config, results_dir: Path) -> List[str]: | |
|
||
# TODO [MIC-4822]: launch a local spark cluster instead of relying on implementation | ||
elif config.computing_environment == "slurm": | ||
# TODO: Add back slurm support | ||
raise NotImplementedError( | ||
"Slurm support is not yet implemented with snakemake integration" | ||
) | ||
# # Set up a single drmaa.session that is persistent for the duration of the pipeline | ||
# # TODO [MIC-4468]: Check for slurm in a more meaningful way | ||
# hostname = socket.gethostname() | ||
# if "slurm" not in hostname: | ||
# raise RuntimeError( | ||
# f"Specified a 'slurm' computing-environment but on host {hostname}" | ||
# ) | ||
# os.environ["DRMAA_LIBRARY_PATH"] = "/opt/slurm-drmaa/lib/libdrmaa.so" | ||
# diagnostics = results_dir / "diagnostics/" | ||
# job_name = "snakemake-linker" | ||
# resources = config.slurm_resources | ||
# drmaa_args = get_cli_args( | ||
# job_name=job_name, | ||
# account=resources["account"], | ||
# partition=resources["partition"], | ||
# peak_memory=resources["memory"], | ||
# max_runtime=resources["time_limit"], | ||
# num_threads=resources["cpus"], | ||
# ) | ||
# drmaa_cli_arguments = [ | ||
# "--executor", | ||
# "drmaa", | ||
# "--drmaa-args", | ||
# drmaa_args, | ||
# "--drmaa-log-dir", | ||
# str(diagnostics), | ||
# ] | ||
# # slurm_args = [ | ||
# # "--executor", | ||
# # "slurm", | ||
# # "--profile", | ||
# # "/ihme/homes/pnast/repos/linker/.config/snakemake/slurm" | ||
# # ] | ||
# return drmaa_cli_arguments | ||
if not is_on_slurm(): | ||
raise RuntimeError( | ||
f"A 'slurm' computing environment is specified but it has been " | ||
"determined that the current host is not on a slurm cluster " | ||
f"(host: {socket.gethostname()})." | ||
) | ||
resources = config.slurm_resources | ||
slurm_args = [ | ||
"--executor", | ||
"slurm", | ||
"--default-resources", | ||
f"slurm_account={resources['account']}", | ||
f"slurm_partition='{resources['partition']}'", | ||
f"mem={resources['memory']}", | ||
f"runtime={resources['time_limit']}", | ||
f"nodes={resources['cpus']}", | ||
] | ||
return slurm_args | ||
else: | ||
raise NotImplementedError( | ||
"only computing_environment 'local' and 'slurm' are supported; " | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,11 @@ | |
from linker.configuration import Config | ||
|
||
|
||
def is_on_slurm() -> bool: | ||
"""Returns True if the current environment is a SLURM cluster.""" | ||
return "SLURM_ROOT" in os.environ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, right, in my unmerged branch for Jenkins a better check is |
||
|
||
|
||
def get_slurm_drmaa() -> "drmaa": | ||
"""Returns object() to bypass RuntimeError when not on a DRMAA-compliant system""" | ||
try: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,18 @@ | ||
|
||
rule: | ||
name: "foo" | ||
message: "Running foo_step implementation: foo" | ||
input: | ||
implementation_inputs=['foo', 'bar'], | ||
validation="bar" | ||
output: ['baz'] | ||
log: "spam/foo-output.log" | ||
container: "Multipolarity.sif" | ||
shell: | ||
''' | ||
export DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS=foo,bar | ||
export DUMMY_CONTAINER_OUTPUT_PATHS=baz | ||
export DUMMY_CONTAINER_DIAGNOSTICS_DIRECTORY=spam | ||
export eggs=coconut | ||
echo hello world | ||
echo hello world > {log} 2>&1 | ||
''' |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
|
||
rule: | ||
name: "foo" | ||
message: "Running foo_step implementation: foo" | ||
input: | ||
implementation_inputs=['foo', 'bar'], | ||
validation="bar" | ||
output: ['baz'] | ||
log: "spam/foo-output.log" | ||
container: "Multipolarity.sif" | ||
resources: | ||
slurm_partition='slurmpart', | ||
mem=5, | ||
runtime=1, | ||
nodes=1337, | ||
slurm_extra="--output 'spam/foo-slurm-%j.log'" | ||
|
||
shell: | ||
''' | ||
export DUMMY_CONTAINER_MAIN_INPUT_FILE_PATHS=foo,bar | ||
export DUMMY_CONTAINER_OUTPUT_PATHS=baz | ||
export DUMMY_CONTAINER_DIAGNOSTICS_DIRECTORY=spam | ||
export eggs=coconut | ||
echo hello world > {log} 2>&1 | ||
''' |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: we may want to specify this as "slurm_resources" (as opposed to spark resources or in the future who knows what else)
Unless your thought is the logic will go here eventually regardless of resource type?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I think the more general "resources" make sense at least on the rule side, given that we may support non-slurm execution resources, and this would be the place that those would go (though not necessarily for something like spark,)
https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#snakefiles-standard-resources