From be1c27814c208e5b9b3f352eede11ac4de00156d Mon Sep 17 00:00:00 2001 From: "Andrew S. Rosen" Date: Tue, 8 Aug 2023 05:26:10 -0700 Subject: [PATCH] Remove sshproxy utility (#78) * Remove sshproxy * Remove sshproxy interface * fix test * Update setup.py --- CHANGELOG.md | 1 + README.md | 31 ++++------------- covalent_slurm_plugin/slurm.py | 61 ---------------------------------- setup.py | 3 -- tests/slurm_test.py | 2 +- 5 files changed, 8 insertions(+), 90 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e1f069..3884a16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Changed +- Removed the `sshproxy` interface. - Updates __init__ signature kwargs replaced with parent for better documentation. ### Docs diff --git a/README.md b/README.md index f70fcdd..d8b857d 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ Covalent is a Pythonic workflow tool used to execute tasks on advanced computing hardware. This executor plugin interfaces Covalent with HPC systems managed by [Slurm](https://slurm.schedmd.com/documentation.html). For workflows to be deployable, users must have SSH access to the Slurm login node, writable storage space on the remote filesystem, and permissions to submit jobs to Slurm. ## Installation + To use this plugin with Covalent, simply install it using `pip`: ``` @@ -26,6 +27,7 @@ pip install covalent-slurm-plugin On the remote system, the Python version in the environment you plan to use must match that used when dispatching the calculations. Additionally, the remote system's Python environment must have the base [covalent package](https://github.com/AgnostiqHQ/covalent) installed (e.g. `pip install covalent`). ## Usage + The following shows an example of a Covalent [configuration](https://covalent.readthedocs.io/en/latest/how_to/config/customization.html) that is modified to support Slurm: ```console @@ -56,7 +58,7 @@ The first stanza describes default connection parameters for a user who can conn ssh -i /home/user/.ssh/id_rsa user@login.cluster.org ``` -The second and third stanzas describe default parameters for ``#SBATCH`` directives and default parameters passed directly to ``srun``, respectively. +The second and third stanzas describe default parameters for `#SBATCH` directives and default parameters passed directly to `srun`, respectively. This example generates a script containing the following preamble: @@ -86,9 +88,9 @@ To use the configuration settings, an electron’s executor must be specified wi return x + y ``` -Alternatively, passing a ``SlurmExecutor`` instance enables custom behavior scoped to specific tasks. Here, the executor's ``prerun_commands`` and ``postrun_commands`` parameters can be used to list shell commands to be executed before and after submitting the workflow. These may include any additional ``srun`` commands apart from workflow submission. Commands can also be nested inside the submission call to ``srun`` by using the ``srun_append`` parameter. +Alternatively, passing a `SlurmExecutor` instance enables custom behavior scoped to specific tasks. Here, the executor's `prerun_commands` and `postrun_commands` parameters can be used to list shell commands to be executed before and after submitting the workflow. These may include any additional `srun` commands apart from workflow submission. Commands can also be nested inside the submission call to `srun` by using the `srun_append` parameter. -More complex jobs can be crafted by using these optional parameters. For example, the instance below runs a job that accesses CPU and GPU resources on a single node, while profiling GPU usage via ``nsys`` and issuing complementary commands that pause/resume the central hardware counter. +More complex jobs can be crafted by using these optional parameters. For example, the instance below runs a job that accesses CPU and GPU resources on a single node, while profiling GPU usage via `nsys` and issuing complementary commands that pause/resume the central hardware counter. ```python executor = ct.executor.SlurmExecutor( @@ -134,27 +136,6 @@ Here the corresponding submit script contains the following commands: srun --ntasks-per-node 1 dcgmi profile --resume ``` -### sshproxy - -Some users may need two-factor authentication (2FA) to connect to a cluster. This plugin supports one form of 2FA using the [sshproxy](https://docs.nersc.gov/connect/mfa/#sshproxy) service developed by NERSC. When this plugin is configured to support `sshproxy`, the user's SSH key and certificate will be refreshed automatically by Covalent if either it does not exist or it is expired. We assume that the user has already [configured 2FA](https://docs.nersc.gov/connect/mfa/#creating-and-installing-a-token), used the `sshproxy` service on the command line without issue, and added the executable to their `PATH`. Note that this plugin assumes the script is called `sshproxy`, not `sshproxy.sh`. Further note that using `sshproxy` within Covalent is not required; a user can still run it manually and provide `ssh_key_file` and `cert_file` in the plugin constructor. - -In order to enable `sshproxy` in this plugin, add the following block to your Covalent configuration while the server is stopped: - -```console -[executors.slurm.sshproxy] -hosts = [ "perlmutter-p1.nersc.gov" ] -password = "" -secret = "" -``` - -For details on how to modify your Covalent configuration, refer to documentation [here](https://covalent.readthedocs.io/en/latest/how_to/config/customization.html?highlight=configuration). - -Then, reinstall this plugin using `pip install covalent-slurm-plugin[sshproxy]` in order to pull in the `oathtool` package which will generate one-time passwords. - -The `hosts` parameter is a list of hostnames for which the `sshproxy` service will be used. If the address provided in the plugin constructor is not present in this list, `sshproxy` will not be used. The `password` is the user's password, not including the 6-digit OTP. The `secret` is the 2FA secret provided when a user registers a new device on [Iris](https://iris.nersc.gov/). Rather than scan the QR code into an authenticator app, inspect the Oath Seed URL for a string labeled `secret=...`, typically consisting of numbers and capital letters. Users can validate that correct OTP codes are being generated by using the command `oathtool ` and using the 6-digit number returned in the "Test" option on the Iris 2FA page. Note that these values are stored in plaintext in the Covalent configuration file. If a user suspects credentials have been stolen or compromised, contact your systems administrator immediately to report the incident and request deactivation. - -For more information about how to get started with Covalent, check out the project [homepage](https://github.com/AgnostiqHQ/covalent) and the official [documentation](https://covalent.readthedocs.io/en/latest/). - ## Release Notes Release notes are available in the [Changelog](https://github.com/AgnostiqHQ/covalent-slurm-plugin/blob/main/CHANGELOG.md). @@ -164,7 +145,7 @@ Release notes are available in the [Changelog](https://github.com/AgnostiqHQ/cov Please use the following citation in any publications: > W. J. Cunningham, S. K. Radha, F. Hasan, J. Kanem, S. W. Neagle, and S. Sanand. -> *Covalent.* Zenodo, 2022. https://doi.org/10.5281/zenodo.5903364 +> _Covalent._ Zenodo, 2022. https://doi.org/10.5281/zenodo.5903364 ## License diff --git a/covalent_slurm_plugin/slurm.py b/covalent_slurm_plugin/slurm.py index bf93274..47b5696 100644 --- a/covalent_slurm_plugin/slurm.py +++ b/covalent_slurm_plugin/slurm.py @@ -25,7 +25,6 @@ import re import sys from copy import deepcopy -from datetime import datetime from pathlib import Path from typing import Any, Callable, Dict, List, Union @@ -45,7 +44,6 @@ "username": "", "address": "", "ssh_key_file": "", - "sshproxy": {}, "cert_file": None, "remote_workdir": "covalent-workdir", "create_unique_workdir": False, @@ -76,7 +74,6 @@ class SlurmExecutor(AsyncBaseExecutor): address: Remote address or hostname of the Slurm login node. ssh_key_file: Private RSA key used to authenticate over SSH (usually at ~/.ssh/id_rsa). cert_file: Certificate file used to authenticate over SSH, if required (usually has extension .pub). - sshproxy: Dictionary of parameters for sshproxy, namely the "hosts": List[str], "username": str, and "secret": str. remote_workdir: Working directory on the remote cluster. create_unique_workdir: Whether to create a unique working (sub)directory for each task. conda_env: Name of conda environment on which to run the function. Use "base" for the base environment or "" for no conda. @@ -103,7 +100,6 @@ def __init__( address: str = None, ssh_key_file: str = None, cert_file: str = None, - sshproxy: Dict = None, remote_workdir: str = None, create_unique_workdir: bool = None, conda_env: str = None, @@ -173,13 +169,6 @@ def __init__( options = get_config("executors.slurm.options") self.options = deepcopy(options) - if sshproxy is None: - try: - sshproxy = get_config("executors.slurm.sshproxy") - except KeyError: - sshproxy = {} - self.sshproxy = deepcopy(sshproxy) - self.use_srun = get_config("executors.slurm.use_srun") if use_srun is None else use_srun if srun_options is None: @@ -227,56 +216,6 @@ async def _client_connect(self) -> asyncssh.SSHClientConnection: if not self.ssh_key_file: raise ValueError("ssh_key_file is a required parameter in the Slurm plugin.") - if self.sshproxy and self.address in self.sshproxy["hosts"]: - try: - import oathtool - except ImportError: - raise RuntimeError( - "To use 'sshproxy' options, reinstall the Slurm plugin as 'pip install covalent-slurm-plugin[sshproxy]'" - ) - - # Validate the certificate is not expired - valid_cert = False - if self.cert_file and Path(self.cert_file).exists(): - proc = await asyncio.create_subprocess_shell( - f"ssh-keygen -L -f {self.cert_file} | awk '/Valid/ " + "{print $5}'", - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - stdout, stderr = await proc.communicate() - - if proc.returncode != 0: - raise RuntimeError( - "Failed to identify the expiration of the SSH key. Is this key compatible with sshproxy?" - ) - - expiration = datetime.strptime(stdout.decode().rstrip(), "%Y-%m-%dT%H:%M:%S") - if expiration > datetime.now(): - valid_cert = True - - app_log.debug(f"Certificate expiration: {stdout.decode()}") - - if not valid_cert: - app_log.debug("Requesting new key and certificate") - password = self.sshproxy["password"] - otp = oathtool.generate_otp(self.sshproxy["secret"]) - - proc = await asyncio.create_subprocess_shell( - f"sshproxy -u {self.username} -o {self.ssh_key_file}", - stdin=asyncio.subprocess.PIPE, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - stdout, stderr = await proc.communicate(input=f"{password}{otp}".encode()) - - if proc.returncode != 0: - raise RuntimeError(f"sshproxy failed to retrieve a key: {stderr.decode()}") - - if not self.cert_file: - self.cert_file = Path(self.ssh_key_file).parents[0] / "nersc-cert.pub" - - app_log.debug("sshproxy successful") - if self.cert_file and not os.path.exists(self.cert_file): raise FileNotFoundError(f"Certificate file not found: {self.cert_file}") diff --git a/setup.py b/setup.py index cd63950..1753513 100644 --- a/setup.py +++ b/setup.py @@ -48,9 +48,6 @@ "long_description_content_type": "text/markdown", "include_package_data": True, "install_requires": required, - "extras_require": { - "sshproxy": ["oathtool==2.3.1"], - }, "classifiers": [ "Development Status :: 4 - Beta", "Environment :: Console", diff --git a/tests/slurm_test.py b/tests/slurm_test.py index e442279..e34b6b2 100644 --- a/tests/slurm_test.py +++ b/tests/slurm_test.py @@ -171,7 +171,7 @@ def test_failed_init(): """Test for failed inits""" start_config = deepcopy(get_config()) - for key in ["cert_file", "slurm_path", "conda_env", "bashrc_path", "sshproxy", "srun_append"]: + for key in ["cert_file", "slurm_path", "conda_env", "bashrc_path", "srun_append"]: config = get_config() config["executors"]["slurm"].pop(key, None) set_config(config)