From ed01b35c6dc61c2656f7b1f28c8ffe14d9fa092f Mon Sep 17 00:00:00 2001 From: nicholasyang Date: Mon, 9 Oct 2023 15:16:59 +0800 Subject: [PATCH 01/14] Dev: bootstrap: implement ssh-agent support (jsc#PED-5774) Use Cases ========= In a typical cloud-based deployment, a server may have password-based authentication disabled for ssh, and an adminstrator's ssh public key is added to authorized_keys during initialization. For this case, it is impossible for crmsh to log into cluster node with interactive authentication and create new key pairs for further operations. Instead, crmsh can make use of the administrator's key, by authenticating ssh session with ssh-agent forwarded form the adminstrator's PC. Usage Example ============= ```sh alice@alice-pc: ~> ssh -A root@node1 root@node1:~ # crm cluster init --use-ssh-agent -y root@node1:~ # exit alice@alice-pc: ~> ssh -A root@node2 root@node2:~ # crm cluster join --use-ssh-agent -c node1 -y ``` --- crmsh/bootstrap.py | 264 ++++++++++++++++++++++++++---------------- crmsh/config.py | 1 + crmsh/sh.py | 137 +++++++++++++++++----- crmsh/ssh_key.py | 176 ++++++++++++++++++++++++---- crmsh/ui_cluster.py | 10 +- crmsh/ui_corosync.py | 2 +- crmsh/user_of_host.py | 5 +- crmsh/utils.py | 36 +++--- 8 files changed, 458 insertions(+), 173 deletions(-) diff --git a/crmsh/bootstrap.py b/crmsh/bootstrap.py index 731db47cac..f0e0b67aa1 100644 --- a/crmsh/bootstrap.py +++ b/crmsh/bootstrap.py @@ -143,7 +143,7 @@ def __init__(self): COROSYNC_AUTH, "/var/lib/heartbeat/crm/*", "/var/lib/pacemaker/cib/*", "/var/lib/corosync/*", "/var/lib/pacemaker/pengine/*", PCMK_REMOTE_AUTH, "/var/lib/csync2/*", "~/.config/crm/*"] - self.ssh_key_file = None + self.use_ssh_agent = False @classmethod def set_context(cls, options): @@ -485,7 +485,7 @@ def is_online(): if not xmlutil.CrmMonXmlParser.is_node_online(cluster_node): shutil.copy(COROSYNC_CONF_ORIG, corosync.conf()) sync_file(corosync.conf()) - ServiceManager(sh.LocalOnlyClusterShell(sh.LocalShell())).stop_service("corosync") + ServiceManager(sh.ClusterShellAdaptorForLocalShell(sh.LocalShell())).stop_service("corosync") print() utils.fatal("Cannot see peer node \"{}\", please check the communication IP".format(cluster_node)) return True @@ -833,11 +833,16 @@ def _parse_user_at_host(s: str, default_user: str) -> typing.Tuple[str, str]: def init_ssh(): user_host_list = [_parse_user_at_host(x, _context.current_user) for x in _context.user_at_node_list] - init_ssh_impl( - _context.current_user, - ssh_key.KeyFile(_context.ssh_key_file) if _context.ssh_key_file is not None else None, - user_host_list, - ) + if _context.use_ssh_agent: + try: + ssh_agent = ssh_key.AgentClient() + keys = ssh_agent.list() + except ssh_key.Error: + logger.error("Cannot get a public key from ssh-agent.") + raise + else: + keys = list() + init_ssh_impl(_context.current_user, keys, user_host_list) if user_host_list: service_manager = ServiceManager() for user, node in user_host_list: @@ -845,50 +850,74 @@ def init_ssh(): utils.fatal("Cluster is currently active on {} - can't run".format(node)) -def init_ssh_impl(local_user: str, ssh_public_key: typing.Optional[ssh_key.Key], user_node_list: typing.List[typing.Tuple[str, str]]): +def init_ssh_impl(local_user: str, ssh_public_keys: typing.List[ssh_key.Key], user_node_list: typing.List[typing.Tuple[str, str]]): """ Configure passwordless SSH. The local_user on local host will be configured. If user_node_list is not empty, those user and host will also be configured. 
- If ssh_public_key is specified, it will be added to authorized_keys; if not, a new key pair will be generated for each node. + If ssh_public_keys is not empty, it will be added to authorized_keys; if not, a new key pair will be generated for each node. """ - ServiceManager(sh.LocalOnlyClusterShell(sh.LocalShell())).start_service("sshd.service", enable=True) - shell = sh.SSHShell(sh.LocalShell(), local_user) + ServiceManager(sh.ClusterShellAdaptorForLocalShell(sh.LocalShell())).start_service("sshd.service", enable=True) + if ssh_public_keys: + local_shell = sh.LocalShell(additional_environ={'SSH_AUTH_SOCK': os.environ.get('SSH_AUTH_SOCK')}) + else: + local_shell = sh.LocalShell() + shell = sh.SSHShell(local_shell, local_user) authorized_key_manager = ssh_key.AuthorizedKeyManager(shell) - if ssh_public_key is not None: + if ssh_public_keys: # Use specified key. Do not generate new ones. - authorized_key_manager.add(None, local_user, ssh_public_key) + for key in ssh_public_keys: + authorized_key_manager.add(None, local_user, key) else: configure_ssh_key(local_user) configure_ssh_key('hacluster') if user_node_list: print() - if ssh_public_key is not None: + if ssh_public_keys: for user, node in user_node_list: logger.info("Adding public key to authorized_keys on %s@%s", user, node) - authorized_key_manager.add(node, user, ssh_public_key) + for key in ssh_public_keys: + authorized_key_manager.add(node, local_user, key) + if user != 'root' and 0 != shell.subprocess_run_without_input( + node, user, 'sudo true', + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ).returncode: + raise ValueError(f'Failed to sudo on {user}@{node}') else: - _init_ssh_on_remote_nodes(local_user, ssh_public_key, user_node_list) + _init_ssh_on_remote_nodes(local_user, user_node_list) + user_by_host = utils.HostUserConfig() + for user, node in user_node_list: + user_by_host.add(user, node) + user_by_host.add(local_user, utils.this_node()) + user_by_host.set_no_generating_ssh_key(bool(ssh_public_keys)) + user_by_host.save_remote([node for user, node in user_node_list]) + for user, node in user_node_list: + change_user_shell('hacluster', node) + # Starting from here, ClusterShell is available + shell = sh.ClusterShell(local_shell, UserOfHost.instance()) + authorized_key_manager = ssh_key.AuthorizedKeyManager(shell) + _init_ssh_for_secondary_user_on_remote_nodes( + shell, authorized_key_manager, + [node for user, node in user_node_list], + 'hacluster', + ) def _init_ssh_on_remote_nodes( local_user: str, - ssh_public_key: typing.Optional[ssh_key.Key], user_node_list: typing.List[typing.Tuple[str, str]], ): # Swap public ssh key between remote node and local public_key_list = list() - hacluster_public_key_list = list() for i, (remote_user, node) in enumerate(user_node_list): utils.ssh_copy_id(local_user, remote_user, node) # After this, login to remote_node is passwordless public_key_list.append(swap_public_ssh_key(node, local_user, remote_user, local_user, remote_user, add=True)) - hacluster_public_key_list.append(swap_public_ssh_key(node, 'hacluster', 'hacluster', local_user, remote_user, add=True)) if len(user_node_list) > 1: shell = sh.LocalShell() shell_script = _merge_authorized_keys(public_key_list) - hacluster_shell_script = _merge_authorized_keys(hacluster_public_key_list) for i, (remote_user, node) in enumerate(user_node_list): result = shell.su_subprocess_run( local_user, @@ -899,22 +928,26 @@ def _init_ssh_on_remote_nodes( ) if result.returncode != 0: utils.fatal('Failed to add public keys to {}@{}: 
{}'.format(remote_user, node, result.stdout)) - result = shell.su_subprocess_run( - local_user, - 'ssh {} {}@{} sudo -H -u {} /bin/sh'.format(constants.SSH_OPTION, remote_user, node, 'hacluster'), - input=hacluster_shell_script, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - ) - if result.returncode != 0: - utils.fatal('Failed to add public keys to {}@{}: {}'.format(remote_user, node, result.stdout)) - user_by_host = utils.HostUserConfig() - for user, node in user_node_list: - user_by_host.add(user, node) - user_by_host.add(local_user, utils.this_node()) - user_by_host.save_remote([node for user, node in user_node_list]) - for user, node in user_node_list: - change_user_shell('hacluster', node) + + +def _init_ssh_for_secondary_user_on_remote_nodes( + cluster_shell: sh.ClusterShell, + authorized_key_manager: ssh_key.AuthorizedKeyManager, + nodes: typing.Iterable[str], + user: str, +): + """Initialize ssh for another user via an already working ClusterShell.""" + key_file_manager = ssh_key.KeyFileManager(cluster_shell) + local_keys = [ssh_key.KeyFile(path) for path in key_file_manager.list_public_key_for_user(None, user)] + assert local_keys + for node in nodes: + if not sh.SSHShell(cluster_shell.local_shell, user).can_run_as(node, user): + for key in local_keys: + authorized_key_manager.add(node, user, key) + remote_keys = key_file_manager.ensure_key_pair_exists_for_user(node, user) + for key in remote_keys: + authorized_key_manager.add(None, user, key) + def _merge_authorized_keys(keys: typing.List[str]) -> bytes: @@ -929,18 +962,9 @@ def _merge_authorized_keys(keys: typing.List[str]) -> bytes: return buf -def _fetch_core_hosts(local_user, remote_user, remote_host) -> typing.Tuple[typing.List[str], typing.List[str]]: +def _fetch_core_hosts(shell: sh.ClusterShell, remote_host) -> typing.Tuple[typing.List[str], typing.List[str]]: cmd = 'crm options show core.hosts' - result = sh.LocalShell().su_subprocess_run( - local_user, - f'ssh {SSH_OPTION} {remote_user}@{remote_host} sudo /bin/sh', - input=cmd.encode('utf-8'), - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - if result.returncode != 0: - utils.fatal('Failed to run command "{}" on host {}: {}'.format(cmd, remote_host, result.stderr.decode('utf-8'))) - text = result.stdout.decode('utf-8') + text = shell.get_stdout_or_raise_error(cmd, remote_host) match = re.match('core\\.hosts\\s*=\\s*(.*)\\s*', text) if match is None: utils.fatal('Malformed core.hosts from host {}: {}'.format(remote_host, text)) @@ -1704,43 +1728,84 @@ def join_ssh(seed_host, seed_user): """ if not seed_host: utils.fatal("No existing IP/hostname specified (use -c option)") - local_user = _context.current_user - ServiceManager(sh.LocalOnlyClusterShell(sh.LocalShell())).start_service("sshd.service", enable=True) - configure_ssh_key(local_user) - if 0 != utils.ssh_copy_id_no_raise(local_user, seed_user, seed_host): - msg = f"Failed to login to {seed_user}@{seed_host}. Please check the credentials." 
- sudoer = userdir.get_sudoer() - if sudoer and seed_user != sudoer: - args = ['sudo crm'] - args += [x for x in sys.argv[1:]] - for i, arg in enumerate(args): - if arg == '-c' or arg == '--cluster-node' and i + 1 < len(args): - if '@' not in args[i+1]: - args[i + 1] = f'{sudoer}@{seed_host}' - msg += '\nOr, run "{}".'.format(' '.join(args)) - raise ValueError(msg) - # After this, login to remote_node is passwordless - swap_public_ssh_key(seed_host, local_user, seed_user, local_user, seed_user, add=True) - configure_ssh_key('hacluster') - swap_public_ssh_key(seed_host, 'hacluster', 'hacluster', local_user, seed_user, add=True) + + if _context.use_ssh_agent: + try: + ssh_agent = ssh_key.AgentClient() + keys = ssh_agent.list() + except ssh_key.Error: + logger.error("Cannot get a public key from ssh-agent.") + raise + else: + keys = list() + return join_ssh_impl(local_user, seed_host, seed_user, keys) + + +def join_ssh_impl(local_user, seed_host, seed_user, ssh_public_keys: typing.List[ssh_key.Key]): + ServiceManager(sh.ClusterShellAdaptorForLocalShell(sh.LocalShell())).start_service("sshd.service", enable=True) + if ssh_public_keys: + local_shell = sh.LocalShell(additional_environ={'SSH_AUTH_SOCK': os.environ.get('SSH_AUTH_SOCK')}) + join_ssh_with_ssh_agent(local_shell, local_user, seed_host, seed_user, ssh_public_keys) + else: + local_shell = sh.LocalShell() + if not ssh_public_keys: + configure_ssh_key(local_user) + if 0 != utils.ssh_copy_id_no_raise(local_user, seed_user, seed_host): + msg = f"Failed to login to {seed_user}@{seed_host}. Please check the credentials." + sudoer = userdir.get_sudoer() + if sudoer and seed_user != sudoer: + args = ['sudo crm'] + args += [x for x in sys.argv[1:]] + for i, arg in enumerate(args): + if arg == '-c' or arg == '--cluster-node' and i + 1 < len(args): + if '@' not in args[i+1]: + args[i + 1] = f'{sudoer}@{seed_host}' + msg += '\nOr, run "{}".'.format(' '.join(args)) + raise ValueError(msg) + # After this, login to remote_node is passwordless + swap_public_ssh_key(seed_host, local_user, seed_user, local_user, seed_user, add=True) + configure_ssh_key('hacluster') + swap_public_ssh_key(seed_host, 'hacluster', 'hacluster', local_user, seed_user, add=True) # This makes sure the seed host has its own SSH keys in its own # authorized_keys file (again, to help with the case where the # user has done manual initial setup without the assistance of # ha-cluster-init). 
- sh.LocalShell().get_stdout_or_raise_error( - local_user, - "ssh {} {}@{} sudo crm cluster init -i {} ssh_remote".format( - SSH_OPTION, seed_user, seed_host, _context.default_nic_list[0], - ), - ) + if not ssh_public_keys: + local_shell.get_stdout_or_raise_error( + local_user, + "ssh {} {}@{} sudo crm cluster init -i {} ssh_remote".format( + SSH_OPTION, seed_user, seed_host, _context.default_nic_list[0], + ), + ) user_by_host = utils.HostUserConfig() user_by_host.add(seed_user, seed_host) user_by_host.add(local_user, utils.this_node()) + user_by_host.set_no_generating_ssh_key(bool(ssh_public_keys)) user_by_host.save_local() - change_user_shell('hacluster', seed_host) + change_user_shell('hacluster') + + +def join_ssh_with_ssh_agent( + local_shell: sh.LocalShell, + local_user: str, seed_host: str, seed_user: str, + ssh_public_keys: typing.List[ssh_key.Key], +): + # As ssh-agent is used, the local_user does not have any effects + shell = sh.SSHShell(local_shell, 'root') + if not shell.can_run_as(seed_host, seed_user): + raise ValueError(f'Failed to login to {seed_user}@{seed_host}') + if seed_user != 'root' and 0 != shell.subprocess_run_without_input( + seed_host, seed_user, 'sudo true', + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ).returncode: + raise ValueError(f'Failed to sudo on {seed_user}@{seed_host}') + authorized_key_manager = ssh_key.AuthorizedKeyManager(shell) + for key in ssh_public_keys: + authorized_key_manager.add(None, local_user, key) def swap_public_ssh_key( @@ -1850,16 +1915,16 @@ def join_ssh_merge(cluster_node, remote_user): hosts = list() hosts.append(cluster_node) - # To create local entry in known_hosts - rc, _, _ = utils.get_stdout_stderr_as_local_sudoer("ssh {} {} true".format(SSH_OPTION, utils.this_node())) - assert rc == 0 + shell = sh.cluster_shell() + # create local entry in known_hosts + shell.ssh_to_localhost(None, 'true') known_hosts_new = set() cat_cmd = "[ -e ~/.ssh/known_hosts ] && cat ~/.ssh/known_hosts || true" #logger_utils.log_only_to_file("parallax.call {} : {}".format(hosts, cat_cmd)) for host in hosts: - known_hosts_content = sh.cluster_shell().get_stdout_or_raise_error(cat_cmd, host) + known_hosts_content = shell.get_stdout_or_raise_error(cat_cmd, host) if known_hosts_content: known_hosts_new.update((utils.to_ascii(known_hosts_content) or "").splitlines()) @@ -1957,9 +2022,8 @@ def setup_passwordless_with_other_nodes(init_node, remote_user): """ # Fetch cluster nodes list local_user = _context.current_user - shell = sh.LocalShell() - cmd = f'ssh {SSH_OPTION} {remote_user}@{init_node} sudo crm_node -l' - rc, out, err = shell.get_rc_stdout_stderr(local_user, cmd) + shell = sh.cluster_shell() + rc, out, err = shell.get_rc_stdout_stderr_without_input(init_node, 'crm_node -l') if rc != 0: utils.fatal("Can't fetch cluster nodes list from {}: {}".format(init_node, err)) cluster_nodes_list = [] @@ -1982,7 +2046,7 @@ def setup_passwordless_with_other_nodes(init_node, remote_user): user_by_host = utils.HostUserConfig() user_by_host.add(local_user, utils.this_node()) try: - user_list, host_list = _fetch_core_hosts(local_user, remote_user, init_node) + user_list, host_list = _fetch_core_hosts(shell, init_node) for user, host in zip(user_list, host_list): user_by_host.add(user, host) except ValueError: @@ -1991,20 +2055,22 @@ def setup_passwordless_with_other_nodes(init_node, remote_user): user_by_host.save_local() # Filter out init node from cluster_nodes_list - cmd = "ssh {} {}@{} hostname".format(SSH_OPTION, remote_user , init_node) - rc, 
out, err = shell.get_rc_stdout_stderr(local_user, cmd) + rc, out, err = shell.get_rc_stdout_stderr_without_input(init_node, 'hostname') if rc != 0: utils.fatal("Can't fetch hostname of {}: {}".format(init_node, err)) # Swap ssh public key between join node and other cluster nodes - for node in (node for node in cluster_nodes_list if node != out): - remote_user_to_swap = utils.user_of(node) - remote_privileged_user = remote_user_to_swap - utils.ssh_copy_id(local_user, remote_privileged_user, node) - swap_public_ssh_key(node, local_user, remote_user_to_swap, local_user, remote_privileged_user) + if not _context.use_ssh_agent: + for node in (node for node in cluster_nodes_list if node != out): + remote_user_to_swap = utils.user_of(node) + remote_privileged_user = remote_user_to_swap + utils.ssh_copy_id(local_user, remote_privileged_user, node) + swap_public_ssh_key(node, local_user, remote_user_to_swap, local_user, remote_privileged_user) + if local_user != 'hacluster': + change_user_shell('hacluster', node) + swap_public_ssh_key(node, 'hacluster', 'hacluster', local_user, remote_privileged_user, add=True) if local_user != 'hacluster': - change_user_shell('hacluster', node) - swap_public_ssh_key(node, 'hacluster', 'hacluster', local_user, remote_privileged_user, add=True) - if local_user != 'hacluster': + swap_key_for_hacluster(cluster_nodes_list) + else: swap_key_for_hacluster(cluster_nodes_list) user_by_host.save_remote(cluster_nodes_list) @@ -2197,7 +2263,7 @@ def update_nodeid(nodeid, node=None): if is_qdevice_configured: start_qdevice_on_join_node(seed_host) else: - ServiceManager(sh.LocalOnlyClusterShell(sh.LocalShell())).disable_service("corosync-qdevice.service") + ServiceManager(sh.ClusterShellAdaptorForLocalShell(sh.LocalShell())).disable_service("corosync-qdevice.service") def adjust_priority_in_rsc_defaults(is_2node_wo_qdevice): @@ -2246,7 +2312,7 @@ def start_qdevice_on_join_node(seed_host): qnetd_addr = corosync.get_value("quorum.device.net.host") qdevice_inst = qdevice.QDevice(qnetd_addr, cluster_node=seed_host) qdevice_inst.certificate_process_on_join() - ServiceManager(sh.LocalOnlyClusterShell(sh.LocalShell())).start_service("corosync-qdevice.service", enable=True) + ServiceManager(sh.ClusterShellAdaptorForLocalShell(sh.LocalShell())).start_service("corosync-qdevice.service", enable=True) def get_cluster_node_ip(node: str) -> str: @@ -2402,7 +2468,7 @@ def bootstrap_init(context): _context.cluster_node = args[1] if stage and _context.cluster_is_running and \ - not ServiceManager(shell=sh.LocalOnlyClusterShell(sh.LocalShell())).service_is_active(CSYNC2_SERVICE): + not ServiceManager(shell=sh.ClusterShellAdaptorForLocalShell(sh.LocalShell())).service_is_active(CSYNC2_SERVICE): _context.skip_csync2 = True _context.node_list_in_cluster = utils.list_cluster_nodes() elif not _context.cluster_is_running: @@ -2449,12 +2515,16 @@ def bootstrap_add(context): options += '-i {} '.format(nic) options = " {}".format(options.strip()) if options else "" + if context.use_ssh_agent: + options += ' --use-ssh-agent' + + shell = sh.ClusterShell(sh.LocalShell(), UserOfHost.instance(), _context.use_ssh_agent) for (user, node) in (_parse_user_at_host(x, _context.current_user) for x in _context.user_at_node_list): print() logger.info("Adding node {} to cluster".format(node)) cmd = 'crm cluster join -y {} -c {}@{}'.format(options, _context.current_user, utils.this_node()) logger.info("Running command on {}: {}".format(node, cmd)) - out = sh.cluster_shell().get_stdout_or_raise_error(cmd, node) + out = 
shell.get_stdout_or_raise_error(cmd, node) print(out) @@ -2470,7 +2540,7 @@ def bootstrap_join(context): check_tty() - corosync_active = ServiceManager(sh.LocalOnlyClusterShell(sh.LocalShell())).service_is_active("corosync.service") + corosync_active = ServiceManager(sh.ClusterShellAdaptorForLocalShell(sh.LocalShell())).service_is_active("corosync.service") if corosync_active and _context.stage != "ssh": utils.fatal("Abort: Cluster is currently active. Run this command on a node joining the cluster.") @@ -2849,7 +2919,7 @@ def bootstrap_arbitrator(context): utils.fatal("Failed to copy {} from {}".format(BOOTH_CFG, _context.cluster_node)) # TODO: verify that the arbitrator IP in the configuration is us? logger.info("Enabling and starting the booth arbitrator service") - ServiceManager(sh.LocalOnlyClusterShell(sh.LocalShell())).start_service("booth@booth", enable=True) + ServiceManager(sh.ClusterShellAdaptorForLocalShell(sh.LocalShell())).start_service("booth@booth", enable=True) def get_stonith_timeout_generally_expected(): diff --git a/crmsh/config.py b/crmsh/config.py index 7954e06b8d..54fdbaea15 100644 --- a/crmsh/config.py +++ b/crmsh/config.py @@ -239,6 +239,7 @@ def get(self, value): 'pager': opt_program('PAGER', ('less', 'more', 'pg')), 'user': opt_string(''), 'hosts': opt_list([]), # 'alice@host1, bob@host2' + 'no_generating_ssh_key': opt_boolean('no'), 'skill_level': opt_choice('expert', ('operator', 'administrator', 'expert')), 'sort_elements': opt_boolean('yes'), 'check_frequency': opt_choice('always', ('always', 'on-verify', 'never')), diff --git a/crmsh/sh.py b/crmsh/sh.py index 27ce1a24c3..bb3a7fcd69 100644 --- a/crmsh/sh.py +++ b/crmsh/sh.py @@ -21,6 +21,7 @@ import logging import os import pwd +import re import socket import subprocess import typing @@ -95,15 +96,18 @@ def geteuid() -> int: def get_effective_user_name() -> str: return pwd.getpwuid(LocalShell.geteuid()).pw_name + def __init__(self, additional_environ: typing.Dict[str, str] = None): + self.additional_environ = additional_environ + self.preserve_env = additional_environ.keys() if additional_environ is not None else None + def can_run_as(self, user: str): return self.geteuid() == 0 or self.get_effective_user_name() == user def su_subprocess_run( self, - user: str, + user: typing.Optional[str], cmd: str, tty=False, - preserve_env: typing.Optional[typing.List[str]] = None, **kwargs, ): """Call subprocess.run as another user. @@ -111,24 +115,30 @@ def su_subprocess_run( This variant is the most flexible one as it pass unknown kwargs to the underlay subprocess.run. However, it accepts only cmdline but not argv, as the argv is used internally to switch user. 
""" - if self.get_effective_user_name() == user: + if user is None or self.get_effective_user_name() == user: args = ['/bin/sh', '-c', cmd] elif 0 == self.geteuid(): args = ['su', user, '--login', '-c', cmd] if tty: args.append('--pty') - if preserve_env: + if self.preserve_env: args.append('-w') - args.append(','.join(preserve_env)) + args.append(','.join(self.preserve_env)) else: raise AuthorizationError( cmd, None, user, f"non-root user '{self.get_effective_user_name()}' cannot switch to another user" ) - logger.debug('su_subprocess_run: %s, %s', args, kwargs) - return subprocess.run(args, **kwargs) + if not self.additional_environ: + logger.debug('su_subprocess_run: %s, %s', args, kwargs) + return subprocess.run(args, **kwargs) + else: + logger.debug('su_subprocess_run: %s, env=%s, %s', args, self.additional_environ, kwargs) + env = dict(os.environ) + env.update(self.additional_environ) + return subprocess.run(args, env=env, **kwargs) - def get_rc_stdout_stderr_raw(self, user: str, cmd: str, input: typing.Optional[bytes] = None): + def get_rc_stdout_stderr_raw(self, user: typing.Optional[str], cmd: str, input: typing.Optional[bytes] = None): result = self.su_subprocess_run( user, cmd, input=input, @@ -137,13 +147,13 @@ def get_rc_stdout_stderr_raw(self, user: str, cmd: str, input: typing.Optional[b ) return result.returncode, result.stdout, result.stderr - def get_rc_stdout_stderr(self, user: str, cmd: str, input: typing.Optional[str] = None): + def get_rc_stdout_stderr(self, user: typing.Optional[str], cmd: str, input: typing.Optional[str] = None): rc, stdout, stderr = self.get_rc_stdout_stderr_raw(user, cmd, input.encode('utf-8') if input is not None else None) return rc, Utils.decode_str(stdout).strip(), Utils.decode_str(stderr).strip() def get_rc_and_error( self, - user: str, + user: typing.Optional[str], cmd: str, ) -> typing.Tuple[int, typing.Optional[str]]: """Run a command for its side effects. Returns (rc, error_message) @@ -151,17 +161,8 @@ def get_rc_and_error( If the return code is 0, outputs from the command will be ignored and (0, None) is returned. If the return code is not 0, outputs from the stdout and stderr is combined as a single message. """ - if self.get_effective_user_name() == user: - args = ['/bin/sh', '-c', cmd] - elif self.geteuid() == 0: - args = ['su', user, '--login', '-c', cmd] - else: - raise AuthorizationError( - cmd, None, user, - f"non-root user '{self.get_effective_user_name()}' cannot switch to another user" - ) - result = subprocess.run( - args, + result = self.su_subprocess_run( + user, cmd, stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, @@ -174,7 +175,7 @@ def get_rc_and_error( def get_stdout_or_raise_error( self, - user: str, + user: typing.Optional[str], cmd: str, success_exit_status: typing.Optional[typing.Set[int]] = None, ): @@ -206,6 +207,8 @@ def __init__(self, local_shell: LocalShell, local_user): self.local_user = local_user def can_run_as(self, host: typing.Optional[str], user: str) -> bool: + # This method does not call subprocess_run_without_input. The reason may be some of the callers expect that ssh + # is used even if the destination host is localhost. if host is None or host == self.local_shell.hostname(): return self.local_shell.can_run_as(user) else: @@ -263,28 +266,74 @@ class ClusterShell: For remote nodes, the local and remote user used for SSH sessions are determined from cluster configuration recorded during bootstrap. 
""" - def __init__(self, local_shell: LocalShell, user_of_host: UserOfHost): + def __init__( + self, + local_shell: LocalShell, + user_of_host: UserOfHost, + forward_ssh_agent: bool = False, + raise_ssh_error: bool = False, # whether to raise AuthorizationError when ssh returns with 255 + ): self.local_shell = local_shell self.user_of_host = user_of_host + self.forward_ssh_agent = forward_ssh_agent + self.raise_ssh_error = raise_ssh_error + + def can_run_as(self, host: typing.Optional[str], user: str) -> bool: + result = self.subprocess_run_without_input( + host, user, 'true', + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + return 0 == result.returncode def subprocess_run_without_input(self, host: typing.Optional[str], user: typing.Optional[str], cmd: str, **kwargs): assert 'input' not in kwargs and 'stdin' not in kwargs if host is None or host == self.local_shell.hostname(): - return subprocess.run( - ['/bin/sh'], - input=cmd.encode('utf-8'), - **kwargs, - ) + if user is None: + return subprocess.run( + ['/bin/sh'], + input=cmd.encode('utf-8'), + **kwargs, + ) + else: + return self.local_shell.su_subprocess_run( + user, cmd, + **kwargs, + ) else: if user is None: user = 'root' local_user, remote_user = self.user_of_host.user_pair_for_ssh(host) - return self.local_shell.su_subprocess_run( + result = self.local_shell.su_subprocess_run( local_user, - 'ssh {} {}@{} sudo -H -u {} /bin/sh'.format(constants.SSH_OPTION, remote_user, host, user), + 'ssh {} {} {}@{} sudo -H -u {} {} /bin/sh'.format( + '-A' if self.forward_ssh_agent else '', + constants.SSH_OPTION, + remote_user, + host, + user, + '--preserve-env=SSH_AUTH_SOCK' if self.forward_ssh_agent else '', + constants.SSH_OPTION, + ), input=cmd.encode('utf-8'), **kwargs, ) + if self.raise_ssh_error and result.returncode == 255: + raise AuthorizationError(cmd, host, remote_user, Utils.decode_str(result.stderr).strip()) + else: + return result + + def get_rc_and_error(self, host: typing.Optional[str], user: str, cmd: str): + result = self.subprocess_run_without_input( + host, user, cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + start_new_session=True, + ) + if result.returncode == 0: + return 0, None + else: + return result.returncode, Utils.decode_str(result.stdout).strip() def get_rc_stdout_stderr_raw_without_input(self, host, cmd) -> typing.Tuple[int, bytes, bytes]: result = self.subprocess_run_without_input( @@ -317,8 +366,34 @@ def get_stdout_or_raise_error( else: raise CommandFailure(cmd, host, None, Utils.decode_str(stderr).strip()) + def ssh_to_localhost(self, user: typing.Optional[str], cmd: str, **kwargs): + if user is None: + user = 'root' + host = self.local_shell.hostname() + local_user, remote_user = self.user_of_host.user_pair_for_ssh(host) + result = self.local_shell.su_subprocess_run( + local_user, + 'ssh {} {} {}@{} sudo -H -u {} {} /bin/sh'.format( + '-A' if self.forward_ssh_agent else '', + constants.SSH_OPTION, + remote_user, + host, + user, + '--preserve-env=SSH_AUTH_SOCK' if self.forward_ssh_agent else '', + constants.SSH_OPTION, + ), + input=cmd.encode('utf-8'), + **kwargs, + ) + if self.raise_ssh_error and result.returncode == 255: + raise AuthorizationError(cmd, host, remote_user, Utils.decode_str(result.stderr).strip()) + else: + return result + class ShellUtils: + CONTROL_CHARACTER_PATTER = re.compile('[\u0000-\u001F]') + @classmethod def get_stdout(cls, cmd, input_s=None, stderr_on=True, shell=True, raw=False): ''' @@ -366,7 +441,7 @@ def get_stdout_stderr(cls, cmd, input_s=None, 
shell=True, raw=False, no_reg=Fals return proc.returncode, stdout_data.strip(), stderr_data.strip() -class LocalOnlyClusterShell(ClusterShell): +class ClusterShellAdaptorForLocalShell(ClusterShell): """A adaptor to wrap a LocalShell as a ClusterShell. Some modules depend on shell and are called both during bootstrap and after bootstrap. Use a LocalShell as their diff --git a/crmsh/ssh_key.py b/crmsh/ssh_key.py index 5014310a86..dca4359b21 100644 --- a/crmsh/ssh_key.py +++ b/crmsh/ssh_key.py @@ -1,10 +1,11 @@ import logging import os import pwd +import re +import subprocess import tempfile import typing -from crmsh import utils from crmsh import sh @@ -16,6 +17,14 @@ def __init__(self, msg: str): super().__init__(msg) +class AgentNotAvailableError(Error): + pass + + +class NoKeysInAgentError(Error): + pass + + class Key: def public_key(self) -> str: raise NotImplementedError @@ -38,6 +47,14 @@ def public_key(self) -> str: return self._public_key +class InMemoryPublicKey(Key): + def __init__(self, content: str): + self.content = content + + def public_key(self) -> str: + return self.content + + class AuthorizedKeyManager: def __init__(self, shell: sh.SSHShell): self._shell = shell @@ -49,9 +66,7 @@ def add(self, host: typing.Optional[str], user: str, key: Key): self._add_remote(host, user, key) def _add_local(self, user: str, key: Key): - public_key = key.public_key() - file = f'~{user}/.ssh/authorized_keys' - cmd = f'''grep "{public_key}" {file} > /dev/null || sed -i '$a {public_key}' {file}''' + cmd = self._add_by_editing_file(user, key) rc, output = self._shell.local_shell.get_rc_and_error(user, cmd) if rc != 0: # unlikely @@ -59,25 +74,142 @@ def _add_local(self, user: str, key: Key): def _add_remote(self, host: str, user: str, key: Key): if self._shell.can_run_as(host, user): - rc, _ = self._shell.get_rc_and_error( - host, user, - f"grep '{key.public_key()}' ~{user}/.ssh/authorized_key > /dev/null", - ) - if rc == 0: - return - if isinstance(key, KeyFile) and key.public_key_file() is not None: + shell_user = user + elif self._shell.can_run_as(host, 'root'): + shell_user = 'root' + else: + shell_user = None + if shell_user is not None: + cmd = self._add_by_editing_file(user, key) + rc, msg = self._shell.get_rc_and_error(host, shell_user, cmd) + if rc != 0: + raise Error(f'Failed configuring SSH passwordless with {user}@{host}: {msg}') + else: user_info = pwd.getpwnam(user) - if os.stat(key.public_key_file()).st_uid == user_info.pw_uid: - cmd = "ssh-copy-id -f -i '{}' '{}@{}' &> /dev/null".format(key.public_key_file(), user, host) - logger.info("Configuring SSH passwordless with %s@%s", user, host) - result = self._shell.local_shell.su_subprocess_run(self._shell.local_user, cmd, tty=True, preserve_env=['SSH_AUTH_SOCK']) + if isinstance(key, KeyFile) and key.public_key_file() is not None: + if os.stat(key.public_key_file()).st_uid == user_info.pw_uid: + self._add_by_ssh_copy_id(user, host, key.public_key_file()) + else: + with tempfile.NamedTemporaryFile('w', encoding='utf-8', suffix='.pub') as tmp: + os.chown(tmp.fileno(), user_info.pw_uid, user_info.pw_gid) + print(key.public_key(), file=tmp) + tmp.flush() + self._add_by_ssh_copy_id(user, host, tmp.name) else: - with tempfile.NamedTemporaryFile('w', encoding='utf-8') as tmp: + with tempfile.NamedTemporaryFile('w', encoding='utf-8', suffix='.pub') as tmp: os.chown(tmp.fileno(), user_info.pw_uid, user_info.pw_gid) print(key.public_key(), file=tmp) - cmd = "ssh-copy-id -f -i '{}' '{}@{}' &> /dev/null".format(tmp.name, user, host) - 
logger.info("Configuring SSH passwordless with %s@%s", user, host) - result = self._shell.local_shell.su_subprocess_run(self._shell.local_user, cmd, tty=True) - if result.returncode != 0: - raise Error(f'Failed configuring SSH passwordless with {user}@{host}.') - # TODO: error handling + tmp.flush() + self._add_by_ssh_copy_id(user, host, tmp.name) + + @classmethod + def _add_by_editing_file(cls, user: str, key: Key): + public_key = key.public_key() + dir = f'~{user}/.ssh' + file = f'{dir}/authorized_keys' + cmd = f'''if ! grep '{public_key}' {file} > /dev/null; then + if [ -s {file} ]; then + sed -i '$a {public_key}' {file} + else + mkdir -p {dir} + chmod 0700 {dir} + echo '{public_key}' > {file} + chmod 0600 {file} + fi +fi''' + return cmd + + def _add_by_ssh_copy_id(self, user, host, key_path): + cmd = "ssh-copy-id -f -i '{}' '{}@{}' &> /dev/null".format(key_path, user, host) + logger.info("Configuring SSH passwordless with %s@%s", user, host) + result = self._shell.local_shell.su_subprocess_run( + self._shell.local_user, cmd, + tty=True, + ) + if result.returncode != 0: + raise Error(f'Failed configuring SSH passwordless with {user}@{host}.') + + +class AgentClient: + def __init__(self, socket_path: typing.Optional[str] = None): + if socket_path is None: + if 'SSH_AUTH_SOCK' not in os.environ: + raise AgentNotAvailableError("ssh-agent is not available.") + self.socket_path = None + else: + self.socket_path = socket_path + self.shell = sh.LocalShell(additional_environ={'SSH_AUTH_SOCK': self.socket_path} if self.socket_path else None) + + def list(self) -> typing.List[Key]: + cmd = 'ssh-add -L' + rc, stdout, stderr = self.shell.get_rc_stdout_stderr(None, cmd) + if rc == 1: + raise NoKeysInAgentError(stderr) + elif rc == 2: + raise AgentNotAvailableError(stderr) + elif rc != 0: + raise sh.CommandFailure(cmd, None, None, stderr) + return [InMemoryPublicKey(line) for line in stdout.splitlines()] + + +class KeyFileManager: + KNOWN_KEY_TYPES = ['rsa', 'ed25519', 'ecdsa'] # dsa is not listed here as it is not so secure + KNOWN_PUBLIC_KEY_FILENAME_PATTERN = re.compile('/id_(?:{})\\.pub$'.format('|'.join(KNOWN_KEY_TYPES))) + + def __init__(self, shell: sh.ClusterShell): + self.cluster_shell = sh.ClusterShell(shell.local_shell, shell.user_of_host, raise_ssh_error=True) + + def list_public_key_for_user(self, host: typing.Optional[str], user: str) -> typing.List[str]: + result = self.cluster_shell.subprocess_run_without_input( + host, user, + f'ls ~/.ssh/id_*.pub', + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + if result.returncode != 0: + return list() + return [ + filename + for filename in sh.Utils.decode_str(result.stdout).splitlines() + if self.KNOWN_PUBLIC_KEY_FILENAME_PATTERN.search(filename) + ] + + def load_public_keys_for_user(self, host: typing.Optional[str], user: str) -> typing.List[InMemoryPublicKey]: + filenames = self.list_public_key_for_user(host, user) + if not filenames: + return list() + cmd = f'cat ~{user}/.ssh/{{{",".join(filenames)}}}' + result = self.cluster_shell.subprocess_run_without_input( + host, user, + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + if result.returncode != 0: + raise sh.CommandFailure(cmd, host, user, sh.Utils.decode_str(result.stderr).strip()) + return [InMemoryPublicKey(line) for line in sh.Utils.decode_str(result.stdout).splitlines()] + + def ensure_key_pair_exists_for_user(self, host: typing.Optional[str], user: str) -> typing.List[InMemoryPublicKey]: + script = '''if [ ! 
\\( {condition} \\) ]; then + ssh-keygen -t rsa -f ~/.ssh/id_rsa -q -C 'Cluster internal on {host}' -N '' <> /dev/null +fi +for file in ~/.ssh/id_{{{pattern}}}.pub; do + if [ -f "$file" ]; then cat "$file"; fi +done +'''.format( + condition=' -o '.join([f'-f ~/.ssh/id_{t}' for t in self.KNOWN_KEY_TYPES]), + host=host, + pattern=','.join(self.KNOWN_KEY_TYPES), + ) + result = self.cluster_shell.subprocess_run_without_input( + host, user, + script, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + start_new_session=True, + ) + if result.returncode != 0: + print(script) + print(result.stdout) + raise sh.CommandFailure(f'Script({script[:16]}...) failed. rc = {result.returncode}', host, user, sh.Utils.decode_str(result.stderr).strip()) + return [InMemoryPublicKey(line) for line in sh.Utils.decode_str(result.stdout).splitlines()] diff --git a/crmsh/ui_cluster.py b/crmsh/ui_cluster.py index ddf3d45cb0..3ecb6c8551 100644 --- a/crmsh/ui_cluster.py +++ b/crmsh/ui_cluster.py @@ -63,7 +63,7 @@ def script_args(args): def get_cluster_name(): cluster_name = None - if not ServiceManager(sh.LocalOnlyClusterShell(sh.LocalShell())).service_is_active("corosync.service"): + if not ServiceManager(sh.ClusterShellAdaptorForLocalShell(sh.LocalShell())).service_is_active("corosync.service"): name = corosync.get_values('totem.cluster_name') if name: cluster_name = name[0] @@ -381,6 +381,8 @@ def looks_like_hostnames(lst): help="Skip csync2 initialization (an experimental option)") parser.add_argument("--no-overwrite-sshkey", action="store_true", dest="no_overwrite_sshkey", help='Avoid "/root/.ssh/id_rsa" overwrite if "-y" option is used (False by default; Deprecated)') + parser.add_argument('--use-ssh-agent', action='store_true', dest='use_ssh_agent', + help="Use an existing key from ssh-agent instead of creating new key pairs") network_group = parser.add_argument_group("Network configuration", "Options for configuring the network and messaging layer.") network_group.add_argument("-i", "--interface", dest="nic_list", metavar="IF", action=CustomAppendAction, choices=utils.interface_choice(), default=[], @@ -450,7 +452,7 @@ def looks_like_hostnames(lst): boot_context.ui_context = context boot_context.stage = stage boot_context.args = args - boot_context.cluster_is_running = ServiceManager(sh.LocalOnlyClusterShell(sh.LocalShell())).service_is_active("pacemaker.service") + boot_context.cluster_is_running = ServiceManager(sh.ClusterShellAdaptorForLocalShell(sh.LocalShell())).service_is_active("pacemaker.service") boot_context.type = "init" boot_context.initialize_qdevice() boot_context.validate_option() @@ -497,6 +499,8 @@ def do_join(self, context, *args): "-c", "--cluster-node", metavar="[USER@]HOST", dest="cluster_node", help="User and host to login to an existing cluster node. The host can be specified with either a hostname or an IP.", ) + network_group.add_argument('--use-ssh-agent', action='store_true', dest='use_ssh_agent', + help="Use an existing key from ssh-agent instead of creating new key pairs") network_group.add_argument("-i", "--interface", dest="nic_list", metavar="IF", action=CustomAppendAction, choices=utils.interface_choice(), default=[], help="Bind to IP address on interface IF. Use -i second time for second interface") options, args = parse_options(parser, args) @@ -563,7 +567,7 @@ def do_rename(self, context, new_name): ''' Rename the cluster. 
''' - if not ServiceManager(sh.LocalOnlyClusterShell(sh.LocalShell())).service_is_active("corosync.service"): + if not ServiceManager(sh.ClusterShellAdaptorForLocalShell(sh.LocalShell())).service_is_active("corosync.service"): context.fatal_error("Can't rename cluster when cluster service is stopped") old_name = cib_factory.get_property('cluster-name') if old_name and new_name == old_name: diff --git a/crmsh/ui_corosync.py b/crmsh/ui_corosync.py index ae0c8da3e8..804023ad51 100644 --- a/crmsh/ui_corosync.py +++ b/crmsh/ui_corosync.py @@ -62,7 +62,7 @@ def do_status(self, context, status_type="ring"): ''' Quick cluster health status. Corosync status or QNetd status ''' - if not ServiceManager(sh.LocalOnlyClusterShell(sh.LocalShell())).service_is_active("corosync.service"): + if not ServiceManager(sh.ClusterShellAdaptorForLocalShell(sh.LocalShell())).service_is_active("corosync.service"): logger.error("corosync.service is not running!") return False diff --git a/crmsh/user_of_host.py b/crmsh/user_of_host.py index 5cea291343..4b559cb04b 100644 --- a/crmsh/user_of_host.py +++ b/crmsh/user_of_host.py @@ -47,7 +47,7 @@ def user_pair_for_ssh(self, host: str) -> typing.Tuple[str, str]: local_user = None remote_user = None try: - local_user = self.user_of(self.this_node()) + local_user = 'root' if self._use_ssh_agent() else self.user_of(self.this_node()) remote_user = self.user_of(host) return local_user, remote_user except UserNotFoundError: @@ -67,6 +67,9 @@ def user_pair_for_ssh(self, host: str) -> typing.Tuple[str, str]: else: return cached + @staticmethod + def _use_ssh_agent() -> bool: + return config.get_option('core', 'no_generating_ssh_key') @staticmethod def _get_user_of_host_from_config(host): diff --git a/crmsh/utils.py b/crmsh/utils.py index cea3d0525c..03812c85f3 100644 --- a/crmsh/utils.py +++ b/crmsh/utils.py @@ -936,23 +936,6 @@ def pipe_cmd_nosudo(cmd): return rc -def get_stdout_stderr(cmd, input_s=None, shell=True, raw=False, no_reg=False): - ''' - Run a cmd, return (rc, stdout, stderr) - ''' - if options.regression_tests and not no_reg: - print(".EXT", cmd) - proc = subprocess.Popen(cmd, - shell=shell, - stdin=input_s and subprocess.PIPE or None, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - stdout_data, stderr_data = proc.communicate(input_s) - if raw: - return proc.returncode, stdout_data, stderr_data - return proc.returncode, to_ascii(stdout_data).strip(), to_ascii(stderr_data).strip() - - def get_stdout_stderr_as_local_sudoer(cmd, input_s=None): try: user = user_of(this_node()) @@ -3168,9 +3151,14 @@ class HostUserConfig: """ def __init__(self): self._hosts_users = dict() + self._no_generating_ssh_key = False self.load() def load(self): + self._load_hosts_users() + self._load_no_generating_ssh_key() + + def _load_hosts_users(self): users = list() hosts = list() li = config.get_option('core', 'hosts') @@ -3185,13 +3173,16 @@ def load(self): hosts.append(parts[1]) self._hosts_users = {host: user for user, host in zip(users, hosts)} + def _load_no_generating_ssh_key(self): + self._no_generating_ssh_key = config.get_option('core', 'no_generating_ssh_key') + def save_local(self): value = [f'{user}@{host}' for host, user in sorted(self._hosts_users.items(), key=lambda x: x[0])] config.set_option('core', 'hosts', value) + config.set_option('core', 'no_generating_ssh_key', self._no_generating_ssh_key) debug_on = config.get_option('core', 'debug') if debug_on: config.set_option('core', 'debug', 'false') - # TODO: it is saved in ~root/.config/crm/crm.conf, is it as suitable 
path? config.save() if debug_on: config.set_option('core', 'debug', 'true') @@ -3200,6 +3191,9 @@ def save_remote(self, remote_hosts: typing.Iterable[str]): self.save_local() value = [f'{user}@{host}' for host, user in sorted(self._hosts_users.items(), key=lambda x: x[0])] crmsh.parallax.parallax_call(remote_hosts, "crm options set core.hosts '{}'".format(', '.join(value))) + crmsh.parallax.parallax_call(remote_hosts, "crm options set core.no_generating_ssh_key '{}'".format( + 'yes' if self._no_generating_ssh_key else 'no' + )) def get(self, host): return self._hosts_users[host] @@ -3207,6 +3201,12 @@ def get(self, host): def add(self, user, host): self._hosts_users[host] = user + def set_no_generating_ssh_key(self, value: bool): + self._no_generating_ssh_key = value + + def get_no_generating_ssh_key(self) -> bool: + return self._no_generating_ssh_key + def parse_user_at_host(s: str): i = s.find('@') if i == -1: From 0b866ef59cb65bc318b3606ec6684a2b3616b078 Mon Sep 17 00:00:00 2001 From: nicholasyang Date: Wed, 11 Oct 2023 13:28:11 +0800 Subject: [PATCH 02/14] Dev: bootstrap: implement ssh-agent support for qdevice (jsc#PED-5774) --- crmsh/bootstrap.py | 14 ++++++++++---- crmsh/prun/prun.py | 30 ++++++++++++++++++------------ crmsh/user_of_host.py | 4 ++-- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/crmsh/bootstrap.py b/crmsh/bootstrap.py index f0e0b67aa1..4e4b9da78f 100644 --- a/crmsh/bootstrap.py +++ b/crmsh/bootstrap.py @@ -872,6 +872,8 @@ def init_ssh_impl(local_user: str, ssh_public_keys: typing.List[ssh_key.Key], us configure_ssh_key(local_user) configure_ssh_key('hacluster') + user_by_host = utils.HostUserConfig() + user_by_host.set_no_generating_ssh_key(bool(ssh_public_keys)) if user_node_list: print() if ssh_public_keys: @@ -887,12 +889,9 @@ def init_ssh_impl(local_user: str, ssh_public_keys: typing.List[ssh_key.Key], us raise ValueError(f'Failed to sudo on {user}@{node}') else: _init_ssh_on_remote_nodes(local_user, user_node_list) - user_by_host = utils.HostUserConfig() for user, node in user_node_list: user_by_host.add(user, node) user_by_host.add(local_user, utils.this_node()) - user_by_host.set_no_generating_ssh_key(bool(ssh_public_keys)) - user_by_host.save_remote([node for user, node in user_node_list]) for user, node in user_node_list: change_user_shell('hacluster', node) # Starting from here, ClusterShell is available @@ -903,6 +902,7 @@ def init_ssh_impl(local_user: str, ssh_public_keys: typing.List[ssh_key.Key], us [node for user, node in user_node_list], 'hacluster', ) + user_by_host.save_remote([node for user, node in user_node_list]) def _init_ssh_on_remote_nodes( @@ -1680,7 +1680,13 @@ def init_qdevice(): local_user = userdir.getuser() ssh_user = local_user # Configure ssh passwordless to qnetd if detect password is needed - if utils.check_ssh_passwd_need(local_user, ssh_user, qnetd_addr): + if UserOfHost.instance().use_ssh_agent(): + for key in ssh_key.AgentClient().list(): + ssh_key.AuthorizedKeyManager(sh.SSHShell( + sh.LocalShell(additional_environ={'SSH_AUTH_SOCK': os.environ.get('SSH_AUTH_SOCK')}), + 'root', + )).add(qnetd_addr, ssh_user, key) + elif utils.check_ssh_passwd_need(local_user, ssh_user, qnetd_addr): configure_ssh_key(local_user) if 0 != utils.ssh_copy_id_no_raise(local_user, ssh_user, qnetd_addr): msg = f"Failed to login to {ssh_user}@{qnetd_addr}. Please check the credentials." 
diff --git a/crmsh/prun/prun.py b/crmsh/prun/prun.py index 157345b6de..8ca5d0e188 100644 --- a/crmsh/prun/prun.py +++ b/crmsh/prun/prun.py @@ -151,14 +151,17 @@ def pcopy_to_remote( script = "put {} '{}' '{}'\n".format(flags, src, dst) ssh = None try: - ssh = tempfile.NamedTemporaryFile('w', encoding='utf-8', delete=False) - os.fchmod(ssh.fileno(), 0o700) # sftp -S does not parse args, it accepts only a single executable. So we create one. - ssh.write(f'''#!/bin/sh + if local_sudoer == crmsh.userdir.getuser(): + tasks = [_build_copy_task('', script, host) for host in hosts] + else: + ssh = tempfile.NamedTemporaryFile('w', encoding='utf-8', delete=False) + os.fchmod(ssh.fileno(), 0o700) + ssh.write(f'''#!/bin/sh exec sudo -u {local_sudoer} ssh "$@"''') # It is necessary to close the file before executing, or we will get an EBUSY. - ssh.close() - tasks = [_build_copy_task("-S '{}'".format(ssh.name), script, host) for host in hosts] + ssh.close() + tasks = [_build_copy_task("-S '{}'".format(ssh.name), script, host) for host in hosts] runner = Runner(concurrency) for task in tasks: runner.add_task(task) @@ -209,13 +212,16 @@ def pfetch_from_remote( local_sudoer, _ = UserOfHost.instance().user_pair_for_ssh(hosts[0]) ssh = None try: - ssh = tempfile.NamedTemporaryFile('w', encoding='utf-8', delete=False) - os.fchmod(ssh.fileno(), 0o700) - ssh.write(f'''#!/bin/sh -exec sudo -u {local_sudoer} ssh "$@"''') - # It is necessary to close the file before executing - ssh.close() - tasks = [_build_fetch_task("-S '{}'".format(ssh.name), host, src, dst, flags) for host in hosts] + if local_sudoer == crmsh.userdir.getuser(): + tasks = [_build_fetch_task('', host, src, dst, flags) for host in hosts] + else: + ssh = tempfile.NamedTemporaryFile('w', encoding='utf-8', delete=False) + os.fchmod(ssh.fileno(), 0o700) + ssh.write(f'''#!/bin/sh + exec sudo -u {local_sudoer} ssh "$@"''') + # It is necessary to close the file before executing + ssh.close() + tasks = [_build_fetch_task("-S '{}'".format(ssh.name), host, src, dst, flags) for host in hosts] runner = Runner(concurrency) for task in tasks: runner.add_task(task) diff --git a/crmsh/user_of_host.py b/crmsh/user_of_host.py index 4b559cb04b..6c0ed28fd3 100644 --- a/crmsh/user_of_host.py +++ b/crmsh/user_of_host.py @@ -47,7 +47,7 @@ def user_pair_for_ssh(self, host: str) -> typing.Tuple[str, str]: local_user = None remote_user = None try: - local_user = 'root' if self._use_ssh_agent() else self.user_of(self.this_node()) + local_user = 'root' if self.use_ssh_agent() else self.user_of(self.this_node()) remote_user = self.user_of(host) return local_user, remote_user except UserNotFoundError: @@ -68,7 +68,7 @@ def user_pair_for_ssh(self, host: str) -> typing.Tuple[str, str]: return cached @staticmethod - def _use_ssh_agent() -> bool: + def use_ssh_agent() -> bool: return config.get_option('core', 'no_generating_ssh_key') @staticmethod From 9940918b15711135883636d8c277a75f1ec47584 Mon Sep 17 00:00:00 2001 From: nicholasyang Date: Wed, 11 Oct 2023 14:44:58 +0800 Subject: [PATCH 03/14] Dev: bootstrap: refine key swap for user `hacluster` Newly implemented KeyFileManager and AuthorizedKeyManager allow faster key swap for user `hacluster`. 
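In essence, the swap now ensures a key pair exists for `hacluster` on every
node, collects the public keys, and authorizes them everywhere in one pass,
instead of checking each node pair one by one. A simplified sketch of the new
flow (mirroring `swap_key_for_hacluster()` in this patch; `other_node_list`
stands for the list of the other cluster nodes):

```python
from crmsh import sh, ssh_key

shell = sh.cluster_shell()
key_file_manager = ssh_key.KeyFileManager(shell)
authorized_key_manager = ssh_key.AuthorizedKeyManager(shell)

# Ensure each remote node has a key pair for hacluster and collect one
# public key per node ...
keys = [
    key_file_manager.ensure_key_pair_exists_for_user(node, 'hacluster')[0]
    for node in other_node_list
]
# ... plus the local hacluster public key.
keys.append(ssh_key.KeyFile(key_file_manager.list_public_key_for_user(None, 'hacluster')[0]))

# Authorize every collected key locally and on every other node.
for key in keys:
    authorized_key_manager.add(None, 'hacluster', key)
for node in other_node_list:
    for key in keys:
        authorized_key_manager.add(node, 'hacluster', key)
```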
--- crmsh/bootstrap.py | 94 ++++++++++++++++++++++---------------------- crmsh/healthcheck.py | 10 ++--- crmsh/sh.py | 2 +- crmsh/ssh_key.py | 14 ++++--- 4 files changed, 60 insertions(+), 60 deletions(-) diff --git a/crmsh/bootstrap.py b/crmsh/bootstrap.py index 4e4b9da78f..eb957bc451 100644 --- a/crmsh/bootstrap.py +++ b/crmsh/bootstrap.py @@ -871,6 +871,7 @@ def init_ssh_impl(local_user: str, ssh_public_keys: typing.List[ssh_key.Key], us else: configure_ssh_key(local_user) configure_ssh_key('hacluster') + change_user_shell('hacluster') user_by_host = utils.HostUserConfig() user_by_host.set_no_generating_ssh_key(bool(ssh_public_keys)) @@ -892,8 +893,7 @@ def init_ssh_impl(local_user: str, ssh_public_keys: typing.List[ssh_key.Key], us for user, node in user_node_list: user_by_host.add(user, node) user_by_host.add(local_user, utils.this_node()) - for user, node in user_node_list: - change_user_shell('hacluster', node) + user_by_host.save_local() # Starting from here, ClusterShell is available shell = sh.ClusterShell(local_shell, UserOfHost.instance()) authorized_key_manager = ssh_key.AuthorizedKeyManager(shell) @@ -902,7 +902,9 @@ def init_ssh_impl(local_user: str, ssh_public_keys: typing.List[ssh_key.Key], us [node for user, node in user_node_list], 'hacluster', ) - user_by_host.save_remote([node for user, node in user_node_list]) + for user, node in user_node_list: + change_user_shell('hacluster', node) + user_by_host.save_remote([node for user, node in user_node_list]) def _init_ssh_on_remote_nodes( @@ -1031,24 +1033,10 @@ def configure_ssh_key(user): """ change_user_shell(user) shell = sh.LocalShell() - - cmd = "" - private_key, public_key, authorized_file = key_files(user).values() - - if not utils.detect_file(private_key): - logger.info("SSH key for {} does not exist, hence generate it now".format(user)) - cmd = "ssh-keygen -q -f {} -C 'Cluster Internal on {}' -N ''".format(private_key, utils.this_node()) - elif not utils.detect_file(public_key): - cmd = "ssh-keygen -y -f {} > {}".format(private_key, public_key) - - if cmd: - shell.get_stdout_or_raise_error(user, cmd) - - if not utils.detect_file(authorized_file): - cmd = "touch {}".format(authorized_file) - shell.get_stdout_or_raise_error(user, cmd) - - append_unique(public_key, authorized_file, user) + key_file_manager = ssh_key.KeyFileManager(sh.ClusterShellAdaptorForLocalShell(shell)) + authorized_key_manager = ssh_key.AuthorizedKeyManager(sh.SSHShell(shell, None)) + key = key_file_manager.ensure_key_pair_exists_for_user(None, user)[0] + authorized_key_manager.add(None, user, key) def generate_ssh_key_pair_on_remote( @@ -1755,24 +1743,21 @@ def join_ssh_impl(local_user, seed_host, seed_user, ssh_public_keys: typing.List join_ssh_with_ssh_agent(local_shell, local_user, seed_host, seed_user, ssh_public_keys) else: local_shell = sh.LocalShell() - if not ssh_public_keys: - configure_ssh_key(local_user) - if 0 != utils.ssh_copy_id_no_raise(local_user, seed_user, seed_host): - msg = f"Failed to login to {seed_user}@{seed_host}. Please check the credentials." 
- sudoer = userdir.get_sudoer() - if sudoer and seed_user != sudoer: - args = ['sudo crm'] - args += [x for x in sys.argv[1:]] - for i, arg in enumerate(args): - if arg == '-c' or arg == '--cluster-node' and i + 1 < len(args): - if '@' not in args[i+1]: - args[i + 1] = f'{sudoer}@{seed_host}' - msg += '\nOr, run "{}".'.format(' '.join(args)) - raise ValueError(msg) - # After this, login to remote_node is passwordless - swap_public_ssh_key(seed_host, local_user, seed_user, local_user, seed_user, add=True) - configure_ssh_key('hacluster') - swap_public_ssh_key(seed_host, 'hacluster', 'hacluster', local_user, seed_user, add=True) + configure_ssh_key(local_user) + if 0 != utils.ssh_copy_id_no_raise(local_user, seed_user, seed_host): + msg = f"Failed to login to {seed_user}@{seed_host}. Please check the credentials." + sudoer = userdir.get_sudoer() + if sudoer and seed_user != sudoer: + args = ['sudo crm'] + args += [x for x in sys.argv[1:]] + for i, arg in enumerate(args): + if arg == '-c' or arg == '--cluster-node' and i + 1 < len(args): + if '@' not in args[i+1]: + args[i + 1] = f'{sudoer}@{seed_host}' + msg += '\nOr, run "{}".'.format(' '.join(args)) + raise ValueError(msg) + # After this, login to remote_node is passwordless + swap_public_ssh_key(seed_host, local_user, seed_user, local_user, seed_user, add=True) # This makes sure the seed host has its own SSH keys in its own # authorized_keys file (again, to help with the case where the @@ -1791,7 +1776,9 @@ def join_ssh_impl(local_user, seed_host, seed_user, ssh_public_keys: typing.List user_by_host.set_no_generating_ssh_key(bool(ssh_public_keys)) user_by_host.save_local() + configure_ssh_key('hacluster') change_user_shell('hacluster') + swap_public_ssh_key_for_secondary_user(sh.cluster_shell(), seed_host, 'hacluster') def join_ssh_with_ssh_agent( @@ -1814,6 +1801,15 @@ def join_ssh_with_ssh_agent( authorized_key_manager.add(None, local_user, key) +def swap_public_ssh_key_for_secondary_user(shell: sh.ClusterShell, host: str, user: str): + key_file_manager = ssh_key.KeyFileManager(shell) + local_key = ssh_key.KeyFile(key_file_manager.list_public_key_for_user(None, user)[0]) + remote_key = key_file_manager.ensure_key_pair_exists_for_user(host, user)[0] + authorized_key_manager = ssh_key.AuthorizedKeyManager(shell) + authorized_key_manager.add(None, user, remote_key) + authorized_key_manager.add(host, user, local_key) + + def swap_public_ssh_key( remote_node, local_user_to_swap, @@ -2088,16 +2084,18 @@ def swap_key_for_hacluster(other_node_list): The new join node should check and swap the public key between the old cluster nodes. 
""" shell = sh.cluster_shell() + key_file_manager = ssh_key.KeyFileManager(shell) + authorized_key_manager = ssh_key.AuthorizedKeyManager(shell) + keys: typing.List[ssh_key.Key] = [ + key_file_manager.ensure_key_pair_exists_for_user(node, 'hacluster')[0] + for node in other_node_list + ] + keys.append(ssh_key.KeyFile(key_file_manager.list_public_key_for_user(None, 'hacluster')[0])) + for key in keys: + authorized_key_manager.add(None, 'hacluster', key) for node in other_node_list: - for n in other_node_list: - if node == n: - continue - logger.info("Checking passwordless for hacluster between %s and %s", node, n) - _, public_key, authorized_file = key_files('hacluster').values() - public_key_content = shell.get_stdout_or_raise_error(f'cat {public_key}', n) - if not utils.check_text_included(public_key_content, authorized_file, node): - cmd = "echo '{}' >> {}".format(public_key_content, authorized_file) - shell.get_stdout_or_raise_error(cmd, node) + for key in keys: + authorized_key_manager.add(node, 'hacluster', key) def sync_files_to_disk(): diff --git a/crmsh/healthcheck.py b/crmsh/healthcheck.py index 2f19f61512..27bd977eb7 100644 --- a/crmsh/healthcheck.py +++ b/crmsh/healthcheck.py @@ -147,12 +147,10 @@ def fix_cluster(self, nodes: typing.Iterable[str], ask: typing.Callable[[str], N remote_nodes = set(nodes) remote_nodes.remove(local_node) remote_nodes = list(remote_nodes) - local_user = crmsh.utils.user_pair_for_ssh(remote_nodes[0])[0] - crmsh.bootstrap.init_ssh_impl( - local_user, - None, - [(crmsh.utils.user_pair_for_ssh(node)[1], node) for node in remote_nodes], - ) + crmsh.bootstrap.configure_ssh_key('hacluster') + crmsh.bootstrap.swap_key_for_hacluster(remote_nodes) + for node in remote_nodes: + crmsh.bootstrap.change_user_shell('hacluster', node) def main_check_local(args) -> int: diff --git a/crmsh/sh.py b/crmsh/sh.py index bb3a7fcd69..df15ff07dc 100644 --- a/crmsh/sh.py +++ b/crmsh/sh.py @@ -118,7 +118,7 @@ def su_subprocess_run( if user is None or self.get_effective_user_name() == user: args = ['/bin/sh', '-c', cmd] elif 0 == self.geteuid(): - args = ['su', user, '--login', '-c', cmd] + args = ['su', user, '--login', '-s', '/bin/sh', '-c', cmd] if tty: args.append('--pty') if self.preserve_env: diff --git a/crmsh/ssh_key.py b/crmsh/ssh_key.py index dca4359b21..bc944d8ee7 100644 --- a/crmsh/ssh_key.py +++ b/crmsh/ssh_key.py @@ -162,7 +162,7 @@ def __init__(self, shell: sh.ClusterShell): def list_public_key_for_user(self, host: typing.Optional[str], user: str) -> typing.List[str]: result = self.cluster_shell.subprocess_run_without_input( host, user, - f'ls ~/.ssh/id_*.pub', + 'ls ~/.ssh/id_*.pub', stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) @@ -191,14 +191,18 @@ def load_public_keys_for_user(self, host: typing.Optional[str], user: str) -> ty def ensure_key_pair_exists_for_user(self, host: typing.Optional[str], user: str) -> typing.List[InMemoryPublicKey]: script = '''if [ ! \\( {condition} \\) ]; then - ssh-keygen -t rsa -f ~/.ssh/id_rsa -q -C 'Cluster internal on {host}' -N '' <> /dev/null + ssh-keygen -t rsa -f ~/.ssh/id_rsa -q -C "Cluster internal on $(hostname)" -N '' <> /dev/null fi -for file in ~/.ssh/id_{{{pattern}}}.pub; do - if [ -f "$file" ]; then cat "$file"; fi +for file in ~/.ssh/id_{{{pattern}}}; do + if [ -f "$file" ]; then + if ! 
[ -f "$file".pub ]; then + ssh-keygen -y -f "$file" > "$file".pub + fi + cat "$file".pub + fi done '''.format( condition=' -o '.join([f'-f ~/.ssh/id_{t}' for t in self.KNOWN_KEY_TYPES]), - host=host, pattern=','.join(self.KNOWN_KEY_TYPES), ) result = self.cluster_shell.subprocess_run_without_input( From baf239a828d2a7e5a5f15a63feaedffb8bf950e5 Mon Sep 17 00:00:00 2001 From: nicholasyang Date: Thu, 26 Oct 2023 13:24:51 +0800 Subject: [PATCH 04/14] Dev: bootstrap: implement ssh-agent support for geo cluster (jsc#PED-5774) --- crmsh/bootstrap.py | 92 +++++++++++++++++++++++++++++++++------------ crmsh/sh.py | 13 ++++--- crmsh/ui_cluster.py | 2 + 3 files changed, 77 insertions(+), 30 deletions(-) diff --git a/crmsh/bootstrap.py b/crmsh/bootstrap.py index eb957bc451..4e145ef665 100644 --- a/crmsh/bootstrap.py +++ b/crmsh/bootstrap.py @@ -2822,29 +2822,6 @@ def bootstrap_init_geo(context): def geo_fetch_config(node): - user, node = utils.parse_user_at_host(node) - if user is not None: - try: - local_user = utils.user_of(utils.this_node()) - except UserNotFoundError: - local_user = user - remote_user = user - else: - try: - local_user, remote_user = UserOfHost.instance().user_pair_for_ssh(node) - except UserNotFoundError: - try: - local_user = utils.user_of(utils.this_node()) - except UserNotFoundError: - local_user = userdir.getuser() - remote_user = local_user - configure_ssh_key(local_user) - logger.info("Retrieving configuration - This may prompt for %s@%s:", remote_user, node) - utils.ssh_copy_id(local_user, remote_user, node) - user_by_host = utils.HostUserConfig() - user_by_host.add(local_user, utils.this_node()) - user_by_host.add(remote_user, node) - user_by_host.save_local() cmd = "tar -c -C '{}' .".format(BOOTH_DIR) with tempfile.TemporaryDirectory() as tmpdir: pipe_outlet, pipe_inlet = os.pipe() @@ -2875,6 +2852,27 @@ def geo_fetch_config(node): raise ValueError("Problem encountered with booth configuration from {}: {}".format(node, err)) +def _select_user_pair_for_ssh_for_secondary_components(dest: str): + """Select a user pair for operating secondary components, e.g. 
qdevice and geo cluster arbitrator"""
+    user, node = utils.parse_user_at_host(dest)
+    if user is not None:
+        try:
+            local_user = utils.user_of(utils.this_node())
+        except UserNotFoundError:
+            local_user = user
+        remote_user = user
+    else:
+        try:
+            local_user, remote_user = UserOfHost.instance().user_pair_for_ssh(node)
+        except UserNotFoundError:
+            try:
+                local_user = utils.user_of(utils.this_node())
+            except UserNotFoundError:
+                local_user = userdir.getuser()
+            remote_user = local_user
+    return local_user, remote_user, node
+
+
 def geo_cib_config(clusters):
     cluster_name = corosync.get_values('totem.cluster_name')[0]
     if cluster_name not in list(clusters.keys()):
@@ -2902,7 +2900,29 @@ def bootstrap_join_geo(context):
     _context = context
     init_common_geo()
     check_tty()
-    geo_fetch_config(_context.cluster_node)
+    user, node = utils.parse_user_at_host(_context.cluster_node)
+    if not sh.cluster_shell().can_run_as(node, 'root'):
+        local_user, remote_user, node = _select_user_pair_for_ssh_for_secondary_components(_context.cluster_node)
+        if context.use_ssh_agent:
+            try:
+                ssh_agent = ssh_key.AgentClient()
+                keys = ssh_agent.list()
+            except ssh_key.Error:
+                logger.error("Cannot get a public key from ssh-agent.")
+                raise
+            local_shell = sh.LocalShell(additional_environ={'SSH_AUTH_SOCK': os.environ.get('SSH_AUTH_SOCK')})
+            join_ssh_with_ssh_agent(local_shell, local_user, node, remote_user, keys)
+        else:
+            configure_ssh_key(local_user)
+            if 0 != utils.ssh_copy_id_no_raise(local_user, remote_user, node):
+                raise ValueError(f"Failed to login to {remote_user}@{node}. Please check the credentials.")
+            swap_public_ssh_key(node, local_user, remote_user, local_user, remote_user, add=True)
+        user_by_host = utils.HostUserConfig()
+        user_by_host.add(local_user, utils.this_node())
+        user_by_host.add(remote_user, node)
+        user_by_host.set_no_generating_ssh_key(context.use_ssh_agent)
+        user_by_host.save_local()
+    geo_fetch_config(node)
     logger.info("Sync booth configuration across cluster")
     csync2_update(BOOTH_DIR)
     geo_cib_config(_context.clusters)
@@ -2918,7 +2938,29 @@ def bootstrap_arbitrator(context):
     init_common_geo()
     check_tty()
 
-    geo_fetch_config(_context.cluster_node)
+    user, node = utils.parse_user_at_host(_context.cluster_node)
+    if not sh.cluster_shell().can_run_as(node, 'root'):
+        local_user, remote_user, node = _select_user_pair_for_ssh_for_secondary_components(_context.cluster_node)
+        if context.use_ssh_agent:
+            try:
+                ssh_agent = ssh_key.AgentClient()
+                keys = ssh_agent.list()
+            except ssh_key.Error:
+                logger.error("Cannot get a public key from ssh-agent.")
+                raise
+            local_shell = sh.LocalShell(additional_environ={'SSH_AUTH_SOCK': os.environ.get('SSH_AUTH_SOCK')})
+            join_ssh_with_ssh_agent(local_shell, local_user, node, remote_user, keys)
+        else:
+            configure_ssh_key(local_user)
+            if 0 != utils.ssh_copy_id_no_raise(local_user, remote_user, node):
+                raise ValueError(f"Failed to login to {remote_user}@{node}. Please check the credentials.")
+            swap_public_ssh_key(node, local_user, remote_user, local_user, remote_user, add=True)
+        user_by_host = utils.HostUserConfig()
+        user_by_host.add(local_user, utils.this_node())
+        user_by_host.add(remote_user, node)
+        user_by_host.set_no_generating_ssh_key(context.use_ssh_agent)
+        user_by_host.save_local()
+    geo_fetch_config(node)
     if not os.path.isfile(BOOTH_CFG):
         utils.fatal("Failed to copy {} from {}".format(BOOTH_CFG, _context.cluster_node))
     # TODO: verify that the arbitrator IP in the configuration is us?
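
For orientation, the geo code paths above are driven the same way as cluster init/join: the administrator forwards an ssh-agent and passes --use-ssh-agent. A rough usage sketch follows; the hostnames, virtual IPs and the agent socket path are illustrative only (the functional tests later in this series use hanode1..hanode3 and a fixed socket path):

```sh
# join an existing geo cluster, reusing a key held by the forwarded ssh-agent
SSH_AUTH_SOCK=/path/to/agent.sock crm cluster geo_join -y --use-ssh-agent \
    --cluster-node node1 --clusters "cluster1=192.0.2.10 cluster2=192.0.2.20"

# set up the arbitrator the same way
SSH_AUTH_SOCK=/path/to/agent.sock crm cluster geo_init_arbitrator -y --use-ssh-agent --cluster-node node1
```
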
diff --git a/crmsh/sh.py b/crmsh/sh.py index df15ff07dc..aa2fa04174 100644 --- a/crmsh/sh.py +++ b/crmsh/sh.py @@ -279,11 +279,14 @@ def __init__( self.raise_ssh_error = raise_ssh_error def can_run_as(self, host: typing.Optional[str], user: str) -> bool: - result = self.subprocess_run_without_input( - host, user, 'true', - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - ) + try: + result = self.subprocess_run_without_input( + host, user, 'true', + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + except user_of_host.UserNotFoundError: + return False return 0 == result.returncode def subprocess_run_without_input(self, host: typing.Optional[str], user: typing.Optional[str], cmd: str, **kwargs): diff --git a/crmsh/ui_cluster.py b/crmsh/ui_cluster.py index 3ecb6c8551..9899577719 100644 --- a/crmsh/ui_cluster.py +++ b/crmsh/ui_cluster.py @@ -735,6 +735,8 @@ def do_geo_init_arbitrator(self, context, *args): parser.add_argument("-q", "--quiet", help="Be quiet (don't describe what's happening, just do it)", action="store_true", dest="quiet") parser.add_argument("-y", "--yes", help='Answer "yes" to all prompts (use with caution)', action="store_true", dest="yes_to_all") parser.add_argument("-c", "--cluster-node", metavar="[USER@]HOST", help="An already-configured geo cluster", dest="cluster_node") + parser.add_argument('--use-ssh-agent', action='store_true', dest='use_ssh_agent', + help="Use an existing key from ssh-agent instead of creating new key pairs") options, args = parse_options(parser, args) if options is None or args is None: return From b771c0eb2fa4377a4cb78c8f07a4cd384490b0a1 Mon Sep 17 00:00:00 2001 From: nicholasyang Date: Thu, 19 Oct 2023 11:30:24 +0800 Subject: [PATCH 05/14] Dev: behave: adjust functional tests for previous changes --- .github/workflows/crmsh-ci.yml | 12 ++++ codecov.yml | 4 +- data-manifest | 1 + test/features/ssh_agent.feature | 70 ++++++++++++++++++++++ test/features/steps/const.py | 8 ++- test/features/steps/step_implementation.py | 11 ++++ test/features/steps/utils.py | 2 +- 7 files changed, 104 insertions(+), 4 deletions(-) create mode 100644 test/features/ssh_agent.feature diff --git a/.github/workflows/crmsh-ci.yml b/.github/workflows/crmsh-ci.yml index e076e6dcb2..a6a1e82bcc 100644 --- a/.github/workflows/crmsh-ci.yml +++ b/.github/workflows/crmsh-ci.yml @@ -333,6 +333,18 @@ jobs: $DOCKER_SCRIPT `$GET_INDEX_OF user_access` - uses: codecov/codecov-action@v3 + functional_test_ssh_agent: + runs-on: ubuntu-20.04 + timeout-minutes: 40 + steps: + - uses: actions/checkout@v3 + - name: functional test for user access + run: | + echo '{ "exec-opts": ["native.cgroupdriver=systemd"] }' | sudo tee /etc/docker/daemon.json + sudo systemctl restart docker.service + $DOCKER_SCRIPT `$GET_INDEX_OF ssh_agent` + - uses: codecov/codecov-action@v3 + original_regression_test: runs-on: ubuntu-20.04 timeout-minutes: 40 diff --git a/codecov.yml b/codecov.yml index de4e3d6dfe..f11a988584 100644 --- a/codecov.yml +++ b/codecov.yml @@ -9,6 +9,6 @@ coverage: codecov: token: 16b01c29-3b23-4923-b33a-4d26a49d80c4 notify: - after_n_builds: 22 + after_n_builds: 23 comment: - after_n_builds: 22 + after_n_builds: 23 diff --git a/data-manifest b/data-manifest index 956e17d43f..5ab24f5b3a 100644 --- a/data-manifest +++ b/data-manifest @@ -85,6 +85,7 @@ test/features/qdevice_usercase.feature test/features/qdevice_validate.feature test/features/resource_failcount.feature test/features/resource_set.feature +test/features/ssh_agent.feature test/features/steps/behave_agent.py 
test/features/steps/const.py test/features/steps/__init__.py diff --git a/test/features/ssh_agent.feature b/test/features/ssh_agent.feature new file mode 100644 index 0000000000..66edcd03d0 --- /dev/null +++ b/test/features/ssh_agent.feature @@ -0,0 +1,70 @@ +# vim: sw=2 sts=2 +Feature: ssh-agent support + + Test ssh-agent support for crmsh + Need nodes: hanode1 hanode2 hanode3 qnetd-node + + Scenario: Skip creating ssh key pairs with --use-ssh-agent + Given Run "mkdir /root/ssh_disabled && mv /root/.ssh/id_* /root/ssh_disabled" OK on "hanode1,hanode2,hanode3" + And ssh-agent is started at "/root/ssh-auth-sock" on nodes ["hanode1", "hanode2", "hanode3"] + When Run "SSH_AUTH_SOCK=/root/ssh-auth-sock ssh-add /root/ssh_disabled/id_rsa" on "hanode1,hanode2,hanode3" + And Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster init --use-ssh-agent -y" on "hanode1" + And Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster join --use-ssh-agent -y -c hanode1" on "hanode2" + And Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster join --use-ssh-agent -y -c hanode1" on "hanode3" + Then Cluster service is "started" on "hanode1" + And Online nodes are "hanode1 hanode2 hanode3" + # check the number of keys in authorized_keys + And Run "test x1 == x$(awk 'END {print NR}' ~root/.ssh/authorized_keys)" OK + And Run "test x3 == x$(awk 'END {print NR}' ~hacluster/.ssh/authorized_keys)" OK + + Scenario: Skip creating ssh key pairs with --use-ssh-agent and use -N + Given Run "crm cluster stop" OK on "hanode1,hanode2,hanode3" + When Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster init --use-ssh-agent -y -N hanode2 -N hanode3" on "hanode1" + Then Cluster service is "started" on "hanode3" + And Online nodes are "hanode1 hanode2 hanode3" + And Run "test x1 == x$(awk 'END {print NR}' ~root/.ssh/authorized_keys)" OK on "hanode3" + And Run "test x3 == x$(awk 'END {print NR}' ~hacluster/.ssh/authorized_keys)" OK on "hanode3" + + Scenario: Use qnetd + Given Run "crm cluster stop" OK on "hanode1,hanode2,hanode3" + When Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster init --use-ssh-agent -y" on "hanode1" + And Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster init qdevice --use-ssh-agent -y --qnetd-hostname qnetd-node" on "hanode1" + And Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster join --use-ssh-agent -y -c hanode1" on "hanode2" + Then Cluster service is "started" on "hanode1" + And Online nodes are "hanode1 hanode2" + And Service "corosync-qdevice" is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode2" + And Service "corosync-qnetd" is "started" on "qnetd-node" + + Scenario: Use qnetd with -N + Given Run "crm cluster stop" OK on "hanode1,hanode2" + When Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster init --use-ssh-agent -y -N hanode2 --qnetd-hostname qnetd-node" on "hanode1" + Then Cluster service is "started" on "hanode1" + And Online nodes are "hanode1 hanode2" + And Service "corosync-qdevice" is "started" on "hanode1" + And Service "corosync-qdevice" is "started" on "hanode2" + And Service "corosync-qnetd" is "started" on "qnetd-node" + + Scenario: GEO cluster setup with ssh-agent + Given Run "crm cluster stop" OK on "hanode1,hanode2" + And Run "systemctl disable --now booth@booth" OK on "hanode1,hanode2,hanode3" + And Cluster service is "stopped" on "hanode1" + And Cluster service is "stopped" on "hanode2" + When Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster init -y -n cluster1 --use-ssh-agent" on "hanode1" + Then Cluster service is "started" on "hanode1" + When Run "crm 
configure primitive vip IPaddr2 params ip=@vip.0" on "hanode1" + + When Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster init -y -n cluster2 --use-ssh-agent" on "hanode2" + Then Cluster service is "started" on "hanode2" + When Run "crm configure primitive vip IPaddr2 params ip=@vip.1" on "hanode2" + + When Run "crm cluster geo_init -y --clusters "cluster1=@vip.0 cluster2=@vip.1" --tickets tickets-geo --arbitrator hanode3" on "hanode1" + When Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster geo_join -y --use-ssh-agent --cluster-node hanode1 --clusters "cluster1=@vip.0 cluster2=@vip.1"" on "hanode2" + + Given Service "booth@booth" is "stopped" on "hanode3" + When Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster geo_init_arbitrator -y --use-ssh-agent --cluster-node hanode1" on "hanode3" + Then Service "booth@booth" is "started" on "hanode3" + When Run "crm resource start g-booth" on "hanode1" + Then Show cluster status on "hanode1" + When Run "crm resource start g-booth" on "hanode2" + Then Show cluster status on "hanode2" diff --git a/test/features/steps/const.py b/test/features/steps/const.py index 8dde42b640..2dd3c0dcbe 100644 --- a/test/features/steps/const.py +++ b/test/features/steps/const.py @@ -82,6 +82,8 @@ --no-overwrite-sshkey Avoid "/root/.ssh/id_rsa" overwrite if "-y" option is used (False by default; Deprecated) + --use-ssh-agent Use an existing key from ssh-agent instead of creating + new key pairs Network configuration: Options for configuring the network and messaging layer. @@ -228,6 +230,8 @@ User and host to login to an existing cluster node. The host can be specified with either a hostname or an IP. + --use-ssh-agent Use an existing key from ssh-agent instead of creating + new key pairs -i IF, --interface IF Bind to IP address on interface IF. 
Use -i second time for second interface @@ -344,4 +348,6 @@ -q, --quiet Be quiet (don't describe what's happening, just do it) -y, --yes Answer "yes" to all prompts (use with caution) -c [USER@]HOST, --cluster-node [USER@]HOST - An already-configured geo cluster''' + An already-configured geo cluster + --use-ssh-agent Use an existing key from ssh-agent instead of creating + new key pairs''' diff --git a/test/features/steps/step_implementation.py b/test/features/steps/step_implementation.py index c881e65cfa..1b3602721f 100644 --- a/test/features/steps/step_implementation.py +++ b/test/features/steps/step_implementation.py @@ -98,6 +98,10 @@ def step_impl(context, addr, iface): assert bool(res) is True +@given('Run "{cmd}" OK on "{addr}"') +def step_impl(context, cmd, addr): + _, out, _ = run_command_local_or_remote(context, cmd, addr) + @when('Run "{cmd}" on "{addr}"') def step_impl(context, cmd, addr): _, out, _ = run_command_local_or_remote(context, cmd, addr) @@ -523,3 +527,10 @@ def step_impl(context, nodelist): assert bootstrap.is_nologin('hacluster') is False else: assert bootstrap.is_nologin('hacluster', node) is False + + +@given('ssh-agent is started at "{path}" on nodes [{nodes:str+}]') +def step_impl(context, path, nodes): + for node in nodes: + rc, _, _ = behave_agent.call(node, 1122, f"systemd-run -u ssh-agent /usr/bin/ssh-agent -D -a '{path}'", user='root') + assert 0 == rc diff --git a/test/features/steps/utils.py b/test/features/steps/utils.py index dd347a5b1a..675c2c4710 100644 --- a/test/features/steps/utils.py +++ b/test/features/steps/utils.py @@ -55,7 +55,7 @@ def _wrap_cmd_non_root(cmd): else: return cmd if re.search('cluster (:?join|geo_join|geo_init_arbitrator)', cmd) and "@" not in cmd: - cmd = re.sub(r'''((?:-c|-N|--qnetd-hostname|--cluster-node)(?:\s+|=)['"]?)(\S{2,}['"]?)''', f'\\1{user}@\\2', cmd) + cmd = re.sub(r'''((?:-c|-N|--qnetd-hostname|--cluster-node|--arbitrator)(?:\s+|=)['"]?)(\S{2,}['"]?)''', f'\\1{user}@\\2', cmd) elif "cluster init" in cmd and ("-N" in cmd or "--qnetd-hostname" in cmd) and "@" not in cmd: cmd = re.sub(r'''((?:-c|-N|--qnetd-hostname|--cluster-node)(?:\s+|=)['"]?)(\S{2,}['"]?)''', f'\\1{user}@\\2', cmd) elif "cluster init" in cmd and "--node" in cmd and "@" not in cmd: From e5af12da6d1d3aaa8ca69e150c39343765108504 Mon Sep 17 00:00:00 2001 From: nicholasyang Date: Fri, 20 Oct 2023 11:36:04 +0800 Subject: [PATCH 06/14] Dev: unittest: adjust unit tests for previous changes --- crmsh/ssh_key.py | 9 +++ test/unittests/test_bootstrap.py | 102 +++++++++++++++++++++++-------- test/unittests/test_sh.py | 2 +- 3 files changed, 86 insertions(+), 27 deletions(-) diff --git a/crmsh/ssh_key.py b/crmsh/ssh_key.py index bc944d8ee7..f45a0fe9e3 100644 --- a/crmsh/ssh_key.py +++ b/crmsh/ssh_key.py @@ -46,6 +46,12 @@ def public_key(self) -> str: self._public_key = f.read().strip() return self._public_key + def __eq__(self, other): + return isinstance(other, KeyFile) and self._path == other._path and self.public_key() == other.public_key() + + def __repr__(self): + return f'KeyFile(path={self._path}, key={self.public_key()})' + class InMemoryPublicKey(Key): def __init__(self, content: str): @@ -54,6 +60,9 @@ def __init__(self, content: str): def public_key(self) -> str: return self.content + def __eq__(self, other): + return isinstance(other, InMemoryPublicKey) and self.content == other.content + class AuthorizedKeyManager: def __init__(self, shell: sh.SSHShell): diff --git a/test/unittests/test_bootstrap.py b/test/unittests/test_bootstrap.py index 
1525943599..b551b87cc8 100644 --- a/test/unittests/test_bootstrap.py +++ b/test/unittests/test_bootstrap.py @@ -15,6 +15,8 @@ import yaml import socket +import crmsh.sh +import crmsh.ssh_key import crmsh.user_of_host import crmsh.utils from crmsh.ui_node import NodeMgmt @@ -444,7 +446,7 @@ def test_start_pacemaker(self, mock_installed, mock_enabled, mock_delay_start, m @mock.patch('crmsh.bootstrap.configure_ssh_key') @mock.patch('crmsh.service_manager.ServiceManager.start_service') def test_init_ssh(self, mock_start_service, mock_config_ssh): - bootstrap._context = mock.Mock(current_user="alice", user_at_node_list=[], ssh_key_file=None) + bootstrap._context = mock.Mock(current_user="alice", user_at_node_list=[], use_ssh_agent=False) bootstrap.init_ssh() mock_start_service.assert_called_once_with("sshd.service", enable=True) mock_config_ssh.assert_has_calls([ @@ -518,7 +520,7 @@ def test_generate_ssh_key_pair_on_remote(self, mock_su: mock.MagicMock): @mock.patch('crmsh.utils.detect_file') @mock.patch('crmsh.bootstrap.key_files') @mock.patch('crmsh.bootstrap.change_user_shell') - def test_configure_ssh_key(self, mock_change_shell, mock_key_files, mock_detect, mock_su, mock_append_unique): + def _test_configure_ssh_key(self, mock_change_shell, mock_key_files, mock_detect, mock_su, mock_append_unique): mock_key_files.return_value = {"private": "/test/.ssh/id_rsa", "public": "/test/.ssh/id_rsa.pub", "authorized": "/test/.ssh/authorized_keys"} mock_detect.side_effect = [True, True, False] @@ -534,6 +536,15 @@ def test_configure_ssh_key(self, mock_change_shell, mock_key_files, mock_detect, mock_append_unique.assert_called_once_with("/test/.ssh/id_rsa.pub", "/test/.ssh/authorized_keys", "test") mock_su.assert_called_once_with('test', 'touch /test/.ssh/authorized_keys') + @mock.patch('crmsh.ssh_key.AuthorizedKeyManager.add') + @mock.patch('crmsh.ssh_key.KeyFileManager.ensure_key_pair_exists_for_user') + def test_configure_ssh_key(self, mock_ensure_key_pair, mock_add): + public_key = crmsh.ssh_key.InMemoryPublicKey('foo') + mock_ensure_key_pair.return_value = [public_key] + bootstrap.configure_ssh_key('alice') + mock_ensure_key_pair.assert_called_once_with(None, 'alice') + mock_add.assert_called_once_with(None, 'alice', public_key) + @mock.patch('crmsh.bootstrap.append_to_remote_file') @mock.patch('crmsh.utils.check_file_content_included') def test_append_unique_remote(self, mock_check, mock_append): @@ -563,14 +574,18 @@ def test_join_ssh_no_seed_host(self, mock_error): bootstrap.join_ssh(None, None) mock_error.assert_called_once_with("No existing IP/hostname specified (use -c option)") + @mock.patch('crmsh.bootstrap.swap_public_ssh_key_for_secondary_user') @mock.patch('crmsh.bootstrap.change_user_shell') @mock.patch('crmsh.sh.LocalShell.get_stdout_or_raise_error') @mock.patch('crmsh.bootstrap.swap_public_ssh_key') @mock.patch('crmsh.utils.ssh_copy_id_no_raise') @mock.patch('crmsh.bootstrap.configure_ssh_key') @mock.patch('crmsh.service_manager.ServiceManager.start_service') - def test_join_ssh(self, mock_start_service, mock_config_ssh, mock_ssh_copy_id, mock_swap, mock_invoke, mock_change): - bootstrap._context = mock.Mock(current_user="bob", user_list=["alice"], node_list=['node1'], default_nic_list=["eth1"]) + def test_join_ssh( + self, + mock_start_service, mock_config_ssh, mock_ssh_copy_id, mock_swap, mock_invoke, mock_change, mock_swap_2, + ): + bootstrap._context = mock.Mock(current_user="bob", default_nic_list=["eth1"], use_ssh_agent=False) mock_invoke.return_value = '' 
mock_swap.return_value = None mock_ssh_copy_id.return_value = 0 @@ -583,14 +598,46 @@ def test_join_ssh(self, mock_start_service, mock_config_ssh, mock_ssh_copy_id, m mock.call("hacluster"), ]) mock_ssh_copy_id.assert_called_once_with("bob", "alice", "node1") - mock_swap.assert_has_calls([ - mock.call("node1", "bob", "alice", "bob", "alice", add=True), - mock.call("node1", "hacluster", "hacluster", "bob", "alice", add=True), - ]) + mock_swap.assert_called_once_with("node1", "bob", "alice", "bob", "alice", add=True) mock_invoke.assert_called_once_with( "bob", "ssh {} alice@node1 sudo crm cluster init -i eth1 ssh_remote".format(constants.SSH_OPTION), ) + mock_swap_2.assert_called_once() + args, kwargs = mock_swap_2.call_args + self.assertEqual(3, len(args)) + self.assertEqual('node1', args[1]) + self.assertEqual('hacluster', args[2]) + + @mock.patch('crmsh.ssh_key.AuthorizedKeyManager.add') + @mock.patch('crmsh.ssh_key.KeyFile.public_key') + @mock.patch('crmsh.ssh_key.KeyFileManager.ensure_key_pair_exists_for_user') + @mock.patch('crmsh.ssh_key.KeyFileManager.list_public_key_for_user') + def test_swap_public_ssh_key_for_secondary_user( + self, + mock_list_public_key_for_user, + mock_ensure_key_pair_exists_for_user, + mock_public_key, + mock_authorized_key_manager_add, + ): + mock_shell = mock.Mock( + crmsh.sh.ClusterShell, + local_shell=mock.Mock(crmsh.sh.LocalShell), + user_of_host=mock.Mock(crmsh.user_of_host.UserOfHost), + ) + mock_list_public_key_for_user.return_value = ['~/.ssh/id_rsa', '~/.ssh/id_ed25519'] + mock_ensure_key_pair_exists_for_user.return_value = [ + crmsh.ssh_key.InMemoryPublicKey('foo'), + crmsh.ssh_key.InMemoryPublicKey('bar'), + ] + mock_public_key.return_value = 'public_key' + crmsh.bootstrap.swap_public_ssh_key_for_secondary_user(mock_shell, 'node1', 'alice') + mock_list_public_key_for_user.assert_called_once_with(None, 'alice') + mock_ensure_key_pair_exists_for_user.assert_called_once_with('node1', 'alice') + mock_authorized_key_manager_add.assert_has_calls([ + mock.call(None, 'alice', crmsh.ssh_key.InMemoryPublicKey('foo')), + mock.call('node1', 'alice', crmsh.ssh_key.KeyFile('~/.ssh/id_rsa')), + ]) @mock.patch('crmsh.bootstrap.change_user_shell') @mock.patch('crmsh.sh.LocalShell.get_stdout_or_raise_error') @@ -599,7 +646,7 @@ def test_join_ssh(self, mock_start_service, mock_config_ssh, mock_ssh_copy_id, m @mock.patch('crmsh.bootstrap.configure_ssh_key') @mock.patch('crmsh.service_manager.ServiceManager.start_service') def test_join_ssh_bad_credential(self, mock_start_service, mock_config_ssh, mock_ssh_copy_id, mock_swap, mock_invoke, mock_change): - bootstrap._context = mock.Mock(current_user="bob", user_list=["alice"], node_list=['node1'], default_nic_list=["eth1"]) + bootstrap._context = mock.Mock(current_user="bob", default_nic_list=["eth1"], use_ssh_agent=False) mock_invoke.return_value = '' mock_swap.return_value = None mock_ssh_copy_id.return_value = 255 @@ -644,7 +691,7 @@ def test_swap_public_ssh_key(self, mock_check_passwd, mock_export_ssh, mock_impo @mock.patch('crmsh.utils.this_node') def test_bootstrap_add_return(self, mock_this_node): - ctx = mock.Mock(user_at_node_list=[]) + ctx = mock.Mock(user_at_node_list=[], use_ssh_agent=False) bootstrap.bootstrap_add(ctx) mock_this_node.assert_not_called() @@ -652,7 +699,7 @@ def test_bootstrap_add_return(self, mock_this_node): @mock.patch('logging.Logger.info') @mock.patch('crmsh.utils.this_node') def test_bootstrap_add(self, mock_this_node, mock_info, mock_run): - ctx = mock.Mock(current_user="alice", 
user_at_node_list=["bob@node2", "carol@node3"], nic_list=["eth1"]) + ctx = mock.Mock(current_user="alice", user_at_node_list=["bob@node2", "carol@node3"], nic_list=["eth1"], use_ssh_agent=False) mock_this_node.return_value = "node1" bootstrap.bootstrap_add(ctx) mock_info.assert_has_calls([ @@ -663,22 +710,22 @@ def test_bootstrap_add(self, mock_this_node, mock_info, mock_run): ]) @mock.patch('crmsh.utils.fatal') - @mock.patch('crmsh.sh.LocalShell.get_rc_stdout_stderr') + @mock.patch('crmsh.sh.ClusterShell.get_rc_stdout_stderr_without_input') def test_setup_passwordless_with_other_nodes_failed_fetch_nodelist(self, mock_run, mock_error): - bootstrap._context = mock.Mock(current_user="carol") + bootstrap._context = mock.Mock(current_user="carol", use_ssh_agent=False) mock_run.return_value = (1, None, None) mock_error.side_effect = SystemExit with self.assertRaises(SystemExit): bootstrap.setup_passwordless_with_other_nodes("node1", "alice") - mock_run.assert_called_once_with('carol', 'ssh {} alice@node1 sudo crm_node -l'.format(constants.SSH_OPTION)) + mock_run.assert_called_once_with('node1', 'crm_node -l') mock_error.assert_called_once_with("Can't fetch cluster nodes list from node1: None") @mock.patch('crmsh.utils.fatal') @mock.patch('crmsh.utils.HostUserConfig') @mock.patch('crmsh.bootstrap._fetch_core_hosts') - @mock.patch('crmsh.sh.LocalShell.get_rc_stdout_stderr') + @mock.patch('crmsh.sh.ClusterShell.get_rc_stdout_stderr_without_input') def test_setup_passwordless_with_other_nodes_failed_fetch_hostname( self, mock_run, @@ -686,7 +733,7 @@ def test_setup_passwordless_with_other_nodes_failed_fetch_hostname( mock_host_user_config_class, mock_error, ): - bootstrap._context = mock.Mock(current_user="carol") + bootstrap._context = mock.Mock(current_user="carol", use_ssh_agent=False) out_node_list = """1 node1 member 2 node2 member""" mock_run.side_effect = [ @@ -700,8 +747,8 @@ def test_setup_passwordless_with_other_nodes_failed_fetch_hostname( bootstrap.setup_passwordless_with_other_nodes("node1", "alice") mock_run.assert_has_calls([ - mock.call('carol', 'ssh {} alice@node1 sudo crm_node -l'.format(constants.SSH_OPTION)), - mock.call('carol', 'ssh {} alice@node1 hostname'.format(constants.SSH_OPTION)) + mock.call('node1', 'crm_node -l'), + mock.call('node1', 'hostname'), ]) mock_error.assert_called_once_with("Can't fetch hostname of node1: None") @@ -712,7 +759,7 @@ def test_setup_passwordless_with_other_nodes_failed_fetch_hostname( @mock.patch('crmsh.utils.ssh_copy_id') @mock.patch('crmsh.utils.user_of') @mock.patch('crmsh.bootstrap.swap_public_ssh_key') - @mock.patch('crmsh.sh.LocalShell.get_rc_stdout_stderr') + @mock.patch('crmsh.sh.ClusterShell.get_rc_stdout_stderr_without_input') def test_setup_passwordless_with_other_nodes( self, mock_run, @@ -724,7 +771,7 @@ def test_setup_passwordless_with_other_nodes( mock_change_shell, mock_swap_hacluster ): - bootstrap._context = mock.Mock(current_user="carol", user_list=["alice", "bob"]) + bootstrap._context = mock.Mock(current_user="carol", use_ssh_agent=False) mock_fetch_core_hosts.return_value = (["alice", "bob"], ["node1", "node2"]) mock_userof.return_value = "bob" out_node_list = """1 node1 member @@ -737,9 +784,9 @@ def test_setup_passwordless_with_other_nodes( bootstrap.setup_passwordless_with_other_nodes("node1", "alice") mock_run.assert_has_calls([ - mock.call('carol', 'ssh {} alice@node1 sudo crm_node -l'.format(constants.SSH_OPTION)), - mock.call('carol', 'ssh {} alice@node1 hostname'.format(constants.SSH_OPTION)) - ]) + 
mock.call('node1', 'crm_node -l'), + mock.call('node1', 'hostname'), + ]) mock_userof.assert_called_once_with("node2") mock_ssh_copy_id.assert_has_calls([ mock.call('carol', 'bob', 'node2') @@ -952,11 +999,12 @@ def test_init_qdevice_copy_ssh_key_failed( mock_host_user_config_class, ): mock_list_nodes.return_value = [] - bootstrap._context = mock.Mock(qdevice_inst=self.qdevice_with_ip, current_user="bob", user_list=["alice"]) + bootstrap._context = mock.Mock(qdevice_inst=self.qdevice_with_ip, current_user="bob") mock_check_ssh_passwd_need.return_value = True mock_ssh_copy_id.return_value = 255 mock_user_of_host.return_value = mock.MagicMock(crmsh.user_of_host.UserOfHost) mock_user_of_host.return_value.user_pair_for_ssh.return_value = "bob", "bob" + mock_user_of_host.return_value.use_ssh_agent.return_value = False with self.assertRaises(ValueError): bootstrap.init_qdevice() @@ -983,10 +1031,11 @@ def test_init_qdevice_already_configured( mock_host_user_config_class, ): mock_list_nodes.return_value = [] - bootstrap._context = mock.Mock(qdevice_inst=self.qdevice_with_ip, current_user="bob", user_list=["alice"]) + bootstrap._context = mock.Mock(qdevice_inst=self.qdevice_with_ip, current_user="bob") mock_ssh.return_value = False mock_user_of_host.return_value = mock.MagicMock(crmsh.user_of_host.UserOfHost) mock_user_of_host.return_value.user_pair_for_ssh.return_value = "bob", "bob" + mock_user_of_host.return_value.use_ssh_agent.return_value = False mock_qdevice_configured.return_value = True mock_confirm.return_value = False self.qdevice_with_ip.start_qdevice_service = mock.Mock() @@ -1014,12 +1063,13 @@ def test_init_qdevice_already_configured( def test_init_qdevice(self, mock_info, mock_ssh, mock_configure_ssh_key, mock_qdevice_configured, mock_this_node, mock_list_nodes, mock_adjust_priority, mock_adjust_fence_delay, mock_user_of_host, mock_host_user_config_class): - bootstrap._context = mock.Mock(qdevice_inst=self.qdevice_with_ip, current_user="bob", user_list=["alice"]) + bootstrap._context = mock.Mock(qdevice_inst=self.qdevice_with_ip, current_user="bob") mock_this_node.return_value = "192.0.2.100" mock_list_nodes.return_value = [] mock_ssh.return_value = False mock_user_of_host.return_value = mock.MagicMock(crmsh.user_of_host.UserOfHost) mock_user_of_host.return_value.user_pair_for_ssh.return_value = "bob", "bob" + mock_user_of_host.return_value.use_ssh_agent.return_value = False mock_qdevice_configured.return_value = False self.qdevice_with_ip.set_cluster_name = mock.Mock() self.qdevice_with_ip.valid_qnetd = mock.Mock() diff --git a/test/unittests/test_sh.py b/test/unittests/test_sh.py index d0531707bc..b3c0f0bb11 100644 --- a/test/unittests/test_sh.py +++ b/test/unittests/test_sh.py @@ -22,7 +22,7 @@ def test_su_subprocess_run(self, mock_run: mock.MagicMock): input=b'bar', ) mock_run.assert_called_once_with( - ['su', 'alice', '--login', '-c', 'foo'], + ['su', 'alice', '--login', '-s', '/bin/sh', '-c', 'foo'], input=b'bar', ) From efa8c0b081ce6a292f0b94fe45fe41a7b4959191 Mon Sep 17 00:00:00 2001 From: nicholasyang Date: Mon, 30 Oct 2023 15:04:33 +0800 Subject: [PATCH 07/14] Dev: behave: adjust test cases for better coverage of `crm cluster init -N` --- test/features/bootstrap_options.feature | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/features/bootstrap_options.feature b/test/features/bootstrap_options.feature index fd73472bcf..51fed6298b 100644 --- a/test/features/bootstrap_options.feature +++ b/test/features/bootstrap_options.feature @@ -10,7 +10,7 @@ 
Feature: crmsh bootstrap process - options
       "-u": Configure corosync to communicate over unicast
       "-U": Configure corosync to communicate over multicast
   Tag @clean means need to stop cluster service if the service is available
-  Need nodes: hanode1 hanode2
+  Need nodes: hanode1 hanode2 hanode3
 
   @clean
   Scenario: Check help output
@@ -39,7 +39,7 @@ Feature: crmsh bootstrap process - options
   Scenario: Init whole cluster service on node "hanode1" using "--node" option
     Given   Cluster service is "stopped" on "hanode1"
     And     Cluster service is "stopped" on "hanode2"
-    When    Run "crm cluster init -y --node "hanode1 hanode2"" on "hanode1"
+    When    Run "crm cluster init -y --node "hanode1 hanode2 hanode3"" on "hanode1"
     Then    Cluster service is "started" on "hanode1"
     And     Cluster service is "started" on "hanode2"
     And     Online nodes are "hanode1 hanode2"
From b357c54d77ab023a28bb6066e21c6251708bb0e2 Mon Sep 17 00:00:00 2001
From: nicholasyang
Date: Thu, 26 Oct 2023 14:16:10 +0800
Subject: [PATCH 08/14] Dev: ssh-agent: add diagnose messages

---
 crmsh/ssh_key.py                           | 23 +++++++++++++++++++++--
 test/features/ssh_agent.feature            | 12 ++++++++++-
 test/features/steps/step_implementation.py |  2 +-
 3 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/crmsh/ssh_key.py b/crmsh/ssh_key.py
index f45a0fe9e3..2ac2eed310 100644
--- a/crmsh/ssh_key.py
+++ b/crmsh/ssh_key.py
@@ -5,6 +5,7 @@
 import subprocess
 import tempfile
 import typing
+from io import StringIO
 
 from crmsh import sh
@@ -18,11 +19,29 @@ def __init__(self, msg: str):
 
 
 class AgentNotAvailableError(Error):
-    pass
+    def __init__(self, msg):
+        super().__init__(f'{msg}{self.diagnose()}')
+
+    @staticmethod
+    def diagnose() -> str:
+        with StringIO() as buf:
+            if 'SSH_AUTH_SOCK' not in os.environ:
+                buf.write(' Environment variable SSH_AUTH_SOCK does not exist.')
+            if 'SUDO_USER' in os.environ:
+                buf.write(' Please check whether ssh-agent is available and consider using "sudo --preserve-env=SSH_AUTH_SOCK".')
+            return buf.getvalue()
 
 
 class NoKeysInAgentError(Error):
-    pass
+    def __init__(self, msg):
+        super().__init__(f'{msg}{self.diagnose()}')
+
+    @staticmethod
+    def diagnose() -> str:
+        ssh_auth_sock = os.environ["SSH_AUTH_SOCK"]
+        st = os.stat(ssh_auth_sock)
+        owner_name = pwd.getpwuid(st.st_uid).pw_name
+        return f' crmsh is using an ssh-agent listening at {ssh_auth_sock}, owned by {owner_name}. Please add at least one key pair with `ssh-add`'
 
 
 class Key:
diff --git a/test/features/ssh_agent.feature b/test/features/ssh_agent.feature
index 66edcd03d0..ee9a4875dd 100644
--- a/test/features/ssh_agent.feature
+++ b/test/features/ssh_agent.feature
@@ -4,9 +4,19 @@ Feature: ssh-agent support
   Test ssh-agent support for crmsh
   Need nodes: hanode1 hanode2 hanode3 qnetd-node
 
+  Scenario: Errors are reported when ssh-agent is not available
+    When Try "crm cluster init --use-ssh-agent -y" on "hanode1"
+    Then Expected "Environment variable SSH_AUTH_SOCK does not exist." in stderr
+    When Try "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster init --use-ssh-agent -y" on "hanode1"
+    Then Expected "Environment variable SSH_AUTH_SOCK does not exist." 
not in stderr + + Scenario: Errors are reported when there are no keys in ssh-agent + Given ssh-agent is started at "/root/ssh-auth-sock" on nodes ["hanode1", "hanode2", "hanode3"] + When Try "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster init --use-ssh-agent -y" on "hanode1" + Then Expected "ssh-add" in stderr + Scenario: Skip creating ssh key pairs with --use-ssh-agent Given Run "mkdir /root/ssh_disabled && mv /root/.ssh/id_* /root/ssh_disabled" OK on "hanode1,hanode2,hanode3" - And ssh-agent is started at "/root/ssh-auth-sock" on nodes ["hanode1", "hanode2", "hanode3"] When Run "SSH_AUTH_SOCK=/root/ssh-auth-sock ssh-add /root/ssh_disabled/id_rsa" on "hanode1,hanode2,hanode3" And Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster init --use-ssh-agent -y" on "hanode1" And Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster join --use-ssh-agent -y -c hanode1" on "hanode2" diff --git a/test/features/steps/step_implementation.py b/test/features/steps/step_implementation.py index 1b3602721f..ec5bc5d7c1 100644 --- a/test/features/steps/step_implementation.py +++ b/test/features/steps/step_implementation.py @@ -100,7 +100,7 @@ def step_impl(context, addr, iface): @given('Run "{cmd}" OK on "{addr}"') def step_impl(context, cmd, addr): - _, out, _ = run_command_local_or_remote(context, cmd, addr) + _, out, _ = run_command_local_or_remote(context, cmd, addr, True) @when('Run "{cmd}" on "{addr}"') def step_impl(context, cmd, addr): From 6886b434da61dbac79822e38dd0155c980db6658 Mon Sep 17 00:00:00 2001 From: nicholasyang Date: Tue, 31 Oct 2023 13:06:15 +0800 Subject: [PATCH 09/14] Dev: ssh-agent: add informative logging for adding keys to authorized_keys --- crmsh/bootstrap.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/crmsh/bootstrap.py b/crmsh/bootstrap.py index 4e145ef665..da2d0cf6f9 100644 --- a/crmsh/bootstrap.py +++ b/crmsh/bootstrap.py @@ -837,6 +837,7 @@ def init_ssh(): try: ssh_agent = ssh_key.AgentClient() keys = ssh_agent.list() + logger.info("Using public keys from ssh-agent...") except ssh_key.Error: logger.error("Cannot get a public key from ssh-agent.") raise @@ -866,6 +867,7 @@ def init_ssh_impl(local_user: str, ssh_public_keys: typing.List[ssh_key.Key], us authorized_key_manager = ssh_key.AuthorizedKeyManager(shell) if ssh_public_keys: # Use specified key. Do not generate new ones. 
+ logger.info("Adding public keys to authorized_keys for user %s...", local_user) for key in ssh_public_keys: authorized_key_manager.add(None, local_user, key) else: @@ -879,7 +881,7 @@ def init_ssh_impl(local_user: str, ssh_public_keys: typing.List[ssh_key.Key], us print() if ssh_public_keys: for user, node in user_node_list: - logger.info("Adding public key to authorized_keys on %s@%s", user, node) + logger.info("Adding public keys to authorized_keys on %s@%s", user, node) for key in ssh_public_keys: authorized_key_manager.add(node, local_user, key) if user != 'root' and 0 != shell.subprocess_run_without_input( @@ -1669,6 +1671,7 @@ def init_qdevice(): ssh_user = local_user # Configure ssh passwordless to qnetd if detect password is needed if UserOfHost.instance().use_ssh_agent(): + logger.info("Adding public keys to authorized_keys for user root...") for key in ssh_key.AgentClient().list(): ssh_key.AuthorizedKeyManager(sh.SSHShell( sh.LocalShell(additional_environ={'SSH_AUTH_SOCK': os.environ.get('SSH_AUTH_SOCK')}), @@ -1728,6 +1731,7 @@ def join_ssh(seed_host, seed_user): try: ssh_agent = ssh_key.AgentClient() keys = ssh_agent.list() + logger.info("Using public keys from ssh-agent...") except ssh_key.Error: logger.error("Cannot get a public key from ssh-agent.") raise @@ -2907,6 +2911,7 @@ def bootstrap_join_geo(context): try: ssh_agent = ssh_key.AgentClient() keys = ssh_agent.list() + logger.info("Using public keys from ssh-agent...") except ssh_key.Error: logger.error("Cannot get a public key from ssh-agent.") raise @@ -2945,6 +2950,7 @@ def bootstrap_arbitrator(context): try: ssh_agent = ssh_key.AgentClient() keys = ssh_agent.list() + logger.info("Using public keys from ssh-agent...") except ssh_key.Error: logger.error("Cannot get a public key from ssh-agent.") raise From f69db0a435faa0f037d682cf5117620bec557377 Mon Sep 17 00:00:00 2001 From: nicholasyang Date: Tue, 31 Oct 2023 13:09:17 +0800 Subject: [PATCH 10/14] Dev: bootstrap: allow to authenticate interactively in `crm cluster join --use-ssh-agent` --- crmsh/bootstrap.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crmsh/bootstrap.py b/crmsh/bootstrap.py index da2d0cf6f9..27c18b4d14 100644 --- a/crmsh/bootstrap.py +++ b/crmsh/bootstrap.py @@ -1792,15 +1792,16 @@ def join_ssh_with_ssh_agent( ): # As ssh-agent is used, the local_user does not have any effects shell = sh.SSHShell(local_shell, 'root') + authorized_key_manager = ssh_key.AuthorizedKeyManager(shell) if not shell.can_run_as(seed_host, seed_user): - raise ValueError(f'Failed to login to {seed_user}@{seed_host}') + for key in ssh_public_keys: + authorized_key_manager.add(seed_host, seed_user, key) if seed_user != 'root' and 0 != shell.subprocess_run_without_input( seed_host, seed_user, 'sudo true', stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, ).returncode: raise ValueError(f'Failed to sudo on {seed_user}@{seed_host}') - authorized_key_manager = ssh_key.AuthorizedKeyManager(shell) for key in ssh_public_keys: authorized_key_manager.add(None, local_user, key) From d52e5d753a13b9dc4214d220846039b29c64a6e1 Mon Sep 17 00:00:00 2001 From: nicholasyang Date: Thu, 2 Nov 2023 15:57:31 +0800 Subject: [PATCH 11/14] Fix: sh: raise AuthorizationError and generate diagnose messages when ClusterShell fails with 255 --- crmsh/sh.py | 22 +++++++++++++++++----- crmsh/ssh_key.py | 1 - 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/crmsh/sh.py b/crmsh/sh.py index aa2fa04174..dba3bad91c 100644 --- a/crmsh/sh.py +++ b/crmsh/sh.py @@ -25,6 
+25,7 @@
 import socket
 import subprocess
 import typing
+from io import StringIO
 
 from . import constants
 from .pyshim import cache
@@ -45,15 +46,26 @@ def __init__(self, msg, cmd):
 class AuthorizationError(Error):
     def __init__(self, cmd: str, host: typing.Optional[str], user: str, msg: str):
         super().__init__(
-            'Failed to run command on {optional_user}{host}: {msg}: {cmd}'.format(
+            'Failed to run command {cmd} on {optional_user}{host}: {msg} {diagnose}'.format(
                 optional_user=f'{user}@' if user is not None else '',
-                host=host, msg=msg, cmd=cmd
+                host=host, msg=msg, cmd=cmd,
+                diagnose=self.diagnose(),
             ),
             cmd
         )
         self.host = host
         self.user = user
 
+    @staticmethod
+    def diagnose() -> str:
+        if user_of_host.instance().use_ssh_agent():
+            with StringIO() as buf:
+                if 'SSH_AUTH_SOCK' not in os.environ:
+                    buf.write('Environment variable SSH_AUTH_SOCK does not exist.')
+                if 'SUDO_USER' in os.environ:
+                    buf.write(' Please check whether ssh-agent is available and consider using "sudo --preserve-env=SSH_AUTH_SOCK".')
+            return buf.getvalue()
+
 
 class CommandFailure(Error):
     def __init__(self, cmd: str, host: typing.Optional[str], user: typing.Optional[str], msg: str):
@@ -309,7 +321,7 @@ def subprocess_run_without_input(self, host: typing.Optional[str], user: typing.
         local_user, remote_user = self.user_of_host.user_pair_for_ssh(host)
         result = self.local_shell.su_subprocess_run(
             local_user,
-            'ssh {} {} {}@{} sudo -H -u {} {} /bin/sh'.format(
+            'ssh {} {} -o BatchMode=yes {}@{} sudo -H -u {} {} /bin/sh'.format(
                 '-A' if self.forward_ssh_agent else '',
                 constants.SSH_OPTION,
                 remote_user,
@@ -319,6 +331,7 @@ def subprocess_run_without_input(self, host: typing.Optional[str], user: typing.
                 constants.SSH_OPTION,
             ),
             input=cmd.encode('utf-8'),
+            start_new_session=True,
             **kwargs,
         )
         if self.raise_ssh_error and result.returncode == 255:
@@ -331,7 +344,6 @@ def get_rc_and_error(self, host: typing.Optional[str], user: str, cmd: str):
             host, user, cmd,
             stdout=subprocess.PIPE,
             stderr=subprocess.STDOUT,
-            start_new_session=True,
         )
         if result.returncode == 0:
             return 0, None
@@ -463,4 +475,4 @@ def subprocess_run_without_input(self, host: str, user: typing.Optional[str], cm
 
 
 def cluster_shell():
-    return ClusterShell(LocalShell(), user_of_host.instance())
+    return ClusterShell(LocalShell(), user_of_host.instance(), raise_ssh_error=True)
diff --git a/crmsh/ssh_key.py b/crmsh/ssh_key.py
index 2ac2eed310..e1ac66ebbb 100644
--- a/crmsh/ssh_key.py
+++ b/crmsh/ssh_key.py
@@ -238,7 +238,6 @@ def ensure_key_pair_exists_for_user(self, host: typing.Optional[str], user: str)
             script,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
-            start_new_session=True,
         )
         if result.returncode != 0:
             print(script)
From 58c664ee65f79461d3f1e240f0fb0a88582bc321 Mon Sep 17 00:00:00 2001
From: Nicholas Yang
Date: Thu, 16 Nov 2023 15:21:47 +0800
Subject: [PATCH 12/14] Dev: behave: adjust test cases for non-root ssh-agent scenarios

---
 .github/workflows/crmsh-ci.yml             |  2 +-
 test/features/ssh_agent.feature            | 43 +++++++++++-----------
 test/features/steps/step_implementation.py |  5 ++-
 3 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/crmsh-ci.yml b/.github/workflows/crmsh-ci.yml
index a6a1e82bcc..44e20a950e 100644
--- a/.github/workflows/crmsh-ci.yml
+++ b/.github/workflows/crmsh-ci.yml
@@ -342,7 +342,7 @@ jobs:
       run: |
         echo '{ "exec-opts": ["native.cgroupdriver=systemd"] }' | sudo tee /etc/docker/daemon.json
         sudo systemctl restart docker.service
-        $DOCKER_SCRIPT `$GET_INDEX_OF ssh_agent`
+        $DOCKER_SCRIPT `$GET_INDEX_OF ssh_agent` && 
$DOCKER_SCRIPT -d && $DOCKER_SCRIPT -u `$GET_INDEX_OF ssh_agent`
       - uses: codecov/codecov-action@v3
 
   original_regression_test:
diff --git a/test/features/ssh_agent.feature b/test/features/ssh_agent.feature
index ee9a4875dd..be6e87f0ff 100644
--- a/test/features/ssh_agent.feature
+++ b/test/features/ssh_agent.feature
@@ -7,39 +7,40 @@ Feature: ssh-agent support
   Scenario: Errors are reported when ssh-agent is not available
     When Try "crm cluster init --use-ssh-agent -y" on "hanode1"
     Then Expected "Environment variable SSH_AUTH_SOCK does not exist." in stderr
-    When Try "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster init --use-ssh-agent -y" on "hanode1"
+    When Try "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster init --use-ssh-agent -y" on "hanode1"
     Then Expected "Environment variable SSH_AUTH_SOCK does not exist." not in stderr
 
   Scenario: Errors are reported when there are no keys in ssh-agent
-    Given ssh-agent is started at "/root/ssh-auth-sock" on nodes ["hanode1", "hanode2", "hanode3"]
-    When Try "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster init --use-ssh-agent -y" on "hanode1"
+    Given ssh-agent is started at "/tmp/ssh-auth-sock" on nodes ["hanode1", "hanode2", "hanode3"]
+    When Try "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster init --use-ssh-agent -y" on "hanode1"
     Then Expected "ssh-add" in stderr
 
   Scenario: Skip creating ssh key pairs with --use-ssh-agent
-    Given Run "mkdir /root/ssh_disabled && mv /root/.ssh/id_* /root/ssh_disabled" OK on "hanode1,hanode2,hanode3"
-    When Run "SSH_AUTH_SOCK=/root/ssh-auth-sock ssh-add /root/ssh_disabled/id_rsa" on "hanode1,hanode2,hanode3"
-    And Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster init --use-ssh-agent -y" on "hanode1"
-    And Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster join --use-ssh-agent -y -c hanode1" on "hanode2"
-    And Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster join --use-ssh-agent -y -c hanode1" on "hanode3"
+    Given Run "mkdir ~/ssh_disabled" OK on "hanode1,hanode2,hanode3"
+    And Run "mv ~/.ssh/id_* ~/ssh_disabled" OK on "hanode1,hanode2,hanode3"
+    When Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock ssh-add ~/ssh_disabled/id_rsa" on "hanode1,hanode2,hanode3"
+    And Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster init --use-ssh-agent -y" on "hanode1"
+    And Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster join --use-ssh-agent -y -c hanode1" on "hanode2"
+    And Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster join --use-ssh-agent -y -c hanode1" on "hanode3"
     Then Cluster service is "started" on "hanode1"
     And Online nodes are "hanode1 hanode2 hanode3"
     # check the number of keys in authorized_keys
-    And Run "test x1 == x$(awk 'END {print NR}' ~root/.ssh/authorized_keys)" OK
-    And Run "test x3 == x$(awk 'END {print NR}' ~hacluster/.ssh/authorized_keys)" OK
+    And Run "test x1 == x$(awk 'END {print NR}' ~/.ssh/authorized_keys)" OK
+    And Run "test x3 == x$(sudo awk 'END {print NR}' ~hacluster/.ssh/authorized_keys)" OK
 
   Scenario: Skip creating ssh key pairs with --use-ssh-agent and use -N
     Given Run "crm cluster stop" OK on "hanode1,hanode2,hanode3"
-    When Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster init --use-ssh-agent -y -N hanode2 -N hanode3" on "hanode1"
+    When Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster init --use-ssh-agent -y -N hanode2 -N hanode3" on "hanode1"
     Then Cluster service is "started" on "hanode3"
     And Online nodes are "hanode1 hanode2 hanode3"
-    And Run "test x1 == x$(awk 'END {print NR}' ~root/.ssh/authorized_keys)" OK on "hanode3"
-    And Run "test x3 == x$(awk 'END {print NR}' ~hacluster/.ssh/authorized_keys)" OK on 
"hanode3" + And Run "test x1 == x$(awk 'END {print NR}' ~/.ssh/authorized_keys)" OK on "hanode3" + And Run "test x3 == x$(sudo awk 'END {print NR}' ~hacluster/.ssh/authorized_keys)" OK on "hanode3" Scenario: Use qnetd Given Run "crm cluster stop" OK on "hanode1,hanode2,hanode3" - When Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster init --use-ssh-agent -y" on "hanode1" - And Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster init qdevice --use-ssh-agent -y --qnetd-hostname qnetd-node" on "hanode1" - And Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster join --use-ssh-agent -y -c hanode1" on "hanode2" + When Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster init --use-ssh-agent -y" on "hanode1" + And Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster init qdevice --use-ssh-agent -y --qnetd-hostname qnetd-node" on "hanode1" + And Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster join --use-ssh-agent -y -c hanode1" on "hanode2" Then Cluster service is "started" on "hanode1" And Online nodes are "hanode1 hanode2" And Service "corosync-qdevice" is "started" on "hanode1" @@ -48,7 +49,7 @@ Feature: ssh-agent support Scenario: Use qnetd with -N Given Run "crm cluster stop" OK on "hanode1,hanode2" - When Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster init --use-ssh-agent -y -N hanode2 --qnetd-hostname qnetd-node" on "hanode1" + When Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster init --use-ssh-agent -y -N hanode2 --qnetd-hostname qnetd-node" on "hanode1" Then Cluster service is "started" on "hanode1" And Online nodes are "hanode1 hanode2" And Service "corosync-qdevice" is "started" on "hanode1" @@ -60,19 +61,19 @@ Feature: ssh-agent support And Run "systemctl disable --now booth@booth" OK on "hanode1,hanode2,hanode3" And Cluster service is "stopped" on "hanode1" And Cluster service is "stopped" on "hanode2" - When Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster init -y -n cluster1 --use-ssh-agent" on "hanode1" + When Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster init -y -n cluster1 --use-ssh-agent" on "hanode1" Then Cluster service is "started" on "hanode1" When Run "crm configure primitive vip IPaddr2 params ip=@vip.0" on "hanode1" - When Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster init -y -n cluster2 --use-ssh-agent" on "hanode2" + When Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster init -y -n cluster2 --use-ssh-agent" on "hanode2" Then Cluster service is "started" on "hanode2" When Run "crm configure primitive vip IPaddr2 params ip=@vip.1" on "hanode2" When Run "crm cluster geo_init -y --clusters "cluster1=@vip.0 cluster2=@vip.1" --tickets tickets-geo --arbitrator hanode3" on "hanode1" - When Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster geo_join -y --use-ssh-agent --cluster-node hanode1 --clusters "cluster1=@vip.0 cluster2=@vip.1"" on "hanode2" + When Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster geo_join -y --use-ssh-agent --cluster-node hanode1 --clusters "cluster1=@vip.0 cluster2=@vip.1"" on "hanode2" Given Service "booth@booth" is "stopped" on "hanode3" - When Run "SSH_AUTH_SOCK=/root/ssh-auth-sock crm cluster geo_init_arbitrator -y --use-ssh-agent --cluster-node hanode1" on "hanode3" + When Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster geo_init_arbitrator -y --use-ssh-agent --cluster-node hanode1" on "hanode3" Then Service "booth@booth" is "started" on "hanode3" When Run "crm resource start g-booth" on "hanode1" Then Show cluster status on "hanode1" diff --git a/test/features/steps/step_implementation.py b/test/features/steps/step_implementation.py 
index ec5bc5d7c1..8c278f3b63 100644 --- a/test/features/steps/step_implementation.py +++ b/test/features/steps/step_implementation.py @@ -531,6 +531,9 @@ def step_impl(context, nodelist): @given('ssh-agent is started at "{path}" on nodes [{nodes:str+}]') def step_impl(context, path, nodes): + user = userdir.get_sudoer() + if not user: + user = userdir.getuser() for node in nodes: - rc, _, _ = behave_agent.call(node, 1122, f"systemd-run -u ssh-agent /usr/bin/ssh-agent -D -a '{path}'", user='root') + rc, _, _ = behave_agent.call(node, 1122, f"systemd-run --uid '{user}' -u ssh-agent /usr/bin/ssh-agent -D -a '{path}'", user='root') assert 0 == rc From 52f77f71b3c7d38539e27141ec98e445f20b47f3 Mon Sep 17 00:00:00 2001 From: Nicholas Yang Date: Mon, 20 Nov 2023 11:12:16 +0800 Subject: [PATCH 13/14] Fix: forward ssh-agent for `crm report __slave` --- crmsh/report/collect.py | 10 +++++++++- crmsh/report/utillib.py | 2 +- test/features/ssh_agent.feature | 3 +++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/crmsh/report/collect.py b/crmsh/report/collect.py index 5641f681dd..7476be0928 100644 --- a/crmsh/report/collect.py +++ b/crmsh/report/collect.py @@ -10,6 +10,7 @@ import pwd import datetime +import crmsh.user_of_host from crmsh import log, sh from crmsh import utils as crmutils from crmsh.report import constants, utillib @@ -56,7 +57,14 @@ def collect_ratraces(): """ # since the "trace_dir" attribute been removed from cib after untrace # need to parse crmsh log file to extract custom trace ra log directory on each node - shell = sh.cluster_shell() + if crmsh.user_of_host.instance().use_ssh_agent(): + shell = sh.ClusterShell( + sh.LocalShell(additional_environ={'SSH_AUTH_SOCK': os.environ.get('SSH_AUTH_SOCK', '')}), + crmsh.user_of_host.instance(), + forward_ssh_agent=True, + ) + else: + shell = sh.cluster_shell() log_contents = "" cmd = "grep 'INFO: Trace for .* is written to ' {}*|grep -v 'collect'".format(log.CRMSH_LOG_FILE) for node in crmutils.list_cluster_nodes(): diff --git a/crmsh/report/utillib.py b/crmsh/report/utillib.py index 7f5e135ffd..261899de48 100644 --- a/crmsh/report/utillib.py +++ b/crmsh/report/utillib.py @@ -417,7 +417,7 @@ def get_distro_info(): res = re.search("PRETTY_NAME=\"(.*)\"", read_from_file(constants.OSRELEASE)) elif which("lsb_release"): logger.debug("Using lsb_release to get distribution info") - out = sh.cluster_shell().get_stdout_or_raise_error("lsb_release -d") + out = sh.LocalShell().get_stdout_or_raise_error("lsb_release -d") res = re.search("Description:\s+(.*)", out) return res.group(1) if res else "Unknown" diff --git a/test/features/ssh_agent.feature b/test/features/ssh_agent.feature index be6e87f0ff..5b959c413c 100644 --- a/test/features/ssh_agent.feature +++ b/test/features/ssh_agent.feature @@ -36,6 +36,9 @@ Feature: ssh-agent support And Run "test x1 == x$(awk 'END {print NR}' ~/.ssh/authorized_keys)" OK on "hanode3" And Run "test x3 == x$(sudo awk 'END {print NR}' ~hacluster/.ssh/authorized_keys)" OK on "hanode3" + Scenario: crm report + Then Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm report" OK + Scenario: Use qnetd Given Run "crm cluster stop" OK on "hanode1,hanode2,hanode3" When Run "SSH_AUTH_SOCK=/tmp/ssh-auth-sock crm cluster init --use-ssh-agent -y" on "hanode1" From 6a49af969b824a578ea0ff721fb3b735c1de2666 Mon Sep 17 00:00:00 2001 From: Nicholas Yang Date: Thu, 23 Nov 2023 15:55:30 +0800 Subject: [PATCH 14/14] Fix: bootstrap: add informative logging for generating new ssh keypairs --- crmsh/bootstrap.py | 18 ++++++++++++------ 
crmsh/ssh_key.py | 23 +++++++++++++++++++++-- test/unittests/test_bootstrap.py | 9 ++++++--- 3 files changed, 39 insertions(+), 11 deletions(-) diff --git a/crmsh/bootstrap.py b/crmsh/bootstrap.py index 27c18b4d14..6f5a834af6 100644 --- a/crmsh/bootstrap.py +++ b/crmsh/bootstrap.py @@ -948,7 +948,9 @@ def _init_ssh_for_secondary_user_on_remote_nodes( if not sh.SSHShell(cluster_shell.local_shell, user).can_run_as(node, user): for key in local_keys: authorized_key_manager.add(node, user, key) - remote_keys = key_file_manager.ensure_key_pair_exists_for_user(node, user) + is_generated, remote_keys = key_file_manager.ensure_key_pair_exists_for_user(node, user) + if is_generated: + logger.info("A new ssh keypair is generated for user %s@%s.", user, node) for key in remote_keys: authorized_key_manager.add(None, user, key) @@ -1037,8 +1039,10 @@ def configure_ssh_key(user): shell = sh.LocalShell() key_file_manager = ssh_key.KeyFileManager(sh.ClusterShellAdaptorForLocalShell(shell)) authorized_key_manager = ssh_key.AuthorizedKeyManager(sh.SSHShell(shell, None)) - key = key_file_manager.ensure_key_pair_exists_for_user(None, user)[0] - authorized_key_manager.add(None, user, key) + is_generated, keys = key_file_manager.ensure_key_pair_exists_for_user(None, user) + if is_generated: + logger.info("A new ssh keypair is generated for user %s.", user) + authorized_key_manager.add(None, user, keys[0]) def generate_ssh_key_pair_on_remote( @@ -1809,9 +1813,11 @@ def join_ssh_with_ssh_agent( def swap_public_ssh_key_for_secondary_user(shell: sh.ClusterShell, host: str, user: str): key_file_manager = ssh_key.KeyFileManager(shell) local_key = ssh_key.KeyFile(key_file_manager.list_public_key_for_user(None, user)[0]) - remote_key = key_file_manager.ensure_key_pair_exists_for_user(host, user)[0] + is_generated, remote_keys = key_file_manager.ensure_key_pair_exists_for_user(host, user) + if is_generated: + logger.info("A new ssh keypair is generated for user %s@%s.", user, host) authorized_key_manager = ssh_key.AuthorizedKeyManager(shell) - authorized_key_manager.add(None, user, remote_key) + authorized_key_manager.add(None, user, remote_keys[0]) authorized_key_manager.add(host, user, local_key) @@ -2092,7 +2098,7 @@ def swap_key_for_hacluster(other_node_list): key_file_manager = ssh_key.KeyFileManager(shell) authorized_key_manager = ssh_key.AuthorizedKeyManager(shell) keys: typing.List[ssh_key.Key] = [ - key_file_manager.ensure_key_pair_exists_for_user(node, 'hacluster')[0] + key_file_manager.ensure_key_pair_exists_for_user(node, 'hacluster')[1][0] for node in other_node_list ] keys.append(ssh_key.KeyFile(key_file_manager.list_public_key_for_user(None, 'hacluster')[0])) diff --git a/crmsh/ssh_key.py b/crmsh/ssh_key.py index e1ac66ebbb..b036f73b28 100644 --- a/crmsh/ssh_key.py +++ b/crmsh/ssh_key.py @@ -217,9 +217,21 @@ def load_public_keys_for_user(self, host: typing.Optional[str], user: str) -> ty raise sh.CommandFailure(cmd, host, user, sh.Utils.decode_str(result.stderr).strip()) return [InMemoryPublicKey(line) for line in sh.Utils.decode_str(result.stdout).splitlines()] - def ensure_key_pair_exists_for_user(self, host: typing.Optional[str], user: str) -> typing.List[InMemoryPublicKey]: + def ensure_key_pair_exists_for_user( + self, + host: typing.Optional[str], + user: str, + ) -> typing.Tuple[bool, typing.List[InMemoryPublicKey]]: + """Ensure at least one keypair exists for the specified user. If it does not exist, generate a new one. 
+ + Return (is_generated, list_of_public_keys): + + * is_generated: whether a new keypair is generated + * list_of_public_keys: all public keys of known types, including the newly generated one + """ script = '''if [ ! \\( {condition} \\) ]; then ssh-keygen -t rsa -f ~/.ssh/id_rsa -q -C "Cluster internal on $(hostname)" -N '' <> /dev/null + echo 'GENERATED=1' fi for file in ~/.ssh/id_{{{pattern}}}; do if [ -f "$file" ]; then @@ -243,4 +255,11 @@ def ensure_key_pair_exists_for_user(self, host: typing.Optional[str], user: str) print(script) print(result.stdout) raise sh.CommandFailure(f'Script({script[:16]}...) failed. rc = {result.returncode}', host, user, sh.Utils.decode_str(result.stderr).strip()) - return [InMemoryPublicKey(line) for line in sh.Utils.decode_str(result.stdout).splitlines()] + generated = False + keys = list() + for line in sh.Utils.decode_str(result.stdout).splitlines(): + if line == 'GENERATED=1': + generated = True + else: + keys.append(InMemoryPublicKey(line)) + return generated, keys diff --git a/test/unittests/test_bootstrap.py b/test/unittests/test_bootstrap.py index b551b87cc8..45bf03d76b 100644 --- a/test/unittests/test_bootstrap.py +++ b/test/unittests/test_bootstrap.py @@ -540,7 +540,7 @@ def _test_configure_ssh_key(self, mock_change_shell, mock_key_files, mock_detect @mock.patch('crmsh.ssh_key.KeyFileManager.ensure_key_pair_exists_for_user') def test_configure_ssh_key(self, mock_ensure_key_pair, mock_add): public_key = crmsh.ssh_key.InMemoryPublicKey('foo') - mock_ensure_key_pair.return_value = [public_key] + mock_ensure_key_pair.return_value = (True, [public_key]) bootstrap.configure_ssh_key('alice') mock_ensure_key_pair.assert_called_once_with(None, 'alice') mock_add.assert_called_once_with(None, 'alice', public_key) @@ -613,8 +613,10 @@ def test_join_ssh( @mock.patch('crmsh.ssh_key.KeyFile.public_key') @mock.patch('crmsh.ssh_key.KeyFileManager.ensure_key_pair_exists_for_user') @mock.patch('crmsh.ssh_key.KeyFileManager.list_public_key_for_user') + @mock.patch('logging.Logger.info') def test_swap_public_ssh_key_for_secondary_user( self, + mock_log_info, mock_list_public_key_for_user, mock_ensure_key_pair_exists_for_user, mock_public_key, @@ -626,10 +628,10 @@ def test_swap_public_ssh_key_for_secondary_user( user_of_host=mock.Mock(crmsh.user_of_host.UserOfHost), ) mock_list_public_key_for_user.return_value = ['~/.ssh/id_rsa', '~/.ssh/id_ed25519'] - mock_ensure_key_pair_exists_for_user.return_value = [ + mock_ensure_key_pair_exists_for_user.return_value = (True, [ crmsh.ssh_key.InMemoryPublicKey('foo'), crmsh.ssh_key.InMemoryPublicKey('bar'), - ] + ]) mock_public_key.return_value = 'public_key' crmsh.bootstrap.swap_public_ssh_key_for_secondary_user(mock_shell, 'node1', 'alice') mock_list_public_key_for_user.assert_called_once_with(None, 'alice') @@ -638,6 +640,7 @@ def test_swap_public_ssh_key_for_secondary_user( mock.call(None, 'alice', crmsh.ssh_key.InMemoryPublicKey('foo')), mock.call('node1', 'alice', crmsh.ssh_key.KeyFile('~/.ssh/id_rsa')), ]) + mock_log_info.assert_called_with("A new ssh keypair is generated for user %s@%s.", 'alice', 'node1') @mock.patch('crmsh.bootstrap.change_user_shell') @mock.patch('crmsh.sh.LocalShell.get_stdout_or_raise_error')