From 43a5e9f849b45fe61eb2ba3ac1b5850b5a84e8f9 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Sun, 23 May 2021 09:12:41 -0400 Subject: [PATCH] daemon: Add socket activation via /run/rpm-ostreed.socket For historical reasons, the daemon ends up doing a lot of initialization before even claiming the DBus name. For example, it calls `ostree_sysroot_load()`. We also end up scanning the RPM database, and actually parse all the GPG keys in `/etc/pki/rpm-gpg` etc. This causes two related problems: - By doing all this work before claiming the bus name, we race against the (pretty low) DBus service timeout of 25s. - If something hard fails at initialization, the client can't easily see the error because it just appears in the systemd journal. The client will just see a service timeout. The solution to this is to adopt systemd socket activation, which drops out DBus as an intermediary. On daemon startup, we now do the process-global initialization (like ostree sysroot) and if that fails, the daemon just sticks around (but without claiming the bus name), ready to return the error message to each client. After this patch: ``` $ systemctl stop rpm-ostreed $ umount /boot $ rpm-ostree status error: Couldn't start daemon: Error setting up sysroot: loading sysroot: Unexpected state: /run/ostree-booted found, but no /boot/loader directory ``` --- Cargo.toml | 2 + Makefile-daemon.am | 10 +- configure.ac | 7 ++ packaging/rpm-ostree.spec.in | 9 +- rust/src/client.rs | 36 +++++- rust/src/daemon.rs | 134 ++++++++++++++++++++- rust/src/lib.rs | 8 ++ rust/src/main.rs | 4 +- src/app/rpmostree-builtin-start-daemon.cxx | 51 +++++--- src/daemon/rpm-ostreed.socket | 9 ++ src/daemon/rpmostreed-daemon.cxx | 1 + src/daemon/rpmostreed-daemon.hpp | 28 +++++ 12 files changed, 274 insertions(+), 25 deletions(-) create mode 100644 src/daemon/rpm-ostreed.socket create mode 100644 src/daemon/rpmostreed-daemon.hpp diff --git a/Cargo.toml b/Cargo.toml index 4539fcc402..b9c380663a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -123,6 +123,8 @@ lto = "thin" # Note: If you add a feature here, you also probably want to update utils.rs:get_features() fedora-integration = [] rhsm = ["libdnf-sys/rhsm"] +# Enable hard requirement on `rpm-ostreed.socket`; requires https://bugzilla.redhat.com/show_bug.cgi?id=2110012 +client-socket = [] bin-unit-tests = [] # ASAN+UBSAN sanitizers = [] diff --git a/Makefile-daemon.am b/Makefile-daemon.am index 6e6cb3a3d5..25e753ee39 100644 --- a/Makefile-daemon.am +++ b/Makefile-daemon.am @@ -63,14 +63,18 @@ systemdunit_service_in_files = \ $(NULL) systemdunit_service_files = $(systemdunit_service_in_files:.service.in=.service) -systemdunit_timer_files = \ +systemdunit_other_files = \ $(srcdir)/src/daemon/rpm-ostreed-automatic.timer \ $(srcdir)/src/daemon/rpm-ostree-countme.timer \ $(NULL) +if CLIENT_SOCKET + systemdunit_other_files += $(srcdir)/src/daemon/rpm-ostreed.socket +endif + systemdunit_DATA = \ $(systemdunit_service_files) \ - $(systemdunit_timer_files) \ + $(systemdunit_other_files) \ $(NULL) systemdunitdir = $(prefix)/lib/systemd/system/ @@ -110,7 +114,7 @@ EXTRA_DIST += \ $(sysconf_DATA) \ $(service_in_files) \ $(systemdunit_service_in_files) \ - $(systemdunit_timer_files) \ + $(systemdunit_other_files) \ $(NULL) CLEANFILES += \ diff --git a/configure.ac b/configure.ac index ba0ed4cf0b..a3347e4b21 100644 --- a/configure.ac +++ b/configure.ac @@ -71,6 +71,13 @@ AC_ARG_ENABLE(featuresrs, [enable_featuresrs=]) AC_SUBST([RUST_FEATURES], $enable_featuresrs) +AC_ARG_ENABLE(client-socket, + AS_HELP_STRING([--enable-client-socket], + [(default: no)]),, + [enable_client_socket=no]) +AS_IF([test x$enable_client_socket = xyes], [enable_featuresrs="$enable_featuresrs client-socket"]) +AM_CONDITIONAL(CLIENT_SOCKET, [echo $enable_featuresrs | grep -q 'client-socket']) + # Initialize libtool LT_PREREQ([2.2.4]) LT_INIT([disable-static]) diff --git a/packaging/rpm-ostree.spec.in b/packaging/rpm-ostree.spec.in index c7b93040cf..aeb09fb9ee 100644 --- a/packaging/rpm-ostree.spec.in +++ b/packaging/rpm-ostree.spec.in @@ -34,6 +34,13 @@ BuildRequires: rust %bcond_with rhsm %endif +# https://bugzilla.redhat.com/show_bug.cgi?id=2110012 +%if 0%{?fedora} >= 37 +%bcond_without client_socket +%else +%bcond_with client_socket +%endif + # RHEL (8,9) doesn't ship zchunk today. Keep this in sync # with libdnf: https://gitlab.com/redhat/centos-stream/rpms/libdnf/-/blob/762f631e36d1e42c63a794882269d26c156b68c1/libdnf.spec#L45 %if 0%{?rhel} @@ -179,7 +186,7 @@ env NOCONFIGURE=1 ./autogen.sh export RUSTFLAGS="%{build_rustflags}" %endif %configure --disable-silent-rules --enable-gtk-doc %{?rpmdb_default} %{?with_sanitizers:--enable-sanitizers} %{?with_bin_unit_tests:--enable-bin-unit-tests} \ - %{?with_rhsm:--enable-featuresrs=rhsm} + %{?with_rhsm:--enable-featuresrs=rhsm} %{?with_client_socket:--enable-client-socket} %make_build diff --git a/rust/src/client.rs b/rust/src/client.rs index 2ae9338e5b..57b0b29314 100644 --- a/rust/src/client.rs +++ b/rust/src/client.rs @@ -154,6 +154,26 @@ pub(crate) fn client_handle_fd_argument( } } +/// Connect to the client socket and ensure the daemon is initialized; +/// this avoids DBus and ensures that we get any early startup errors +/// returned cleanly. +#[cfg(feature = "client-socket")] +fn start_daemon_via_socket() -> Result<()> { + let address = "/run/rpm-ostree/client.sock"; + tracing::debug!("Starting daemon via {address}"); + let s = std::os::unix::net::UnixStream::connect(address) + .with_context(|| anyhow!("Failed to connect to {}", address))?; + let mut s = std::io::BufReader::new(s); + let mut r = String::new(); + s.read_to_string(&mut r) + .context("Reading from client socket")?; + if r.is_empty() { + Ok(()) + } else { + Err(anyhow!("{r}").into()) + } +} + /// Explicitly ensure the daemon is started via systemd, if possible. /// /// This works around bugs from DBus activation, see @@ -164,12 +184,14 @@ pub(crate) fn client_handle_fd_argument( /// to systemd directly and use its client tools to scrape errors. /// /// What we really should do probably is use native socket activation. -pub(crate) fn client_start_daemon() -> CxxResult<()> { +#[cfg(not(feature = "client-socket"))] +fn start_daemon_via_systemctl() -> Result<()> { let service = "rpm-ostreed.service"; // Assume non-root can't use systemd right now. if rustix::process::getuid().as_raw() != 0 { return Ok(()); } + // Unfortunately, RHEL8 systemd will count "systemctl start" // invocations against the restart limit, so query the status // first. @@ -196,6 +218,18 @@ pub(crate) fn client_start_daemon() -> CxxResult<()> { Ok(()) } +pub(crate) fn client_start_daemon() -> CxxResult<()> { + // systemctl and socket paths only work for root right now; in the future + // the socket may be opened up. + if rustix::process::getuid().as_raw() != 0 { + return Ok(()); + } + #[cfg(feature = "client-socket")] + return start_daemon_via_socket().map_err(Into::into); + #[cfg(not(feature = "client-socket"))] + return start_daemon_via_systemctl().map_err(Into::into); +} + /// Convert the GVariant parameters from the DownloadProgress DBus API to a human-readable English string. pub(crate) fn client_render_download_progress(progress: &crate::ffi::GVariant) -> String { let progress = progress diff --git a/rust/src/daemon.rs b/rust/src/daemon.rs index 41e4984151..334370ee09 100644 --- a/rust/src/daemon.rs +++ b/rust/src/daemon.rs @@ -8,19 +8,23 @@ use crate::cxxrsutil::*; use crate::ffi::{ OverrideReplacementSource, OverrideReplacementType, ParsedRevision, ParsedRevisionKind, }; -use anyhow::{anyhow, format_err, Result}; +use anyhow::{anyhow, format_err, Context, Result}; use cap_std::fs::Dir; use cap_std_ext::dirext::CapStdExtDirExt; use cap_std_ext::{cap_std, rustix}; use fn_error_context::context; use glib::prelude::*; +use once_cell::sync::Lazy; use ostree_ext::{gio, glib, ostree}; -use rustix::fd::BorrowedFd; +use rustix::fd::{BorrowedFd, FromRawFd}; use rustix::fs::MetadataExt; use std::collections::{BTreeMap, BTreeSet}; -use std::io::Read; +use std::io::{Read, Write}; use std::os::unix::fs::PermissionsExt; use std::path::Path; +use std::sync::Mutex; +use tokio::net::{UnixListener, UnixStream}; +use tokio::sync::oneshot::{Receiver, Sender}; const RPM_OSTREED_COMMIT_VERIFICATION_CACHE: &str = "rpm-ostree/gpgcheck-cache"; @@ -131,6 +135,130 @@ fn deployment_populate_variant_origin( Ok(()) } +async fn send_ok_result_to_client(_client: UnixStream) { + // On success we close the stream without writing anything, + // which acknowledges successful startup to the client. + // In the future we may actually implement a protocol here, so this + // is stubbed out as a full async fn in preparation for that. + tracing::debug!("Acknowleged client"); +} + +static SHUTDOWN_SIGNAL: Lazy>>> = Lazy::new(|| Mutex::new(None)); + +async fn process_clients_with_ok(listener: UnixListener, mut cancel: Receiver<()>) { + tracing::debug!("Processing clients..."); + loop { + tokio::select! { + _ = &mut cancel => { + tracing::debug!("Got cancellation event"); + return + } + r = listener.accept() => { + match r { + Ok((stream, _addr)) => { + send_ok_result_to_client(stream).await; + }, + Err(e) => { + tracing::debug!("failed to accept client: {e}") + } + } + } + } + } +} + +/// Ensure all asynchronous tasks in this Rust half of the daemon code are stopped. +/// Called from C++. +pub(crate) fn daemon_terminate() { + let chan = (*SHUTDOWN_SIGNAL).lock().unwrap().take().unwrap(); + let _ = chan.send(()); +} + +fn process_one_client(listener: std::os::unix::net::UnixListener, e: anyhow::Error) -> Result<()> { + let mut incoming = match listener.incoming().next() { + Some(r) => r?, + None => { + anyhow::bail!("Expected to find client socket from activation"); + } + }; + + let buf = format!("{e}"); + incoming.write_all(buf.as_bytes())?; + + todo!() +} + +/// Perform initialization steps required by systemd service activation. +/// +/// This ensures that the system is running under systemd, then receives the +/// socket-FD for main IPC logic, and notifies systemd about ready-state. +pub(crate) fn daemon_main(debug: bool) -> Result<()> { + let handle = tokio::runtime::Handle::current(); + let _tokio_guard = handle.enter(); + use std::os::unix::net::UnixListener as StdUnixListener; + if !systemd::daemon::booted()? { + return Err(anyhow!("not running as a systemd service")); + } + + let init_res: Result<()> = crate::ffi::daemon_init_inner(debug).map_err(|e| e.into()); + tracing::debug!("Initialization result: {init_res:?}"); + + let mut fds = systemd::daemon::listen_fds(false)?.iter(); + let listener = match fds.next() { + None => { + // If started directly via `systemctl start` or DBus activation, we + // directly propagate the error back to our exit code. + init_res?; + tracing::debug!("Initializing directly (not socket activated)"); + cfg!(feature = "client-socket") + .then(|| StdUnixListener::bind("/run/rpm-ostree/client.sock")) + .transpose() + .context("Binding to socket")? + } + Some(fd) => { + if fds.next().is_some() { + return Err(anyhow!("Expected exactly 1 fd from systemd activation")); + } + tracing::debug!("Initializing from socket activation; fd={fd}"); + let listener = unsafe { StdUnixListener::from_raw_fd(fd) }; + + match init_res { + Ok(_) => Some(listener), + Err(e) => { + let err_copy = anyhow!("{e}"); + tracing::debug!("Reporting initialization error: {e}"); + match process_one_client(listener, err_copy) { + Ok(()) => { + tracing::debug!("Acknowleged initial client"); + } + Err(e) => { + tracing::debug!("Caught error while processing client {e}"); + } + } + return Err(e); + } + } + } + }; + + if let Some(listener) = listener { + let (shutdown_send, shutdown_recv) = tokio::sync::oneshot::channel(); + (*SHUTDOWN_SIGNAL).lock().unwrap().replace(shutdown_send); + + let listener = UnixListener::from_std(listener)?; + + // On success, we spawn a helper task that just responds with + // sucess to clients that connect via the socket. In the future, + // perhaps we'll expose an API here. + tracing::debug!("Spawning acknowledgement task"); + tokio::task::spawn(async { process_clients_with_ok(listener, shutdown_recv).await }); + } + + tracing::debug!("Entering daemon mainloop"); + // And now, enter the main loop. + Ok(crate::ffi::daemon_main_inner()?) +} + /// Serialize information about the given deployment into the `dict`; /// this will be exposed via DBus and is hence public API. pub(crate) fn deployment_populate_variant( diff --git a/rust/src/lib.rs b/rust/src/lib.rs index ea4e2f0f5e..51fa309d6f 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -305,6 +305,8 @@ pub mod ffi { // daemon.rs extern "Rust" { + fn daemon_main(debug: bool) -> Result<()>; + fn daemon_terminate(); fn daemon_sanitycheck_environment(sysroot: &OstreeSysroot) -> Result<()>; fn deployment_generate_id(deployment: &OstreeDeployment) -> String; fn deployment_populate_variant( @@ -768,6 +770,12 @@ pub mod ffi { fn c_unit_tests() -> Result<()>; } + unsafe extern "C++" { + include!("rpmostreed-daemon.hpp"); + fn daemon_init_inner(debug: bool) -> Result<()>; + fn daemon_main_inner() -> Result<()>; + } + unsafe extern "C++" { include!("rpmostree-clientlib.h"); fn client_require_root() -> Result<()>; diff --git a/rust/src/main.rs b/rust/src/main.rs index 69ba8fe804..19f2f0c9a4 100644 --- a/rust/src/main.rs +++ b/rust/src/main.rs @@ -135,7 +135,9 @@ fn inner_main() -> Result { .enable_all() .build() .context("Failed to build tokio runtime")?; - runtime.block_on(dispatch_multicall(callname, args)) + let r = runtime.block_on(dispatch_multicall(callname, args)); + tracing::debug!("Exiting inner main with result: {r:?}"); + r } fn print_error(e: anyhow::Error) { diff --git a/src/app/rpmostree-builtin-start-daemon.cxx b/src/app/rpmostree-builtin-start-daemon.cxx index dcc5382361..79aacc3f84 100644 --- a/src/app/rpmostree-builtin-start-daemon.cxx +++ b/src/app/rpmostree-builtin-start-daemon.cxx @@ -35,6 +35,7 @@ #include "rpmostree-libbuiltin.h" #include "rpmostree-util.h" #include "rpmostreed-daemon.h" +#include "rpmostreed-utils.h" typedef enum { @@ -53,6 +54,7 @@ static GOptionEntry opt_entries[] = { { "debug", 'd', 0, G_OPTION_ARG_NONE, &opt "Use system root SYSROOT (default: /)", "SYSROOT" }, { NULL } }; +static GDBusConnection *bus = NULL; static RpmostreedDaemon *rpm_ostree_daemon = NULL; static void @@ -211,17 +213,15 @@ on_log_handler (const gchar *log_domain, GLogLevelFlags log_level, const gchar * sd_journal_print (priority, "%s", message); } -gboolean -rpmostree_builtin_start_daemon (int argc, char **argv, RpmOstreeCommandInvocation *invocation, - GCancellable *cancellable, GError **error) +namespace rpmostreecxx { - g_autoptr (GOptionContext) opt_context = g_option_context_new (" - start the daemon process"); - g_option_context_add_main_entries (opt_context, opt_entries, NULL); - - if (!g_option_context_parse (opt_context, &argc, &argv, error)) - return FALSE; - - if (opt_debug) +// This function is always called from the Rust side. Hopefully +// soon we'll move more of this code into daemon.rs. +void +daemon_init_inner (bool debug) +{ + g_autoptr (GError) local_error = NULL; + if (debug) { g_autoptr (GIOChannel) channel = NULL; g_log_set_handler (G_LOG_DOMAIN, (GLogLevelFlags)(G_LOG_LEVEL_DEBUG | G_LOG_LEVEL_INFO), @@ -243,19 +243,24 @@ rpmostree_builtin_start_daemon (int argc, char **argv, RpmOstreeCommandInvocatio g_unix_signal_add (SIGTERM, on_sigint, NULL); /* Get an explicit ref to the bus so we can use it later */ - g_autoptr (GDBusConnection) bus = g_bus_get_sync (G_BUS_TYPE_SYSTEM, NULL, error); + g_autoptr (GDBusConnection) bus = g_bus_get_sync (G_BUS_TYPE_SYSTEM, NULL, &local_error); if (!bus) - return FALSE; - if (!start_daemon (bus, error)) + util::throw_gerror (local_error); + if (!start_daemon (bus, &local_error)) { - if (*error) - sd_notifyf (0, "STATUS=error: %s", (*error)->message); - return FALSE; + sd_notifyf (0, "STATUS=error: %s", local_error->message); + util::throw_gerror (local_error); } +} +// Called from rust side to enter mainloop. +void +daemon_main_inner () +{ state_transition (APPSTATE_RUNNING); g_debug ("Entering main event loop"); + g_assert (rpm_ostree_daemon); rpmostreed_daemon_run_until_idle_exit (rpm_ostree_daemon); if (bus) @@ -285,6 +290,20 @@ rpmostree_builtin_start_daemon (int argc, char **argv, RpmOstreeCommandInvocatio g_autoptr (GMainContext) mainctx = g_main_context_default (); while (appstate == APPSTATE_FLUSHING) g_main_context_iteration (mainctx, TRUE); +} +} /* namespace */ + +gboolean +rpmostree_builtin_start_daemon (int argc, char **argv, RpmOstreeCommandInvocation *invocation, + GCancellable *cancellable, GError **error) +{ + g_autoptr (GOptionContext) opt_context = g_option_context_new (" - start the daemon process"); + g_option_context_add_main_entries (opt_context, opt_entries, NULL); + + if (!g_option_context_parse (opt_context, &argc, &argv, error)) + return FALSE; + + rpmostreecxx::daemon_main (opt_debug); return TRUE; } diff --git a/src/daemon/rpm-ostreed.socket b/src/daemon/rpm-ostreed.socket new file mode 100644 index 0000000000..020c640360 --- /dev/null +++ b/src/daemon/rpm-ostreed.socket @@ -0,0 +1,9 @@ +[Unit] +ConditionKernelCommandLine=ostree + +[Socket] +ListenStream=/run/rpm-ostree/client.sock +SocketMode=0600 + +[Install] +WantedBy=sockets.target diff --git a/src/daemon/rpmostreed-daemon.cxx b/src/daemon/rpmostreed-daemon.cxx index 74d46528c4..ffe2fa846c 100644 --- a/src/daemon/rpmostreed-daemon.cxx +++ b/src/daemon/rpmostreed-daemon.cxx @@ -799,6 +799,7 @@ rpmostreed_daemon_run_until_idle_exit (RpmostreedDaemon *self) update_status (self); while (self->running) g_main_context_iteration (NULL, TRUE); + rpmostreecxx::daemon_terminate (); } void diff --git a/src/daemon/rpmostreed-daemon.hpp b/src/daemon/rpmostreed-daemon.hpp new file mode 100644 index 0000000000..4c788f0960 --- /dev/null +++ b/src/daemon/rpmostreed-daemon.hpp @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2022 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#pragma once + +#include "rust/cxx.h" + +namespace rpmostreecxx +{ +/* Note: Currently actually defined in rpmostree-builtin-start-daemon.cxx for historical reasons */ +void daemon_init_inner (bool debug); +void daemon_main_inner (); +} \ No newline at end of file