From d3b70deb3b5baf27fb3b110f6858f772cd887755 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 23 Jan 2020 12:19:09 -0600 Subject: [PATCH 01/10] Log: libcrmcommon: improve messages for IPC hello creation --- lib/common/ipc.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/lib/common/ipc.c b/lib/common/ipc.c index df508020ea8..263ff7f4aa1 100644 --- a/lib/common/ipc.c +++ b/lib/common/ipc.c @@ -1680,23 +1680,35 @@ create_hello_message(const char *uuid, xmlNode *hello_node = NULL; xmlNode *hello = NULL; - if (uuid == NULL || strlen(uuid) == 0 - || client_name == NULL || strlen(client_name) == 0 - || major_version == NULL || strlen(major_version) == 0 - || minor_version == NULL || strlen(minor_version) == 0) { - crm_err("Missing fields, Hello message will not be valid."); + if (crm_strlen_zero(uuid) || crm_strlen_zero(client_name) + || crm_strlen_zero(major_version) || crm_strlen_zero(minor_version)) { + crm_err("Could not create IPC hello message from %s (UUID %s): " + "missing information", + client_name? client_name : "unknown client", + uuid? uuid : "unknown"); return NULL; } hello_node = create_xml_node(NULL, XML_TAG_OPTIONS); + if (hello_node == NULL) { + crm_err("Could not create IPC hello message from %s (UUID %s): " + "Message data creation failed", client_name, uuid); + return NULL; + } + crm_xml_add(hello_node, "major_version", major_version); crm_xml_add(hello_node, "minor_version", minor_version); crm_xml_add(hello_node, "client_name", client_name); crm_xml_add(hello_node, "client_uuid", uuid); - crm_trace("creating hello message"); hello = create_request(CRM_OP_HELLO, hello_node, NULL, NULL, client_name, uuid); + if (hello == NULL) { + crm_err("Could not create IPC hello message from %s (UUID %s): " + "Request creation failed", client_name, uuid); + return NULL; + } free_xml(hello_node); + crm_trace("Created hello message from %s (UUID %s)", client_name, uuid); return hello; } From 7bb0d1c0ae304f8ba8e020c1b925a3d1c3727eac Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Thu, 23 Jan 2020 13:31:55 -0600 Subject: [PATCH 02/10] Log: libcrmcommon: improve messages when sending IPC request --- lib/common/ipc.c | 79 +++++++++++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 34 deletions(-) diff --git a/lib/common/ipc.c b/lib/common/ipc.c index 263ff7f4aa1..ca4d538ecb1 100644 --- a/lib/common/ipc.c +++ b/lib/common/ipc.c @@ -1381,10 +1381,12 @@ internal_ipc_get_reply(crm_ipc_t *client, int request_id, int ms_timeout, * \param[in] client Connection to IPC server * \param[in] message XML message to send * \param[in] flags Bitmask of crm_ipc_flags - * \param[in] ms_timeout Give up if not sent within this time (5s if 0) + * \param[in] ms_timeout Give up if not sent within this much time + * (5 seconds if 0, or no timeout if negative) * \param[out] reply Reply from server (or NULL if none) * - * \return Negative errno on error, otherwise number of bytes sent + * \return Negative errno on error, otherwise size of reply received in bytes + * if reply was needed, otherwise number of bytes sent */ int crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, int32_t ms_timeout, @@ -1401,12 +1403,14 @@ crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, in crm_ipc_init(); if (client == NULL) { - crm_notice("Invalid connection"); + crm_notice("Can't send IPC request without connection (bug?): %.100s", + message); return -ENOTCONN; } else if (crm_ipc_connected(client) == FALSE) { /* Don't even bother */ - crm_notice("Connection to %s closed", client->name); + crm_notice("Can't send IPC request to %s: Connection closed", + client->name); return -ENOTCONN; } @@ -1415,7 +1419,6 @@ crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, in } if (client->need_reply) { - crm_trace("Trying again to obtain pending reply from %s", client->name); qb_rc = qb_ipcc_recv(client->ipc, client->buffer, client->buf_size, ms_timeout); if (qb_rc < 0) { crm_warn("Sending IPC to %s disabled until pending reply received", @@ -1423,7 +1426,7 @@ crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, in return -EALREADY; } else { - crm_notice("Lost reply from %s finally arrived, sending re-enabled", + crm_notice("Sending IPC to %s re-enabled after pending reply received", client->name); client->need_reply = FALSE; } @@ -1433,6 +1436,8 @@ crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, in CRM_LOG_ASSERT(id != 0); /* Crude wrap-around detection */ rc = pcmk__ipc_prepare_iov(id, message, client->max_buf_size, &iov, &bytes); if (rc != pcmk_rc_ok) { + crm_warn("Couldn't prepare IPC request to %s: %s " CRM_XS " rc=%d", + client->name, pcmk_rc_str(rc), rc); return pcmk_rc2legacy(rc); } @@ -1446,14 +1451,15 @@ crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, in if(header->size_compressed) { if(factor < 10 && (client->max_buf_size / 10) < (bytes / factor)) { - crm_notice("Compressed message exceeds %d0%% of the configured ipc limit (%u bytes), " - "consider setting PCMK_ipc_buffer to %u or higher", + crm_notice("Compressed message exceeds %d0%% of configured IPC " + "limit (%u bytes); consider setting PCMK_ipc_buffer to " + "%u or higher", factor, client->max_buf_size, 2 * client->max_buf_size); factor++; } } - crm_trace("Sending from client: %s request id: %d bytes: %u timeout:%d msg...", + crm_trace("Sending %s IPC request %d of %u bytes using %dms timeout", client->name, header->qb.id, header->qb.size, ms_timeout); if (ms_timeout > 0 || is_not_set(flags, crm_ipc_client_response)) { @@ -1461,72 +1467,77 @@ crm_ipc_send(crm_ipc_t * client, xmlNode * message, enum crm_ipc_flags flags, in time_t timeout = time(NULL) + 1 + (ms_timeout / 1000); do { + /* @TODO Is this check really needed? Won't qb_ipcc_sendv() return + * an error if it's not connected? + */ + if (!crm_ipc_connected(client)) { + goto send_cleanup; + } + qb_rc = qb_ipcc_sendv(client->ipc, iov, 2); - } while ((qb_rc == -EAGAIN) && (time(NULL) < timeout) - && crm_ipc_connected(client)); + } while ((qb_rc == -EAGAIN) && (time(NULL) < timeout)); + rc = (int) qb_rc; // Negative of system errno, or bytes sent if (qb_rc <= 0) { - crm_trace("Failed to send %s IPC request %d of %u bytes", - client->name, header->qb.id, header->qb.size); - rc = (int) qb_rc; goto send_cleanup; } else if (is_not_set(flags, crm_ipc_client_response)) { - crm_trace("Sent %s IPC request %d of %u bytes (not waiting for reply)", - client->name, header->qb.id, header->qb.size); - rc = (int) qb_rc; + crm_trace("Not waiting for reply to %s IPC request %d", + client->name, header->qb.id); goto send_cleanup; } rc = internal_ipc_get_reply(client, header->qb.id, ms_timeout, &bytes); if (rc != pcmk_rc_ok) { - /* No reply, for now, disable sending - * - * The alternative is to close the connection since we don't know - * how to detect and discard out-of-sequence replies + /* We didn't get the reply in time, so disable future sends for now. + * The only alternative would be to close the connection since we + * don't know how to detect and discard out-of-sequence replies. * - * TODO - implement the above + * @TODO Implement out-of-sequence detection */ client->need_reply = TRUE; } - rc = (int) bytes; + rc = (int) bytes; // Negative system errno, or size of reply received } else { + // No timeout, and client response needed do { qb_rc = qb_ipcc_sendv_recv(client->ipc, iov, 2, client->buffer, - client->buf_size, -1); + client->buf_size, -1); } while ((qb_rc == -EAGAIN) && crm_ipc_connected(client)); - rc = (int) qb_rc; + rc = (int) qb_rc; // Negative system errno, or size of reply received } if (rc > 0) { struct crm_ipc_response_header *hdr = (struct crm_ipc_response_header *)(void*)client->buffer; - crm_trace("Received response %d, size=%u, rc=%d, text: %.200s", - hdr->qb.id, hdr->qb.size, rc, crm_ipc_buffer(client)); + crm_trace("Received %d-byte reply %d to %s IPC %d: %.100s", + rc, hdr->qb.id, client->name, header->qb.id, + crm_ipc_buffer(client)); if (reply) { *reply = string2xml(crm_ipc_buffer(client)); } } else { - crm_trace("Response not received: rc=%d, errno=%d", rc, errno); + crm_trace("No reply to %s IPC %d: rc=%d", + client->name, header->qb.id, rc); } send_cleanup: if (crm_ipc_connected(client) == FALSE) { - crm_notice("Connection to %s closed: %s (%d)", - client->name, pcmk_strerror(rc), rc); + crm_notice("Couldn't send %s IPC request %d: Connection closed " + CRM_XS " rc=%d", client->name, header->qb.id, rc); } else if (rc == -ETIMEDOUT) { - crm_warn("Request %d to %s failed: %s after %dms " CRM_XS " rc=%d", - header->qb.id, client->name, pcmk_strerror(rc), ms_timeout, + crm_warn("%s IPC request %d failed: %s after %dms " CRM_XS " rc=%d", + client->name, header->qb.id, pcmk_strerror(rc), ms_timeout, rc); crm_write_blackbox(0, NULL); } else if (rc <= 0) { - crm_warn("Request %d to %s failed: %s " CRM_XS " rc=%d", - header->qb.id, client->name, + crm_warn("%s IPC request %d failed: %s " CRM_XS " rc=%d", + client->name, header->qb.id, ((rc == 0)? "No bytes sent" : pcmk_strerror(rc)), rc); } From 75867398ad5470a1de99152c9d4838d4fd19a487 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 21 Jan 2020 18:33:25 -0600 Subject: [PATCH 03/10] Refactor: tools: abstract controller IPC interface from crm_resource This is an initial step towards a public API in libcrmcommon. --- tools/Makefile.am | 8 +- tools/crm_resource.c | 149 +++++------ tools/crm_resource.h | 20 +- tools/crm_resource_controller.c | 425 ++++++++++++++++++++++++++++++++ tools/crm_resource_controller.h | 198 +++++++++++++++ tools/crm_resource_runtime.c | 155 +++++------- 6 files changed, 757 insertions(+), 198 deletions(-) create mode 100644 tools/crm_resource_controller.c create mode 100644 tools/crm_resource_controller.h diff --git a/tools/Makefile.am b/tools/Makefile.am index b68ad51318e..9f59c9debf1 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -12,7 +12,7 @@ if BUILD_SYSTEMD systemdsystemunit_DATA = crm_mon.service endif -noinst_HEADERS = crm_mon.h crm_resource.h +noinst_HEADERS = crm_mon.h crm_resource.h crm_resource_controller.h pcmkdir = $(datadir)/$(PACKAGE) pcmk_DATA = report.common report.collector @@ -110,7 +110,11 @@ crm_attribute_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la -crm_resource_SOURCES = crm_resource.c crm_resource_ban.c crm_resource_runtime.c crm_resource_print.c +crm_resource_SOURCES = crm_resource.c \ + crm_resource_ban.c \ + crm_resource_controller.c \ + crm_resource_print.c \ + crm_resource_runtime.c crm_resource_LDADD = $(top_builddir)/lib/pengine/libpe_rules.la \ $(top_builddir)/lib/fencing/libstonithd.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ diff --git a/tools/crm_resource.c b/tools/crm_resource.c index 62a9e755a1d..b7ce90b6c71 100644 --- a/tools/crm_resource.c +++ b/tools/crm_resource.c @@ -43,50 +43,38 @@ resource_ipc_timeout(gpointer data) } static void -resource_ipc_connection_destroy(gpointer user_data) +handle_controller_reply(pcmk_controld_api_t *controld_api, void *api_data, + void *user_data) { - crm_info("Connection to controller was terminated"); - crm_exit(CRM_EX_DISCONNECT); + fprintf(stderr, "."); + if ((controld_api->replies_expected(controld_api) == 0) + && mainloop && g_main_loop_is_running(mainloop)) { + fprintf(stderr, " OK\n"); + crm_debug("Got all the replies we expected"); + crm_exit(CRM_EX_OK); + } } static void -start_mainloop(void) +handle_controller_drop(pcmk_controld_api_t *controld_api, void *api_data, + void *user_data) { - if (crmd_replies_needed == 0) { - return; - } - - mainloop = g_main_loop_new(NULL, FALSE); - fprintf(stderr, "Waiting for %d %s from the controller", - crmd_replies_needed, - pcmk__plural_alt(crmd_replies_needed, "reply", "replies")); - crm_debug("Waiting for %d %s from the controller", - crmd_replies_needed, - pcmk__plural_alt(crmd_replies_needed, "reply", "replies")); - - g_timeout_add(MESSAGE_TIMEOUT_S * 1000, resource_ipc_timeout, NULL); - g_main_loop_run(mainloop); + crm_info("Connection to controller was terminated"); + crm_exit(CRM_EX_DISCONNECT); } -static int -resource_ipc_callback(const char *buffer, ssize_t length, gpointer userdata) +static void +start_mainloop(pcmk_controld_api_t *controld_api) { - xmlNode *msg = string2xml(buffer); - - fprintf(stderr, "."); - crm_log_xml_trace(msg, "[inbound]"); - - crmd_replies_needed--; - if ((crmd_replies_needed == 0) && mainloop - && g_main_loop_is_running(mainloop)) { - - fprintf(stderr, " OK\n"); - crm_debug("Got all the replies we expected"); - crm_exit(CRM_EX_OK); + if (controld_api->replies_expected(controld_api) > 0) { + unsigned int count = controld_api->replies_expected(controld_api); + + fprintf(stderr, "Waiting for %d %s from the controller", + count, pcmk__plural_alt(count, "reply", "replies")); + mainloop = g_main_loop_new(NULL, FALSE); + g_timeout_add(MESSAGE_TIMEOUT_S * 1000, resource_ipc_timeout, NULL); + g_main_loop_run(mainloop); } - - free_xml(msg); - return 0; } static int @@ -115,12 +103,6 @@ build_constraint_list(xmlNode *root) return retval; } -struct ipc_client_callbacks crm_callbacks = { - .dispatch = resource_ipc_callback, - .destroy = resource_ipc_connection_destroy, -}; - - /* short option letters still available: eEJkKXyYZ */ /* *INDENT-OFF* */ @@ -484,13 +466,12 @@ main(int argc, char **argv) GHashTable *override_params = NULL; char *xml_file = NULL; - crm_ipc_t *crmd_channel = NULL; + pcmk_controld_api_t *controld_api = NULL; pe_working_set_t *data_set = NULL; xmlNode *cib_xml_copy = NULL; cib_t *cib_conn = NULL; resource_t *rsc = NULL; bool recursive = FALSE; - char *our_pid = NULL; bool validate_cmdline = FALSE; /* whether we are just validating based on command line options */ bool require_resource = TRUE; /* whether command requires that resource be specified */ @@ -924,8 +905,6 @@ main(int argc, char **argv) crm_exit(CRM_EX_USAGE); } - our_pid = crm_getpid_s(); - if (do_force) { crm_debug("Forcing..."); cib_options |= cib_quorum_override; @@ -990,20 +969,26 @@ main(int argc, char **argv) // Establish a connection to the controller if needed if (require_crmd) { - xmlNode *xml = NULL; - mainloop_io_t *source = - mainloop_add_ipc_client(CRM_SYSTEM_CRMD, G_PRIORITY_DEFAULT, 0, NULL, &crm_callbacks); - crmd_channel = mainloop_get_ipc_client(source); - - if (crmd_channel == NULL) { - CMD_ERR("Error connecting to the controller"); - rc = -ENOTCONN; + char *client_uuid; + pcmk_controld_api_cb_t dispatch_cb = { + handle_controller_reply, NULL + }; + pcmk_controld_api_cb_t destroy_cb = { + handle_controller_drop, NULL + }; + + + client_uuid = crm_getpid_s(); + controld_api = pcmk_new_controld_api(crm_system_name, client_uuid); + free(client_uuid); + + rc = controld_api->connect(controld_api, true, &dispatch_cb, + &destroy_cb); + if (rc != pcmk_rc_ok) { + CMD_ERR("Error connecting to the controller: %s", pcmk_rc_str(rc)); + rc = pcmk_rc2legacy(rc); goto bail; } - - xml = create_hello_message(our_pid, crm_system_name, "0", "1"); - crm_ipc_send(crmd_channel, xml, 0, 0, NULL); - free_xml(xml); } /* Handle rsc_cmd appropriately */ @@ -1086,10 +1071,11 @@ main(int argc, char **argv) cli_resource_print_cts_constraints(data_set); } else if (rsc_cmd == 'F') { - rc = cli_resource_fail(crmd_channel, host_uname, rsc_id, data_set); - if (rc == pcmk_ok) { - start_mainloop(); + rc = cli_resource_fail(controld_api, host_uname, rsc_id, data_set); + if (rc == pcmk_rc_ok) { + start_mainloop(controld_api); } + rc = pcmk_rc2legacy(rc); } else if (rsc_cmd == 'O') { rc = cli_resource_print_operations(rsc_id, host_uname, TRUE, data_set); @@ -1283,12 +1269,11 @@ main(int argc, char **argv) if (do_force == FALSE) { rsc = uber_parent(rsc); } - crmd_replies_needed = 0; crm_debug("Erasing failures of %s (%s requested) on %s", rsc->id, rsc_id, (host_uname? host_uname: "all nodes")); - rc = cli_resource_delete(crmd_channel, host_uname, rsc, - operation, interval_spec, TRUE, data_set); + rc = cli_resource_delete(controld_api, host_uname, rsc, operation, + interval_spec, TRUE, data_set); if ((rc == pcmk_ok) && !BE_QUIET) { // Show any reasons why resource might stay stopped @@ -1296,26 +1281,25 @@ main(int argc, char **argv) } if (rc == pcmk_ok) { - start_mainloop(); + start_mainloop(controld_api); } } else if (rsc_cmd == 'C') { - rc = cli_cleanup_all(crmd_channel, host_uname, operation, interval_spec, + rc = cli_cleanup_all(controld_api, host_uname, operation, interval_spec, data_set); if (rc == pcmk_ok) { - start_mainloop(); + start_mainloop(controld_api); } } else if ((rsc_cmd == 'R') && rsc) { if (do_force == FALSE) { rsc = uber_parent(rsc); } - crmd_replies_needed = 0; crm_debug("Re-checking the state of %s (%s requested) on %s", rsc->id, rsc_id, (host_uname? host_uname: "all nodes")); - rc = cli_resource_delete(crmd_channel, host_uname, rsc, - NULL, 0, FALSE, data_set); + rc = cli_resource_delete(controld_api, host_uname, rsc, NULL, 0, FALSE, + data_set); if ((rc == pcmk_ok) && !BE_QUIET) { // Show any reasons why resource might stay stopped @@ -1323,13 +1307,11 @@ main(int argc, char **argv) } if (rc == pcmk_ok) { - start_mainloop(); + start_mainloop(controld_api); } } else if (rsc_cmd == 'R') { const char *router_node = host_uname; - xmlNode *msg_data = NULL; - xmlNode *cmd = NULL; int attr_options = pcmk__node_attr_none; if (host_uname) { @@ -1348,35 +1330,24 @@ main(int argc, char **argv) } } - if (crmd_channel == NULL) { + if (controld_api == NULL) { printf("Dry run: skipping clean-up of %s due to CIB_file\n", host_uname? host_uname : "all nodes"); rc = pcmk_ok; goto bail; } - msg_data = create_xml_node(NULL, "crm-resource-reprobe-op"); - crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, host_uname); - if (safe_str_neq(router_node, host_uname)) { - crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); - } - - cmd = create_request(CRM_OP_REPROBE, msg_data, router_node, - CRM_SYSTEM_CRMD, crm_system_name, our_pid); - free_xml(msg_data); - crm_debug("Re-checking the state of all resources on %s", host_uname?host_uname:"all nodes"); rc = pcmk_rc2legacy(pcmk__node_attr_request_clear(NULL, host_uname, NULL, NULL, NULL, NULL, attr_options)); - if (crm_ipc_send(crmd_channel, cmd, 0, 0, NULL) > 0) { - start_mainloop(); + if (controld_api->reprobe(controld_api, host_uname, + router_node) == pcmk_rc_ok) { + start_mainloop(controld_api); } - free_xml(cmd); - } else if (rsc_cmd == 'D') { xmlNode *msg_data = NULL; @@ -1398,12 +1369,14 @@ main(int argc, char **argv) bail: - free(our_pid); pe_free_working_set(data_set); if (cib_conn != NULL) { cib_conn->cmds->signoff(cib_conn); cib_delete(cib_conn); } + if (controld_api != NULL) { + pcmk_free_controld_api(controld_api); + } if (is_ocf_rc) { exit_code = rc; diff --git a/tools/crm_resource.h b/tools/crm_resource.h index 84b094b1b6b..d5bfff35c4f 100644 --- a/tools/crm_resource.h +++ b/tools/crm_resource.h @@ -1,5 +1,5 @@ /* - * Copyright 2004-2019 the Pacemaker project contributors + * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. * @@ -21,6 +21,7 @@ #include #include #include +#include "crm_resource_controller.h" extern bool print_pending; @@ -30,12 +31,13 @@ extern bool BE_QUIET; extern int resource_verbose; extern int cib_options; -extern int crmd_replies_needed; extern char *move_lifetime; extern const char *attr_set_type; +extern pcmk_controld_api_cb_t controld_api_cb; + /* ban */ int cli_resource_prefer(const char *rsc_id, const char *host, cib_t * cib_conn); int cli_resource_ban(const char *rsc_id, const char *host, GListPtr allnodes, cib_t * cib_conn); @@ -61,14 +63,16 @@ int cli_resource_print_operations(const char *rsc_id, const char *host_uname, bo /* runtime */ void cli_resource_check(cib_t * cib, resource_t *rsc); -int cli_resource_fail(crm_ipc_t * crmd_channel, const char *host_uname, const char *rsc_id, pe_working_set_t * data_set); +int cli_resource_fail(pcmk_controld_api_t *controld_api, + const char *host_uname, const char *rsc_id, + pe_working_set_t *data_set); int cli_resource_search(resource_t *rsc, const char *requested_name, pe_working_set_t *data_set); -int cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, - resource_t *rsc, const char *operation, - const char *interval_spec, bool just_failures, - pe_working_set_t *data_set); -int cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name, +int cli_resource_delete(pcmk_controld_api_t *controld_api, + const char *host_uname, pe_resource_t *rsc, + const char *operation, const char *interval_spec, + bool just_failures, pe_working_set_t *data_set); +int cli_cleanup_all(pcmk_controld_api_t *controld_api, const char *node_name, const char *operation, const char *interval_spec, pe_working_set_t *data_set); int cli_resource_restart(pe_resource_t *rsc, const char *host, int timeout_ms, diff --git a/tools/crm_resource_controller.c b/tools/crm_resource_controller.c new file mode 100644 index 00000000000..845791632d4 --- /dev/null +++ b/tools/crm_resource_controller.c @@ -0,0 +1,425 @@ +/* + * Copyright 2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include +#include +#include +#include "crm_resource.h" + +// API object's private members +struct controller_private { + char *client_name; // Client name to use with IPC + char *client_uuid; // Client UUID to use with IPC + mainloop_io_t *source; // If main loop used, I/O source for IPC + crm_ipc_t *ipc; // IPC connection to controller + int replies_expected; // How many controller replies are expected + pcmk_controld_api_cb_t dispatch_cb; // Caller's registered dispatch callback + pcmk_controld_api_cb_t destroy_cb; // Caller's registered destroy callback +}; + +static void +call_client_callback(pcmk_controld_api_t *api, pcmk_controld_api_cb_t *cb, + void *api_data) +{ + if ((cb != NULL) && (cb->callback != NULL)) { + cb->callback(api, api_data, cb->user_data); + } +} + +/* + * IPC callbacks when used with main loop + */ + +static void +controller_ipc_destroy(gpointer user_data) +{ + pcmk_controld_api_t *api = user_data; + struct controller_private *private = api->private; + + private->ipc = NULL; + private->source = NULL; + call_client_callback(api, &(private->destroy_cb), NULL); +} + +// \return < 0 if connection is no longer required, >= 0 if it is +static int +controller_ipc_dispatch(const char *buffer, ssize_t length, gpointer user_data) +{ + xmlNode *msg = NULL; + pcmk_controld_api_t *api = user_data; + + CRM_CHECK(buffer && api && api->private, return 0); + + msg = string2xml(buffer); + if (msg == NULL) { + crm_warn("Received malformed controller IPC message"); + } else { + struct controller_private *private = api->private; + + crm_log_xml_trace(msg, "controller-reply"); + private->replies_expected--; + call_client_callback(api, &(private->dispatch_cb), + get_message_xml(msg, F_CRM_DATA)); + free_xml(msg); + } + return 0; +} + +/* + * IPC utilities + */ + +// \return Standard Pacemaker return code +static int +send_hello(crm_ipc_t *ipc, const char *client_name, const char *client_uuid) +{ + xmlNode *hello = create_hello_message(client_uuid, client_name, "0", "1"); + int rc = crm_ipc_send(ipc, hello, 0, 0, NULL); + + free_xml(hello); + if (rc < 0) { + rc = pcmk_legacy2rc(rc); + crm_info("Could not send IPC hello to %s: %s " CRM_XS " rc=%s", + CRM_SYSTEM_CRMD /* ipc->name */, + pcmk_rc_str(rc), rc); + return rc; + } + crm_debug("Sent IPC hello to %s", CRM_SYSTEM_CRMD /* ipc->name */); + return pcmk_rc_ok; +} + +// \return Standard Pacemaker return code +static int +send_controller_request(pcmk_controld_api_t *api, const char *op, + xmlNode *msg_data, const char *node) +{ + int rc; + struct controller_private *private = api->private; + xmlNode *cmd = create_request(op, msg_data, node, CRM_SYSTEM_CRMD, + private->client_name, private->client_uuid); + const char *reference = crm_element_value(cmd, XML_ATTR_REFERENCE); + + if ((cmd == NULL) || (reference == NULL)) { + return EINVAL; + } + + //@TODO pass as args? 0=crm_ipc_flags, 0=timeout_ms (default 5s), NULL=reply + crm_log_xml_trace(cmd, "controller-request"); + rc = crm_ipc_send(private->ipc, cmd, 0, 0, NULL); + free_xml(cmd); + if (rc < 0) { + return pcmk_legacy2rc(rc); + } + private->replies_expected++; + return pcmk_rc_ok; +} + +/* + * pcmk_controld_api_t methods + */ + +static int +controller_connect_mainloop(pcmk_controld_api_t *api) +{ + struct controller_private *private = api->private; + struct ipc_client_callbacks callbacks = { + .dispatch = controller_ipc_dispatch, + .destroy = controller_ipc_destroy, + }; + + private->source = mainloop_add_ipc_client(CRM_SYSTEM_CRMD, + G_PRIORITY_DEFAULT, 0, api, + &callbacks); + if (private->source == NULL) { + return ENOTCONN; + } + + private->ipc = mainloop_get_ipc_client(private->source); + if (private->ipc == NULL) { + (void) api->disconnect(api); + return ENOTCONN; + } + + crm_debug("Connected to %s IPC (attaching to main loop)", CRM_SYSTEM_CRMD); + return pcmk_rc_ok; +} + +static int +controller_connect_no_mainloop(pcmk_controld_api_t *api) +{ + struct controller_private *private = api->private; + + private->ipc = crm_ipc_new(CRM_SYSTEM_CRMD, 0); + if (private->ipc == NULL) { + return ENOTCONN; + } + if (!crm_ipc_connect(private->ipc)) { + crm_ipc_close(private->ipc); + crm_ipc_destroy(private->ipc); + private->ipc = NULL; + return errno; + } + /* @TODO caller needs crm_ipc_get_fd(private->ipc); either add method for + * that, or replace use_mainloop with int *fd + */ + crm_debug("Connected to %s IPC", CRM_SYSTEM_CRMD); + return pcmk_rc_ok; +} + +static void +set_callback(pcmk_controld_api_cb_t *dest, pcmk_controld_api_cb_t *source) +{ + if (source) { + dest->callback = source->callback; + dest->user_data = source->user_data; + } +} + +static int +controller_api_connect(pcmk_controld_api_t *api, bool use_mainloop, + pcmk_controld_api_cb_t *dispatch_cb, + pcmk_controld_api_cb_t *destroy_cb) +{ + int rc = pcmk_rc_ok; + struct controller_private *private; + + if (api == NULL) { + return EINVAL; + } + private = api->private; + + set_callback(&(private->dispatch_cb), dispatch_cb); + set_callback(&(private->destroy_cb), destroy_cb); + + if (private->ipc != NULL) { + return pcmk_rc_ok; // already connected + } + + if (use_mainloop) { + rc = controller_connect_mainloop(api); + } else { + rc = controller_connect_no_mainloop(api); + } + if (rc != pcmk_rc_ok) { + return rc; + } + + rc = send_hello(private->ipc, private->client_name, private->client_uuid); + if (rc != pcmk_rc_ok) { + (void) api->disconnect(api); + } + return rc; +} + +static int +controller_api_disconnect(pcmk_controld_api_t *api) +{ + struct controller_private *private = api->private; + + if (private->source != NULL) { + // Attached to main loop + mainloop_del_ipc_client(private->source); + private->source = NULL; + private->ipc = NULL; + + } else if (private->ipc != NULL) { + // Not attached to main loop + crm_ipc_t *ipc = private->ipc; + + private->ipc = NULL; + crm_ipc_close(ipc); + crm_ipc_destroy(ipc); + } + crm_debug("Disconnected from %s IPC", CRM_SYSTEM_CRMD /* ipc->name */); + return pcmk_rc_ok; +} + +//@TODO dispatch function for non-mainloop a la stonith_dispatch() +//@TODO convenience retry-connect function a la stonith_api_connect_retry() + +static unsigned int +controller_api_replies_expected(pcmk_controld_api_t *api) +{ + if (api != NULL) { + struct controller_private *private = api->private; + + return private->replies_expected; + } + return 0; +} + +static xmlNode * +create_reprobe_message_data(const char *target_node, const char *router_node) +{ + xmlNode *msg_data; + + msg_data = create_xml_node(NULL, "data_for_" CRM_OP_REPROBE); + crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, target_node); + if ((router_node != NULL) && safe_str_neq(router_node, target_node)) { + crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); + } + return msg_data; +} + +static int +controller_api_reprobe(pcmk_controld_api_t *api, const char *target_node, + const char *router_node) +{ + int rc = EINVAL; + + if (api != NULL) { + xmlNode *msg_data; + + crm_debug("Sending %s IPC request to reprobe %s via %s", + CRM_SYSTEM_CRMD, crm_str(target_node), crm_str(router_node)); + msg_data = create_reprobe_message_data(target_node, router_node); + rc = send_controller_request(api, CRM_OP_REPROBE, msg_data, + (router_node? router_node : target_node)); + free_xml(msg_data); + } + return rc; +} + +// \return Standard Pacemaker return code +static int +controller_resource_op(pcmk_controld_api_t *api, const char *op, + const char *target_node, const char *router_node, + bool cib_only, const char *rsc_id, + const char *rsc_long_id, const char *standard, + const char *provider, const char *type) +{ + int rc; + char *key; + xmlNode *msg_data, *xml_rsc, *params; + + if (api == NULL) { + return EINVAL; + } + if (router_node == NULL) { + router_node = target_node; + } + + msg_data = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); + + /* The controller logs the transition key from resource op requests, so we + * need to have *something* for it. + */ + key = generate_transition_key(0, getpid(), 0, + "xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx"); + crm_xml_add(msg_data, XML_ATTR_TRANSITION_KEY, key); + free(key); + + crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, target_node); + if (safe_str_neq(router_node, target_node)) { + crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); + } + + if (cib_only) { + // Indicate that only the CIB needs to be cleaned + crm_xml_add(msg_data, PCMK__XA_MODE, XML_TAG_CIB); + } + + xml_rsc = create_xml_node(msg_data, XML_CIB_TAG_RESOURCE); + crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); + crm_xml_add(xml_rsc, XML_ATTR_ID_LONG, rsc_long_id); + crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, standard); + crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, provider); + crm_xml_add(xml_rsc, XML_ATTR_TYPE, type); + + params = create_xml_node(msg_data, XML_TAG_ATTRS); + crm_xml_add(params, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); + + // The controller parses the timeout from the request + key = crm_meta_name(XML_ATTR_TIMEOUT); + crm_xml_add(params, key, "60000"); /* 1 minute */ //@TODO pass as arg + free(key); + + rc = send_controller_request(api, op, msg_data, router_node); + free_xml(msg_data); + return rc; +} + +static int +controller_api_fail_resource(pcmk_controld_api_t *api, + const char *target_node, const char *router_node, + const char *rsc_id, const char *rsc_long_id, + const char *standard, const char *provider, + const char *type) +{ + crm_debug("Sending %s IPC request to fail %s (a.k.a. %s) on %s via %s", + CRM_SYSTEM_CRMD, crm_str(rsc_id), crm_str(rsc_long_id), + crm_str(target_node), crm_str(router_node)); + return controller_resource_op(api, CRM_OP_LRM_FAIL, target_node, + router_node, false, rsc_id, rsc_long_id, + standard, provider, type); +} + +static int +controller_api_refresh_resource(pcmk_controld_api_t *api, + const char *target_node, + const char *router_node, + const char *rsc_id, const char *rsc_long_id, + const char *standard, const char *provider, + const char *type, bool cib_only) +{ + crm_debug("Sending %s IPC request to refresh %s (a.k.a. %s) on %s via %s", + CRM_SYSTEM_CRMD, crm_str(rsc_id), crm_str(rsc_long_id), + crm_str(target_node), crm_str(router_node)); + return controller_resource_op(api, CRM_OP_LRM_DELETE, target_node, + router_node, cib_only, rsc_id, rsc_long_id, + standard, provider, type); +} + +pcmk_controld_api_t * +pcmk_new_controld_api(const char *client_name, const char *client_uuid) +{ + struct controller_private *private; + pcmk_controld_api_t *api = calloc(1, sizeof(pcmk_controld_api_t)); + + CRM_ASSERT(api != NULL); + + api->private = calloc(1, sizeof(struct controller_private)); + CRM_ASSERT(api->private != NULL); + private = api->private; + + if (client_name == NULL) { + client_name = crm_system_name? crm_system_name : "client"; + } + private->client_name = strdup(client_name); + CRM_ASSERT(private->client_name != NULL); + + if (client_uuid == NULL) { + private->client_uuid = crm_generate_uuid(); + } else { + private->client_uuid = strdup(client_uuid); + } + CRM_ASSERT(private->client_uuid != NULL); + + api->connect = controller_api_connect; + api->disconnect = controller_api_disconnect; + api->replies_expected = controller_api_replies_expected; + api->reprobe = controller_api_reprobe; + api->fail_resource = controller_api_fail_resource; + api->refresh_resource = controller_api_refresh_resource; + return api; +} + +void +pcmk_free_controld_api(pcmk_controld_api_t *api) +{ + if (api != NULL) { + struct controller_private *private = api->private; + + api->disconnect(api); + free(private->client_name); + free(private->client_uuid); + free(api->private); + free(api); + } +} diff --git a/tools/crm_resource_controller.h b/tools/crm_resource_controller.h new file mode 100644 index 00000000000..50e20b43f79 --- /dev/null +++ b/tools/crm_resource_controller.h @@ -0,0 +1,198 @@ +/* + * Copyright 2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ +#ifndef PCMK__CONTROLD_API_H +#define PCMK__CONTROLD_API_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include // bool + +/* This is a demonstration of an abstracted controller IPC API. It is expected + * that this will be improved and moved to libcrmcommon. + * + * @TODO We could consider whether it's reasonable to have a single type for + * all daemons' IPC APIs (e.g. pcmk_ipc_api_t instead of pcmk_*_api_t). They + * could potentially have common connect/disconnect methods and then a void* to + * a group of API-specific methods. + * + * In that case, the callback type would also need to be generic, taking + * (pcmk_ipc_api_t *api, void *api_data, void *user_data), with individual APIs + * having functions for getting useful info from api_data. If all APIs followed + * the call_id model, we could use int call_id instead of api_data. + * + * A major annoyance is that the controller IPC protocol currently does not have + * any way to tie a particular reply to a particular request. The current + * clients (crmadmin, crm_node, and crm_resource) simply know what kind of reply + * to expect for the kind of request they sent. In crm_resource's case, all it + * does is count replies, ignoring their content altogether. + * + * That really forces us to have a single callback for all events rather than a + * per-request callback. That in turn implies that callers can only provide a + * single user data pointer. + * + * @TODO Define protocol version constants to use in hello message. + * @TODO Allow callers to specify timeouts. + * @TODO Define call IDs for controller ops, while somehow maintaining backward + * compatibility, since a client running on a Pacemaker Remote node could + * be older or newer than the controller on the connection's cluster + * node. + * @TODO The controller currently does not respond to hello messages. We should + * establish a common connection handshake protocol for all daemons that + * involves a hello message and acknowledgement. We should support sync + * or async connection (i.e. block until the ack is received, or return + * after the hello is sent and call a connection callback when the hello + * ack is received). + */ + +//! \internal +typedef struct pcmk_controld_api_s pcmk_controld_api_t; + +//! \internal +typedef struct pcmk_controld_api_callback_s { + void (*callback)(pcmk_controld_api_t *api, void *api_data, void *user_data); + void *user_data; +} pcmk_controld_api_cb_t; + +//! \internal +struct pcmk_controld_api_s { + //! \internal + void *private; + + /*! + * \internal + * \brief Connect to the local controller + * + * \param[in] api Controller API instance + * \param[in] use_mainloop If true, attach IPC to main loop + * \param[in] dispatch_cb If not NULL, call this when replies are received + * \param[in] destroy_cb If not NULL, call this if connection drops + * + * \return Standard Pacemaker return code + * \note Only the pointers inside the callback objects need to be + * persistent, not the callback objects themselves. The destroy_cb + * will be called only for unrequested disconnects. + */ + int (*connect)(pcmk_controld_api_t *api, bool use_mainloop, + pcmk_controld_api_cb_t *dispatch_cb, + pcmk_controld_api_cb_t *destroy_cb); + + /*! + * \internal + * \brief Disconnect from the local controller + * + * \param[in] api Controller API instance + * + * \return Standard Pacemaker return code + */ + int (*disconnect)(pcmk_controld_api_t *api); + + /*! + * \internal + * \brief Check number of replies still expected from controller + * + * \param[in] api Controller API instance + * + * \return Number of expected replies + */ + unsigned int (*replies_expected)(pcmk_controld_api_t *api); + + /*! + * \internal + * \brief Send a reprobe controller operation + * + * \param[in] api Controller API instance + * \param[in] target_node Name of node to reprobe + * \param[in] router_node Router node for host + * + * \return Standard Pacemaker return code + */ + int (*reprobe)(pcmk_controld_api_t *api, const char *target_node, + const char *router_node); + + /* @TODO These methods have a lot of arguments. One possibility would be to + * make a struct for agent info (standard/provider/type), which theortically + * could be used throughout pacemaker code. However that would end up being + * really awkward to use generically, since sometimes you need to allocate + * those strings (char *) and other times you only have references into XML + * (const char *). We could make some structs just for this API. + */ + + /*! + * \internal + * \brief Ask the controller to fail a resource + * + * \param[in] api Controller API instance + * \param[in] target_node Name of node resource is on + * \param[in] router_node Router node for target + * \param[in] rsc_id ID of resource to fail + * \param[in] rsc_long_id Long ID of resource (if any) + * \param[in] standard Standard of resource + * \param[in] provider Provider of resource (if any) + * \param[in] type Type of resource to fail + * + * \return Standard Pacemaker return code + */ + int (*fail_resource)(pcmk_controld_api_t *api, const char *target_node, + const char *router_node, const char *rsc_id, + const char *rsc_long_id, const char *standard, + const char *provider, const char *type); + + /*! + * \internal + * \brief Ask the controller to refresh a resource + * + * \param[in] api Controller API instance + * \param[in] target_node Name of node resource is on + * \param[in] router_node Router node for target + * \param[in] rsc_id ID of resource to refresh + * \param[in] rsc_long_id Long ID of resource (if any) + * \param[in] standard Standard of resource + * \param[in] provider Provider of resource (if any) + * \param[in] type Type of resource + * \param[in] cib_only If true, clean resource from CIB only + * + * \return Standard Pacemaker return code + */ + int (*refresh_resource)(pcmk_controld_api_t *api, const char *target_node, + const char *router_node, const char *rsc_id, + const char *rsc_long_id, const char *standard, + const char *provider, const char *type, + bool cib_only); +}; + +/*! + * \internal + * \brief Create new controller IPC API object for clients + * + * \param[in] client_name Client name to use with IPC + * \param[in] client_uuid Client UUID to use with IPC + * + * \return Newly allocated object + * \note This function asserts on errors, so it will never return NULL. + * The caller is responsible for freeing the result with + * pcmk_free_controld_api(). + */ +pcmk_controld_api_t *pcmk_new_controld_api(const char *client_name, + const char *client_uuid); + +/*! + * \internal + * \brief Free a controller IPC API object + * + * \param[in] api Controller IPC API object to free + */ +void pcmk_free_controld_api(pcmk_controld_api_t *api); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c index ff2fa702d65..5c4e1b6ad34 100644 --- a/tools/crm_resource_runtime.c +++ b/tools/crm_resource_runtime.c @@ -11,7 +11,6 @@ int resource_verbose = 0; bool do_force = FALSE; -int crmd_replies_needed = 1; /* The welcome message */ const char *attr_set_type = XML_TAG_ATTR_SETS; @@ -458,58 +457,56 @@ cli_resource_delete_attribute(resource_t *rsc, const char *requested_name, return rc; } +// \return Standard Pacemaker return code static int -send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, +send_lrm_rsc_op(pcmk_controld_api_t *controld_api, bool do_fail_resource, const char *host_uname, const char *rsc_id, - bool only_failed, pe_working_set_t * data_set) + pe_working_set_t *data_set) { - char *our_pid = NULL; - char *key = NULL; - int rc = -ECOMM; - xmlNode *cmd = NULL; - xmlNode *xml_rsc = NULL; const char *router_node = host_uname; + const char *rsc_api_id = NULL; + const char *rsc_long_id = NULL; const char *rsc_class = NULL; + const char *rsc_provider = NULL; const char *rsc_type = NULL; - xmlNode *params = NULL; - xmlNode *msg_data = NULL; bool cib_only = false; resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL) { CMD_ERR("Resource %s not found", rsc_id); - return -ENXIO; + return ENXIO; } else if (rsc->variant != pe_native) { CMD_ERR("We can only process primitive resources, not %s", rsc_id); - return -EINVAL; + return EINVAL; } rsc_class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); + rsc_provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER), rsc_type = crm_element_value(rsc->xml, XML_ATTR_TYPE); if ((rsc_class == NULL) || (rsc_type == NULL)) { CMD_ERR("Resource %s does not have a class and type", rsc_id); - return -EINVAL; + return EINVAL; } if (host_uname == NULL) { CMD_ERR("Please specify a node name"); - return -EINVAL; + return EINVAL; } else { pe_node_t *node = pe_find_node(data_set->nodes, host_uname); if (node == NULL) { CMD_ERR("Node %s not found", host_uname); - return -pcmk_err_node_unknown; + return pcmk_rc_node_unknown; } if (!(node->details->online)) { - if (strcmp(op, CRM_OP_LRM_DELETE) == 0) { - cib_only = true; - } else { + if (do_fail_resource) { CMD_ERR("Node %s is not online", host_uname); - return -ENOTCONN; + return ENOTCONN; + } else { + cib_only = true; } } if (!cib_only && pe__is_guest_or_remote_node(node)) { @@ -517,67 +514,28 @@ send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, if (node == NULL) { CMD_ERR("No cluster connection to Pacemaker Remote node %s detected", host_uname); - return -ENOTCONN; + return ENOTCONN; } router_node = node->details->uname; } } - msg_data = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); - - /* The controller logs the transition key from requests, so we need to have - * *something* for it. - */ - key = generate_transition_key(0, getpid(), 0, - "xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx"); - crm_xml_add(msg_data, XML_ATTR_TRANSITION_KEY, key); - free(key); - - crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, host_uname); - if (safe_str_neq(router_node, host_uname)) { - crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); - } - - if (cib_only) { - // Indicate that only the CIB needs to be cleaned - crm_xml_add(msg_data, PCMK__XA_MODE, XML_TAG_CIB); - } - - xml_rsc = create_xml_node(msg_data, XML_CIB_TAG_RESOURCE); if (rsc->clone_name) { - crm_xml_add(xml_rsc, XML_ATTR_ID, rsc->clone_name); - crm_xml_add(xml_rsc, XML_ATTR_ID_LONG, rsc->id); - + rsc_api_id = rsc->clone_name; + rsc_long_id = rsc->id; } else { - crm_xml_add(xml_rsc, XML_ATTR_ID, rsc->id); + rsc_api_id = rsc->id; } - - crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, rsc_class); - crm_copy_xml_element(rsc->xml, xml_rsc, XML_AGENT_ATTR_PROVIDER); - crm_xml_add(xml_rsc, XML_ATTR_TYPE, rsc_type); - - params = create_xml_node(msg_data, XML_TAG_ATTRS); - crm_xml_add(params, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); - - // The controller parses the timeout from the request - key = crm_meta_name(XML_ATTR_TIMEOUT); - crm_xml_add(params, key, "60000"); /* 1 minute */ - free(key); - - our_pid = crm_getpid_s(); - cmd = create_request(op, msg_data, router_node, CRM_SYSTEM_CRMD, crm_system_name, our_pid); - free_xml(msg_data); - - if (crm_ipc_send(crmd_channel, cmd, 0, 0, NULL) > 0) { - rc = 0; - + if (do_fail_resource) { + return controld_api->fail_resource(controld_api, host_uname, + router_node, rsc_api_id, rsc_long_id, + rsc_class, rsc_provider, rsc_type); } else { - crm_debug("Could not send %s op to the controller", op); - rc = -ENOTCONN; + return controld_api->refresh_resource(controld_api, host_uname, + router_node, rsc_api_id, + rsc_long_id, rsc_class, + rsc_provider, rsc_type, cib_only); } - - free_xml(cmd); - return rc; } /*! @@ -597,8 +555,9 @@ rsc_fail_name(resource_t *rsc) return is_set(rsc->flags, pe_rsc_unique)? strdup(name) : clone_strip(name); } +// \return Standard Pacemaker return code static int -clear_rsc_history(crm_ipc_t *crmd_channel, const char *host_uname, +clear_rsc_history(pcmk_controld_api_t *controld_api, const char *host_uname, const char *rsc_id, pe_working_set_t *data_set) { int rc = pcmk_ok; @@ -608,27 +567,22 @@ clear_rsc_history(crm_ipc_t *crmd_channel, const char *host_uname, * single operation, we might wind up with a wrong idea of the current * resource state, and we might not re-probe the resource. */ - rc = send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_DELETE, host_uname, rsc_id, - TRUE, data_set); - if (rc != pcmk_ok) { + rc = send_lrm_rsc_op(controld_api, false, host_uname, rsc_id, data_set); + if (rc != pcmk_rc_ok) { return rc; } - crmd_replies_needed++; - crm_trace("Processing %d mainloop inputs", crmd_replies_needed); + crm_trace("Processing %d mainloop inputs", + controld_api->replies_expected(controld_api)); while (g_main_context_iteration(NULL, FALSE)) { crm_trace("Processed mainloop input, %d still remaining", - crmd_replies_needed); - } - - if (crmd_replies_needed < 0) { - crmd_replies_needed = 0; + controld_api->replies_expected(controld_api)); } return rc; } static int -clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name, +clear_rsc_failures(pcmk_controld_api_t *controld_api, const char *node_name, const char *rsc_id, const char *operation, const char *interval_spec, pe_working_set_t *data_set) { @@ -700,9 +654,9 @@ clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name, g_hash_table_iter_init(&iter, rscs); while (g_hash_table_iter_next(&iter, (gpointer *) &failed_id, NULL)) { crm_debug("Erasing failures of %s on %s", failed_id, node_name); - rc = clear_rsc_history(crmd_channel, node_name, failed_id, data_set); - if (rc != pcmk_ok) { - return rc; + rc = clear_rsc_history(controld_api, node_name, failed_id, data_set); + if (rc != pcmk_rc_ok) { + return pcmk_rc2legacy(rc); } } g_hash_table_destroy(rscs); @@ -728,7 +682,7 @@ clear_rsc_fail_attrs(resource_t *rsc, const char *operation, } int -cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, +cli_resource_delete(pcmk_controld_api_t *controld_api, const char *host_uname, resource_t *rsc, const char *operation, const char *interval_spec, bool just_failures, pe_working_set_t *data_set) @@ -745,7 +699,7 @@ cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) { resource_t *child = (resource_t *) lpc->data; - rc = cli_resource_delete(crmd_channel, host_uname, child, operation, + rc = cli_resource_delete(controld_api, host_uname, child, operation, interval_spec, just_failures, data_set); if (rc != pcmk_ok) { return rc; @@ -779,7 +733,7 @@ cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, node = (node_t *) lpc->data; if (node->details->online) { - rc = cli_resource_delete(crmd_channel, node->details->uname, + rc = cli_resource_delete(controld_api, node->details->uname, rsc, operation, interval_spec, just_failures, data_set); } @@ -807,7 +761,7 @@ cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, return -EOPNOTSUPP; } - if (crmd_channel == NULL) { + if (controld_api == NULL) { printf("Dry run: skipping clean-up of %s on %s due to CIB_file\n", rsc->id, host_uname); return pcmk_ok; @@ -821,10 +775,11 @@ cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, } if (just_failures) { - rc = clear_rsc_failures(crmd_channel, host_uname, rsc->id, operation, + rc = clear_rsc_failures(controld_api, host_uname, rsc->id, operation, interval_spec, data_set); } else { - rc = clear_rsc_history(crmd_channel, host_uname, rsc->id, data_set); + rc = clear_rsc_history(controld_api, host_uname, rsc->id, data_set); + rc = pcmk_rc2legacy(rc); } if (rc != pcmk_ok) { printf("Cleaned %s failures on %s, but unable to clean history: %s\n", @@ -836,7 +791,7 @@ cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, } int -cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name, +cli_cleanup_all(pcmk_controld_api_t *controld_api, const char *node_name, const char *operation, const char *interval_spec, pe_working_set_t *data_set) { @@ -844,12 +799,11 @@ cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name, int attr_options = pcmk__node_attr_none; const char *display_name = node_name? node_name : "all nodes"; - if (crmd_channel == NULL) { + if (controld_api == NULL) { printf("Dry run: skipping clean-up of %s due to CIB_file\n", display_name); return pcmk_ok; } - crmd_replies_needed = 0; if (node_name) { node_t *node = pe_find_node(data_set->nodes, node_name); @@ -872,7 +826,7 @@ cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name, } if (node_name) { - rc = clear_rsc_failures(crmd_channel, node_name, NULL, + rc = clear_rsc_failures(controld_api, node_name, NULL, operation, interval_spec, data_set); if (rc != pcmk_ok) { printf("Cleaned all resource failures on %s, but unable to clean history: %s\n", @@ -883,7 +837,7 @@ cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name, for (GList *iter = data_set->nodes; iter; iter = iter->next) { pe_node_t *node = (pe_node_t *) iter->data; - rc = clear_rsc_failures(crmd_channel, node->details->uname, NULL, + rc = clear_rsc_failures(controld_api, node->details->uname, NULL, operation, interval_spec, data_set); if (rc != pcmk_ok) { printf("Cleaned all resource failures on all nodes, but unable to clean history: %s\n", @@ -948,12 +902,13 @@ cli_resource_check(cib_t * cib_conn, resource_t *rsc) } } +// \return Standard Pacemaker return code int -cli_resource_fail(crm_ipc_t * crmd_channel, const char *host_uname, - const char *rsc_id, pe_working_set_t * data_set) +cli_resource_fail(pcmk_controld_api_t *controld_api, const char *host_uname, + const char *rsc_id, pe_working_set_t *data_set) { - crm_warn("Failing: %s", rsc_id); - return send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_FAIL, host_uname, rsc_id, FALSE, data_set); + crm_notice("Failing %s on %s", rsc_id, host_uname); + return send_lrm_rsc_op(controld_api, true, host_uname, rsc_id, data_set); } static GHashTable * From 5b486ac7dc1b3dc3c15d58967bd161c029add078 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Tue, 28 Jan 2020 21:31:58 -0600 Subject: [PATCH 04/10] Fix: tools: clean up before exiting crm_resource Previously, crm_resource would close open connections and free memory before exiting only in certain circumstances. Now, it always does. --- tools/crm_resource.c | 64 +++++++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/tools/crm_resource.c b/tools/crm_resource.c index b7ce90b6c71..0339faa416a 100644 --- a/tools/crm_resource.c +++ b/tools/crm_resource.c @@ -31,43 +31,63 @@ int cib_options = cib_sync_call; static GMainLoop *mainloop = NULL; +// Things that should be cleaned up on exit +static cib_t *cib_conn = NULL; +static pcmk_controld_api_t *controld_api = NULL; +static pe_working_set_t *data_set = NULL; + #define MESSAGE_TIMEOUT_S 60 +// Clean up and exit +_Noreturn static void +bye(crm_exit_t exit_code) +{ + if (cib_conn != NULL) { + cib_conn->cmds->signoff(cib_conn); + cib_delete(cib_conn); + } + if (controld_api != NULL) { + pcmk_free_controld_api(controld_api); + } + pe_free_working_set(data_set); + crm_exit(exit_code); +} + static gboolean resource_ipc_timeout(gpointer data) { fprintf(stderr, "Aborting because no messages received in %d seconds\n", MESSAGE_TIMEOUT_S); crm_err("No messages received in %d seconds", MESSAGE_TIMEOUT_S); - crm_exit(CRM_EX_TIMEOUT); + bye(CRM_EX_TIMEOUT); } static void -handle_controller_reply(pcmk_controld_api_t *controld_api, void *api_data, +handle_controller_reply(pcmk_controld_api_t *capi, void *api_data, void *user_data) { fprintf(stderr, "."); - if ((controld_api->replies_expected(controld_api) == 0) + if ((capi->replies_expected(capi) == 0) && mainloop && g_main_loop_is_running(mainloop)) { fprintf(stderr, " OK\n"); crm_debug("Got all the replies we expected"); - crm_exit(CRM_EX_OK); + bye(CRM_EX_OK); } } static void -handle_controller_drop(pcmk_controld_api_t *controld_api, void *api_data, +handle_controller_drop(pcmk_controld_api_t *capi, void *api_data, void *user_data) { crm_info("Connection to controller was terminated"); - crm_exit(CRM_EX_DISCONNECT); + bye(CRM_EX_DISCONNECT); } static void -start_mainloop(pcmk_controld_api_t *controld_api) +start_mainloop(pcmk_controld_api_t *capi) { - if (controld_api->replies_expected(controld_api) > 0) { - unsigned int count = controld_api->replies_expected(controld_api); + if (capi->replies_expected(capi) > 0) { + unsigned int count = capi->replies_expected(capi); fprintf(stderr, "Waiting for %d %s from the controller", count, pcmk__plural_alt(count, "reply", "replies")); @@ -466,10 +486,7 @@ main(int argc, char **argv) GHashTable *override_params = NULL; char *xml_file = NULL; - pcmk_controld_api_t *controld_api = NULL; - pe_working_set_t *data_set = NULL; xmlNode *cib_xml_copy = NULL; - cib_t *cib_conn = NULL; resource_t *rsc = NULL; bool recursive = FALSE; @@ -560,7 +577,7 @@ main(int argc, char **argv) } lrmd_api_delete(lrmd_conn); - crm_exit(exit_code); + bye(exit_code); } else if (safe_str_eq("show-metadata", longname)) { char *standard = NULL; @@ -589,7 +606,7 @@ main(int argc, char **argv) exit_code = crm_errno2exit(rc); } lrmd_api_delete(lrmd_conn); - crm_exit(exit_code); + bye(exit_code); } else if (safe_str_eq("list-agents", longname)) { lrmd_list_t *list = NULL; @@ -613,7 +630,7 @@ main(int argc, char **argv) exit_code = CRM_EX_NOSUCH; } lrmd_api_delete(lrmd_conn); - crm_exit(exit_code); + bye(exit_code); } else if (safe_str_eq("class", longname)) { if (!(pcmk_get_ra_caps(optarg) & pcmk_ra_cap_params)) { @@ -622,7 +639,7 @@ main(int argc, char **argv) optarg); } - crm_exit(exit_code); + bye(exit_code); } else { v_class = optarg; } @@ -896,13 +913,13 @@ main(int argc, char **argv) "validate-all", validate_options, override_params, timeout_ms); exit_code = crm_errno2exit(rc); - crm_exit(exit_code); + bye(exit_code); } } if (argerr) { CMD_ERR("Invalid option(s) supplied, use --help for valid usage"); - crm_exit(CRM_EX_USAGE); + bye(CRM_EX_USAGE); } if (do_force) { @@ -1369,15 +1386,6 @@ main(int argc, char **argv) bail: - pe_free_working_set(data_set); - if (cib_conn != NULL) { - cib_conn->cmds->signoff(cib_conn); - cib_delete(cib_conn); - } - if (controld_api != NULL) { - pcmk_free_controld_api(controld_api); - } - if (is_ocf_rc) { exit_code = rc; @@ -1391,5 +1399,5 @@ main(int argc, char **argv) } } - crm_exit(exit_code); + bye(exit_code); } From b0057acbfab6415290e11ec1b25eedcc19687f17 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 21 Feb 2020 15:17:12 -0600 Subject: [PATCH 05/10] Low: controller: improve client IPC hello validation --- daemons/controld/controld_control.c | 4 +- daemons/controld/controld_execd_state.c | 8 +- daemons/controld/controld_messages.c | 159 +++++++++++------------- daemons/controld/controld_messages.h | 6 +- 4 files changed, 79 insertions(+), 98 deletions(-) diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c index 82fb620225d..83a27deb7d0 100644 --- a/daemons/controld/controld_control.c +++ b/daemons/controld/controld_control.c @@ -391,11 +391,11 @@ crmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) crm_acl_get_set_user(msg, F_CRM_USER, client->user); #endif - crm_trace("Processing msg from %s", pcmk__client_name(client)); + crm_trace("Processing IPC message from %s", pcmk__client_name(client)); crm_log_xml_trace(msg, "controller[inbound]"); crm_xml_add(msg, F_CRM_SYS_FROM, client->id); - if (crmd_authorize_message(msg, client, NULL)) { + if (controld_authorize_ipc_message(msg, client, NULL)) { route_message(C_IPC_MESSAGE, msg); } diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c index b527dcf0f2d..7684b9453ea 100644 --- a/daemons/controld/controld_execd_state.c +++ b/daemons/controld/controld_execd_state.c @@ -435,14 +435,12 @@ crmd_proxy_send(const char *session, xmlNode *msg) static void crmd_proxy_dispatch(const char *session, xmlNode *msg) { - - crm_log_xml_trace(msg, "controller-proxy[inbound]"); - + crm_trace("Processing proxied IPC message from session %s", session); + crm_log_xml_trace(msg, "controller[inbound]"); crm_xml_add(msg, F_CRM_SYS_FROM, session); - if (crmd_authorize_message(msg, NULL, session)) { + if (controld_authorize_ipc_message(msg, NULL, session)) { route_message(C_IPC_MESSAGE, msg); } - trigger_fsa(fsa_source); } diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c index 6651489e22b..1065a48f4e4 100644 --- a/daemons/controld/controld_messages.c +++ b/daemons/controld/controld_messages.c @@ -481,113 +481,96 @@ relay_message(xmlNode * msg, gboolean originated_locally) return processing_complete; } -static gboolean -process_hello_message(xmlNode * hello, - char **client_name, char **major_version, char **minor_version) +// Return true if field contains a positive integer +static bool +authorize_version(xmlNode *message_data, const char *field, + const char *client_name, const char *ref, const char *uuid) { - const char *local_client_name; - const char *local_major_version; - const char *local_minor_version; + const char *version = crm_element_value(message_data, field); - *client_name = NULL; - *major_version = NULL; - *minor_version = NULL; - - if (hello == NULL) { - return FALSE; - } - - local_client_name = crm_element_value(hello, "client_name"); - local_major_version = crm_element_value(hello, "major_version"); - local_minor_version = crm_element_value(hello, "minor_version"); - - if (local_client_name == NULL || strlen(local_client_name) == 0) { - crm_err("Hello message was not valid (field %s not found)", "client name"); - return FALSE; - - } else if (local_major_version == NULL || strlen(local_major_version) == 0) { - crm_err("Hello message was not valid (field %s not found)", "major version"); - return FALSE; + if ((version == NULL) || (version[0] == '\0')) { + crm_warn("IPC hello from %s rejected: No protocol %s", + CRM_XS " ref=%s uuid=%s", + client_name, field, (ref? ref : "none"), uuid); + return false; + } else { + int version_num = crm_parse_int(version, NULL); - } else if (local_minor_version == NULL || strlen(local_minor_version) == 0) { - crm_err("Hello message was not valid (field %s not found)", "minor version"); - return FALSE; + if (version_num < 0) { + crm_warn("IPC hello from %s rejected: Protocol %s '%s' " + "not recognized", CRM_XS " ref=%s uuid=%s", + client_name, field, version, (ref? ref : "none"), uuid); + return false; + } } - - *client_name = strdup(local_client_name); - *major_version = strdup(local_major_version); - *minor_version = strdup(local_minor_version); - - crm_trace("Hello message ok"); - return TRUE; + return true; } -gboolean -crmd_authorize_message(xmlNode *client_msg, pcmk__client_t *curr_client, - const char *proxy_session) +/*! + * \internal + * \brief Check whether a client IPC message is acceptable + * + * If a given client IPC message is a hello, "authorize" it by ensuring it has + * valid information such as a protocol version, and return false indicating + * that nothing further needs to be done with the message. If the message is not + * a hello, just return true to indicate it needs further processing. + * + * \param[in] client_msg XML of IPC message + * \param[in] curr_client If IPC is not proxied, client that sent message + * \param[in] proxy_session If IPC is proxied, the session ID + * + * \return true if message needs further processing, false if it doesn't + */ +bool +controld_authorize_ipc_message(xmlNode *client_msg, pcmk__client_t *curr_client, + const char *proxy_session) { - char *client_name = NULL; - char *major_version = NULL; - char *minor_version = NULL; - gboolean auth_result = FALSE; - - xmlNode *xml = NULL; + xmlNode *message_data = NULL; + const char *client_name = NULL; const char *op = crm_element_value(client_msg, F_CRM_TASK); - const char *uuid = curr_client ? curr_client->id : proxy_session; + const char *ref = crm_element_value(client_msg, XML_ATTR_REFERENCE); + const char *uuid = (curr_client? curr_client->id : proxy_session); if (uuid == NULL) { - crm_warn("Message [%s] not authorized", crm_element_value(client_msg, XML_ATTR_REFERENCE)); - return FALSE; - - } else if (safe_str_neq(CRM_OP_HELLO, op)) { - return TRUE; + crm_warn("IPC message from client rejected: No client identifier " + CRM_XS " ref=%s", (ref? ref : "none")); + goto rejected; } - xml = get_message_xml(client_msg, F_CRM_DATA); - auth_result = process_hello_message(xml, &client_name, &major_version, &minor_version); - - if (auth_result == TRUE) { - if (client_name == NULL) { - crm_err("Bad client details (client_name=%s, uuid=%s)", - crm_str(client_name), uuid); - auth_result = FALSE; - } + if (safe_str_neq(CRM_OP_HELLO, op)) { + // Only hello messages need to be authorized + return true; } - if (auth_result == TRUE) { - /* check version */ - int mav = atoi(major_version); - int miv = atoi(minor_version); + message_data = get_message_xml(client_msg, F_CRM_DATA); - crm_trace("Checking client version number"); - if (mav < 0 || miv < 0) { - crm_err("Client version (%d:%d) is not acceptable", mav, miv); - auth_result = FALSE; - } + client_name = crm_element_value(message_data, "client_name"); + if ((client_name == NULL) || (client_name[0] == '\0')) { + crm_warn("IPC hello from client rejected: No client name", + CRM_XS " ref=%s uuid=%s", (ref? ref : "none"), uuid); + goto rejected; } - - if (auth_result == TRUE) { - crm_trace("Accepted client %s", client_name); - if (curr_client) { - curr_client->userdata = strdup(client_name); - } - - crm_trace("Triggering FSA: %s", __FUNCTION__); - mainloop_set_trigger(fsa_source); - - } else { - crm_warn("Rejected client logon request"); - if (curr_client) { - qb_ipcs_disconnect(curr_client->ipcs); - } + if (!authorize_version(message_data, "major_version", client_name, ref, + uuid)) { + goto rejected; + } + if (!authorize_version(message_data, "minor_version", client_name, ref, + uuid)) { + goto rejected; } - free(minor_version); - free(major_version); - free(client_name); + crm_trace("Validated IPC hello from client %s", client_name); + if (curr_client) { + curr_client->userdata = strdup(client_name); + } + mainloop_set_trigger(fsa_source); + return false; - /* hello messages should never be processed further */ - return FALSE; +rejected: + if (curr_client) { + qb_ipcs_disconnect(curr_client->ipcs); + } + return false; } static enum crmd_fsa_input diff --git a/daemons/controld/controld_messages.h b/daemons/controld/controld_messages.h index a733b8d5b8c..cd71c0313fd 100644 --- a/daemons/controld/controld_messages.h +++ b/daemons/controld/controld_messages.h @@ -80,9 +80,9 @@ extern gboolean send_msg_via_ipc(xmlNode * msg, const char *sys); gboolean crmd_is_proxy_session(const char *session); void crmd_proxy_send(const char *session, xmlNode *msg); -gboolean crmd_authorize_message(xmlNode *client_msg, - pcmk__client_t *curr_client, - const char *proxy_session); +bool controld_authorize_ipc_message(xmlNode *client_msg, + pcmk__client_t *curr_client, + const char *proxy_session); extern gboolean send_request(xmlNode * msg, char **msg_reference); From 7072ccc75ec7a395bea0aa6b65782825e3137713 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 21 Feb 2020 16:35:39 -0600 Subject: [PATCH 06/10] Log: daemons: don't log unnecessary trace after accepting IPC client For all of the daemons except the executor, the IPC post-connection handler did nothing but log a trace message with the libqb client pointer address. This is unnecessary since the daemons log a trace when first accepting the connection, and libqb logs another when creating the client. --- daemons/attrd/attrd_utils.c | 14 +------------- daemons/based/based_callbacks.c | 10 ++-------- daemons/controld/controld_control.c | 8 +------- daemons/execd/remoted_proxy.c | 16 +++++----------- daemons/fenced/pacemaker-fenced.c | 8 +------- daemons/pacemakerd/pacemakerd.c | 8 +------- daemons/schedulerd/pacemaker-schedulerd.c | 8 +------- maint/mocked/based.c | 9 +-------- 8 files changed, 13 insertions(+), 68 deletions(-) diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c index 6fc16bedce0..8bd3cd5d2aa 100644 --- a/daemons/attrd/attrd_utils.c +++ b/daemons/attrd/attrd_utils.c @@ -116,18 +116,6 @@ attrd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) return pcmk_ok; } -/*! - * \internal - * \brief Callback for successful client connection - * - * \param[in] c New connection - */ -static void -attrd_ipc_created(qb_ipcs_connection_t *c) -{ - crm_trace("Client connection %p accepted", c); -} - /*! * \internal * \brief Destroy a client IPC connection @@ -179,7 +167,7 @@ attrd_init_ipc(qb_ipcs_service_t **ipcs, qb_ipcs_msg_process_fn dispatch_fn) static struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = attrd_ipc_accept, - .connection_created = attrd_ipc_created, + .connection_created = NULL, .msg_process = NULL, .connection_closed = attrd_ipc_closed, .connection_destroyed = attrd_ipc_destroy diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c index 200bce902aa..87ed31c9b46 100644 --- a/daemons/based/based_callbacks.c +++ b/daemons/based/based_callbacks.c @@ -81,12 +81,6 @@ cib_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) return 0; } -static void -cib_ipc_created(qb_ipcs_connection_t * c) -{ - crm_trace("Connection %p", c); -} - static int32_t cib_ipc_dispatch_rw(qb_ipcs_connection_t * c, void *data, size_t size) { @@ -131,7 +125,7 @@ cib_ipc_destroy(qb_ipcs_connection_t * c) struct qb_ipcs_service_handlers ipc_ro_callbacks = { .connection_accept = cib_ipc_accept, - .connection_created = cib_ipc_created, + .connection_created = NULL, .msg_process = cib_ipc_dispatch_ro, .connection_closed = cib_ipc_closed, .connection_destroyed = cib_ipc_destroy @@ -139,7 +133,7 @@ struct qb_ipcs_service_handlers ipc_ro_callbacks = { struct qb_ipcs_service_handlers ipc_rw_callbacks = { .connection_accept = cib_ipc_accept, - .connection_created = cib_ipc_created, + .connection_created = NULL, .msg_process = cib_ipc_dispatch_rw, .connection_closed = cib_ipc_closed, .connection_destroyed = cib_ipc_destroy diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c index 83a27deb7d0..1a1c8c44bfa 100644 --- a/daemons/controld/controld_control.c +++ b/daemons/controld/controld_control.c @@ -364,12 +364,6 @@ crmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) return 0; } -static void -crmd_ipc_created(qb_ipcs_connection_t * c) -{ - crm_trace("Connection %p", c); -} - static int32_t crmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) { @@ -446,7 +440,7 @@ do_started(long long action, { static struct qb_ipcs_service_handlers crmd_callbacks = { .connection_accept = crmd_ipc_accept, - .connection_created = crmd_ipc_created, + .connection_created = NULL, .msg_process = crmd_ipc_dispatch, .connection_closed = crmd_ipc_closed, .connection_destroyed = crmd_ipc_destroy diff --git a/daemons/execd/remoted_proxy.c b/daemons/execd/remoted_proxy.c index bfe0202999f..dda111e2420 100644 --- a/daemons/execd/remoted_proxy.c +++ b/daemons/execd/remoted_proxy.c @@ -132,12 +132,6 @@ cib_proxy_accept_ro(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) return ipc_proxy_accept(c, uid, gid, CIB_CHANNEL_RO); } -static void -ipc_proxy_created(qb_ipcs_connection_t * c) -{ - crm_trace("Connection %p", c); -} - void ipc_proxy_forward_client(pcmk__client_t *ipc_proxy, xmlNode *xml) { @@ -335,7 +329,7 @@ ipc_proxy_destroy(qb_ipcs_connection_t * c) static struct qb_ipcs_service_handlers crmd_proxy_callbacks = { .connection_accept = crmd_proxy_accept, - .connection_created = ipc_proxy_created, + .connection_created = NULL, .msg_process = ipc_proxy_dispatch, .connection_closed = ipc_proxy_closed, .connection_destroyed = ipc_proxy_destroy @@ -343,7 +337,7 @@ static struct qb_ipcs_service_handlers crmd_proxy_callbacks = { static struct qb_ipcs_service_handlers attrd_proxy_callbacks = { .connection_accept = attrd_proxy_accept, - .connection_created = ipc_proxy_created, + .connection_created = NULL, .msg_process = ipc_proxy_dispatch, .connection_closed = ipc_proxy_closed, .connection_destroyed = ipc_proxy_destroy @@ -351,7 +345,7 @@ static struct qb_ipcs_service_handlers attrd_proxy_callbacks = { static struct qb_ipcs_service_handlers stonith_proxy_callbacks = { .connection_accept = stonith_proxy_accept, - .connection_created = ipc_proxy_created, + .connection_created = NULL, .msg_process = ipc_proxy_dispatch, .connection_closed = ipc_proxy_closed, .connection_destroyed = ipc_proxy_destroy @@ -359,7 +353,7 @@ static struct qb_ipcs_service_handlers stonith_proxy_callbacks = { static struct qb_ipcs_service_handlers cib_proxy_callbacks_ro = { .connection_accept = cib_proxy_accept_ro, - .connection_created = ipc_proxy_created, + .connection_created = NULL, .msg_process = ipc_proxy_dispatch, .connection_closed = ipc_proxy_closed, .connection_destroyed = ipc_proxy_destroy @@ -367,7 +361,7 @@ static struct qb_ipcs_service_handlers cib_proxy_callbacks_ro = { static struct qb_ipcs_service_handlers cib_proxy_callbacks_rw = { .connection_accept = cib_proxy_accept_rw, - .connection_created = ipc_proxy_created, + .connection_created = NULL, .msg_process = ipc_proxy_dispatch, .connection_closed = ipc_proxy_closed, .connection_destroyed = ipc_proxy_destroy diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c index bff2efd0822..5274f599a6d 100644 --- a/daemons/fenced/pacemaker-fenced.c +++ b/daemons/fenced/pacemaker-fenced.c @@ -74,12 +74,6 @@ st_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) return 0; } -static void -st_ipc_created(qb_ipcs_connection_t * c) -{ - crm_trace("Connection created for %p", c); -} - /* Exit code means? */ static int32_t st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) @@ -1227,7 +1221,7 @@ setup_cib(void) struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = st_ipc_accept, - .connection_created = st_ipc_created, + .connection_created = NULL, .msg_process = st_ipc_dispatch, .connection_closed = st_ipc_closed, .connection_destroyed = st_ipc_destroy diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c index 642f5c4f1c7..70c044b9903 100644 --- a/daemons/pacemakerd/pacemakerd.c +++ b/daemons/pacemakerd/pacemakerd.c @@ -549,12 +549,6 @@ pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) return 0; } -static void -pcmk_ipc_created(qb_ipcs_connection_t * c) -{ - crm_trace("Connection %p", c); -} - /* Exit code means? */ static int32_t pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) @@ -623,7 +617,7 @@ pcmk_ipc_destroy(qb_ipcs_connection_t * c) struct qb_ipcs_service_handlers mcp_ipc_callbacks = { .connection_accept = pcmk_ipc_accept, - .connection_created = pcmk_ipc_created, + .connection_created = NULL, .msg_process = pcmk_ipc_dispatch, .connection_closed = pcmk_ipc_closed, .connection_destroyed = pcmk_ipc_destroy diff --git a/daemons/schedulerd/pacemaker-schedulerd.c b/daemons/schedulerd/pacemaker-schedulerd.c index 88a17b2fd36..e4cd9a4a7ff 100644 --- a/daemons/schedulerd/pacemaker-schedulerd.c +++ b/daemons/schedulerd/pacemaker-schedulerd.c @@ -214,12 +214,6 @@ pe_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) return 0; } -static void -pe_ipc_created(qb_ipcs_connection_t * c) -{ - crm_trace("Connection %p", c); -} - gboolean process_pe_message(xmlNode *msg, xmlNode *xml_data, pcmk__client_t *sender); @@ -264,7 +258,7 @@ pe_ipc_destroy(qb_ipcs_connection_t * c) struct qb_ipcs_service_handlers ipc_callbacks = { .connection_accept = pe_ipc_accept, - .connection_created = pe_ipc_created, + .connection_created = NULL, .msg_process = pe_ipc_dispatch, .connection_closed = pe_ipc_closed, .connection_destroyed = pe_ipc_destroy diff --git a/maint/mocked/based.c b/maint/mocked/based.c index 59cc0d94e6b..fa5797f0f5d 100644 --- a/maint/mocked/based.c +++ b/maint/mocked/based.c @@ -57,13 +57,6 @@ mock_based_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) return ret; } -/* see based/based_callbacks.c:cib_ipc_created */ -static void -mock_based_ipc_created(qb_ipcs_connection_t *c) -{ - crm_trace("Connection %p", c); -} - /* see based/based_callbacks.c:cib_ipc_closed */ static int32_t mock_based_ipc_closed(qb_ipcs_connection_t *c) @@ -313,7 +306,7 @@ int main(int argc, char *argv[]) if (mock_based_options(ctxt, false, argc, (const char **) argv) > 0) { struct qb_ipcs_service_handlers cib_ipc_callbacks = { .connection_accept = mock_based_ipc_accept, - .connection_created = mock_based_ipc_created, + .connection_created = NULL, .msg_process = mock_based_dispatch_command, .connection_closed = mock_based_ipc_closed, .connection_destroyed = mock_based_ipc_destroy, From afdecca058f83f1dcaef3495fc623e39f3b20ae7 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 21 Feb 2020 16:47:11 -0600 Subject: [PATCH 07/10] Log: controller: improve messages related to incoming IPC --- daemons/controld/controld_control.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c index 1a1c8c44bfa..6da517a94d1 100644 --- a/daemons/controld/controld_control.c +++ b/daemons/controld/controld_control.c @@ -354,18 +354,20 @@ do_startup(long long action, } } +// \return libqb error code (0 on success, -errno on error) static int32_t -crmd_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) +accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid) { - crm_trace("Connection %p", c); + crm_trace("Accepting new IPC client connection"); if (pcmk__new_client(c, uid, gid) == NULL) { return -EIO; } return 0; } +// \return libqb error code (0 on success, -errno on error) static int32_t -crmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) +dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size) { uint32_t id = 0; uint32_t flags = 0; @@ -373,9 +375,7 @@ crmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) xmlNode *msg = pcmk__client_data2xml(client, data, size, &id, &flags); - crm_trace("Invoked: %s", pcmk__client_name(client)); pcmk__ipc_send_ack(client, id, flags, "ack"); - if (msg == NULL) { return 0; } @@ -385,11 +385,10 @@ crmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) crm_acl_get_set_user(msg, F_CRM_USER, client->user); #endif - crm_trace("Processing IPC message from %s", pcmk__client_name(client)); crm_log_xml_trace(msg, "controller[inbound]"); - crm_xml_add(msg, F_CRM_SYS_FROM, client->id); if (controld_authorize_ipc_message(msg, client, NULL)) { + crm_trace("Processing IPC message from %s", pcmk__client_name(client)); route_message(C_IPC_MESSAGE, msg); } @@ -439,9 +438,9 @@ do_started(long long action, enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data) { static struct qb_ipcs_service_handlers crmd_callbacks = { - .connection_accept = crmd_ipc_accept, + .connection_accept = accept_controller_client, .connection_created = NULL, - .msg_process = crmd_ipc_dispatch, + .msg_process = dispatch_controller_ipc, .connection_closed = crmd_ipc_closed, .connection_destroyed = crmd_ipc_destroy }; From fcc6714b65725e66ebcc38a4c4e3d1a12b2d00ce Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 21 Feb 2020 16:48:43 -0600 Subject: [PATCH 08/10] Log: libcrmcommon: improve new IPC client messages --- lib/common/ipc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/common/ipc.c b/lib/common/ipc.c index ca4d538ecb1..039da410508 100644 --- a/lib/common/ipc.c +++ b/lib/common/ipc.c @@ -425,7 +425,7 @@ pcmk__new_client(qb_ipcs_connection_t *c, uid_t uid_client, gid_t gid_client) } if (uid_client != 0) { - crm_trace("Giving access to group %u", gid_cluster); + crm_trace("Giving group %u access to new IPC connection", gid_cluster); /* Passing -1 to chown(2) means don't change */ qb_ipcs_connection_auth_set(c, -1, gid_cluster, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); } @@ -441,8 +441,8 @@ pcmk__new_client(qb_ipcs_connection_t *c, uid_t uid_client, gid_t gid_client) set_bit(client->flags, pcmk__client_privileged); } - crm_debug("Connecting %p for uid=%d gid=%d pid=%u id=%s", c, uid_client, gid_client, client->pid, client->id); - + crm_debug("New IPC client %s for PID %u with uid %d and gid %d", + client->id, client->pid, uid_client, gid_client); return client; } From 6b779b544c35563ecb9e1eb5049fde4535090545 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 21 Feb 2020 17:30:16 -0600 Subject: [PATCH 09/10] Log: daemons: trace incoming IPC when parsing it Previously, some callers of pcmk__client_data2xml() trace-logged the resulting XML, and some didn't. Moving the log into the function itself reduces duplication and ensures the trace is always done (at the minor cost of not being able to trace just one daemon's incoming IPC). Also remove an unused argument from the function. --- daemons/attrd/pacemaker-attrd.c | 4 +--- daemons/based/based_callbacks.c | 5 +---- daemons/controld/controld_control.c | 3 +-- daemons/execd/execd_commands.c | 3 +-- daemons/execd/pacemaker-execd.c | 2 +- daemons/execd/remoted_proxy.c | 2 +- daemons/fenced/fenced_commands.c | 4 ++-- daemons/fenced/pacemaker-fenced.c | 3 +-- daemons/pacemakerd/pacemakerd.c | 2 +- daemons/schedulerd/pacemaker-schedulerd.c | 2 +- include/crm/common/ipcs_internal.h | 2 +- lib/common/ipc.c | 15 +++++++++++++-- maint/mocked/based.c | 3 +-- 13 files changed, 26 insertions(+), 24 deletions(-) diff --git a/daemons/attrd/pacemaker-attrd.c b/daemons/attrd/pacemaker-attrd.c index eb6a63602af..4318e194a27 100644 --- a/daemons/attrd/pacemaker-attrd.c +++ b/daemons/attrd/pacemaker-attrd.c @@ -250,7 +250,7 @@ attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) crm_debug("No IPC data from PID %d", pcmk__client_pid(c)); return 0; } - xml = pcmk__client_data2xml(client, data, size, &id, &flags); + xml = pcmk__client_data2xml(client, data, &id, &flags); if (xml == NULL) { crm_debug("Unrecognizable IPC data from PID %d", pcmk__client_pid(c)); return 0; @@ -261,8 +261,6 @@ attrd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) crm_acl_get_set_user(xml, F_ATTRD_USER, client->user); #endif - crm_log_xml_trace(xml, __FUNCTION__); - op = crm_element_value(xml, F_ATTRD_TASK); if (client->name == NULL) { diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c index 87ed31c9b46..ba5482fd412 100644 --- a/daemons/based/based_callbacks.c +++ b/daemons/based/based_callbacks.c @@ -207,8 +207,7 @@ cib_common_callback(qb_ipcs_connection_t * c, void *data, size_t size, gboolean uint32_t flags = 0; int call_options = 0; pcmk__client_t *cib_client = pcmk__find_client(c); - xmlNode *op_request = pcmk__client_data2xml(cib_client, data, size, &id, - &flags); + xmlNode *op_request = pcmk__client_data2xml(cib_client, data, &id, &flags); if (op_request) { crm_element_value_int(op_request, F_CIB_CALLOPTS, &call_options); @@ -261,8 +260,6 @@ cib_common_callback(qb_ipcs_connection_t * c, void *data, size_t size, gboolean crm_acl_get_set_user(op_request, F_CIB_USER, cib_client->user); #endif - crm_log_xml_trace(op_request, "Client[inbound]"); - cib_common_callback_worker(id, flags, op_request, cib_client, privileged); free_xml(op_request); diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c index 6da517a94d1..32cd6c9cde3 100644 --- a/daemons/controld/controld_control.c +++ b/daemons/controld/controld_control.c @@ -373,7 +373,7 @@ dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size) uint32_t flags = 0; pcmk__client_t *client = pcmk__find_client(c); - xmlNode *msg = pcmk__client_data2xml(client, data, size, &id, &flags); + xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags); pcmk__ipc_send_ack(client, id, flags, "ack"); if (msg == NULL) { @@ -385,7 +385,6 @@ dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size) crm_acl_get_set_user(msg, F_CRM_USER, client->user); #endif - crm_log_xml_trace(msg, "controller[inbound]"); crm_xml_add(msg, F_CRM_SYS_FROM, client->id); if (controld_authorize_ipc_message(msg, client, NULL)) { crm_trace("Processing IPC message from %s", pcmk__client_name(client)); diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c index 6471983d12c..6058ec77dec 100644 --- a/daemons/execd/execd_commands.c +++ b/daemons/execd/execd_commands.c @@ -1841,8 +1841,7 @@ process_lrmd_message(pcmk__client_t *client, uint32_t id, xmlNode *request) } else { rc = -EOPNOTSUPP; do_reply = 1; - crm_err("Unknown %s from %s", op, client->name); - crm_log_xml_warn(request, "UnknownOp"); + crm_err("Unknown IPC request '%s' from %s", op, client->name); } crm_debug("Processed %s operation from %s: rc=%d, reply=%d, notify=%d", diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c index f63fbfefe16..e74777a66d2 100644 --- a/daemons/execd/pacemaker-execd.c +++ b/daemons/execd/pacemaker-execd.c @@ -111,7 +111,7 @@ lrmd_ipc_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) uint32_t id = 0; uint32_t flags = 0; pcmk__client_t *client = pcmk__find_client(c); - xmlNode *request = pcmk__client_data2xml(client, data, size, &id, &flags); + xmlNode *request = pcmk__client_data2xml(client, data, &id, &flags); CRM_CHECK(client != NULL, crm_err("Invalid client"); return FALSE); diff --git a/daemons/execd/remoted_proxy.c b/daemons/execd/remoted_proxy.c index dda111e2420..e6da1914846 100644 --- a/daemons/execd/remoted_proxy.c +++ b/daemons/execd/remoted_proxy.c @@ -234,7 +234,7 @@ ipc_proxy_dispatch(qb_ipcs_connection_t * c, void *data, size_t size) * This function is receiving a request from connection * 1 and forwarding it to connection 2. */ - request = pcmk__client_data2xml(client, data, size, &id, &flags); + request = pcmk__client_data2xml(client, data, &id, &flags); if (!request) { return 0; diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c index b5b0e82ac38..d92dd9a1e0a 100644 --- a/daemons/fenced/fenced_commands.c +++ b/daemons/fenced/fenced_commands.c @@ -2641,8 +2641,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, return pcmk_ok; } else { - crm_err("Unknown %s from %s", op, client ? client->name : remote_peer); - crm_log_xml_warn(request, "UnknownOp"); + crm_err("Unknown IPC request %s from %s", + op, (client? client->name : remote_peer)); } done: diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c index 5274f599a6d..1d461fc5d9e 100644 --- a/daemons/fenced/pacemaker-fenced.c +++ b/daemons/fenced/pacemaker-fenced.c @@ -90,7 +90,7 @@ st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) return 0; } - request = pcmk__client_data2xml(c, data, size, &id, &flags); + request = pcmk__client_data2xml(c, data, &id, &flags); if (request == NULL) { pcmk__ipc_send_ack(c, id, flags, "nack"); return 0; @@ -133,7 +133,6 @@ st_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) crm_xml_add(request, F_STONITH_CLIENTNAME, pcmk__client_name(c)); crm_xml_add(request, F_STONITH_CLIENTNODE, stonith_our_uname); - crm_log_xml_trace(request, "Client[inbound]"); stonith_command(c, id, flags, request, NULL); free_xml(request); diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c index 70c044b9903..2c1e44982b8 100644 --- a/daemons/pacemakerd/pacemakerd.c +++ b/daemons/pacemakerd/pacemakerd.c @@ -557,7 +557,7 @@ pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) uint32_t flags = 0; const char *task = NULL; pcmk__client_t *c = pcmk__find_client(qbc); - xmlNode *msg = pcmk__client_data2xml(c, data, size, &id, &flags); + xmlNode *msg = pcmk__client_data2xml(c, data, &id, &flags); pcmk__ipc_send_ack(c, id, flags, "ack"); if (msg == NULL) { diff --git a/daemons/schedulerd/pacemaker-schedulerd.c b/daemons/schedulerd/pacemaker-schedulerd.c index e4cd9a4a7ff..92f5eaa0b59 100644 --- a/daemons/schedulerd/pacemaker-schedulerd.c +++ b/daemons/schedulerd/pacemaker-schedulerd.c @@ -223,7 +223,7 @@ pe_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) uint32_t id = 0; uint32_t flags = 0; pcmk__client_t *c = pcmk__find_client(qbc); - xmlNode *msg = pcmk__client_data2xml(c, data, size, &id, &flags); + xmlNode *msg = pcmk__client_data2xml(c, data, &id, &flags); pcmk__ipc_send_ack(c, id, flags, "ack"); if (msg != NULL) { diff --git a/include/crm/common/ipcs_internal.h b/include/crm/common/ipcs_internal.h index 2db19c1e9ef..ff148b46859 100644 --- a/include/crm/common/ipcs_internal.h +++ b/include/crm/common/ipcs_internal.h @@ -121,7 +121,7 @@ int pcmk__ipc_prepare_iov(uint32_t request, xmlNode *message, int pcmk__ipc_send_xml(pcmk__client_t *c, uint32_t request, xmlNode *message, uint32_t flags); int pcmk__ipc_send_iov(pcmk__client_t *c, struct iovec *iov, uint32_t flags); -xmlNode *pcmk__client_data2xml(pcmk__client_t *c, void *data, size_t size, +xmlNode *pcmk__client_data2xml(pcmk__client_t *c, void *data, uint32_t *id, uint32_t *flags); int pcmk__client_pid(qb_ipcs_connection_t *c); diff --git a/lib/common/ipc.c b/lib/common/ipc.c index 039da410508..0a859e6a76b 100644 --- a/lib/common/ipc.c +++ b/lib/common/ipc.c @@ -562,8 +562,19 @@ pcmk__client_pid(qb_ipcs_connection_t *c) return stats.client_pid; } +/*! + * \internal + * \brief Retrieve message XML from data read from client IPC + * + * \param[in] c IPC client connection + * \param[in] data Data read from client connection + * \param[out] id Where to store message ID from libqb header + * \param[out] flags Where to store flags from libqb header + * + * \return Message XML on success, NULL otherwise + */ xmlNode * -pcmk__client_data2xml(pcmk__client_t *c, void *data, size_t size, uint32_t *id, +pcmk__client_data2xml(pcmk__client_t *c, void *data, uint32_t *id, uint32_t *flags) { xmlNode *xml = NULL; @@ -613,8 +624,8 @@ pcmk__client_data2xml(pcmk__client_t *c, void *data, size_t size, uint32_t *id, CRM_ASSERT(text[header->size_uncompressed - 1] == 0); - crm_trace("Received %.200s", text); xml = string2xml(text); + crm_log_xml_trace(xml, "[IPC received]"); free(uncompressed); return xml; diff --git a/maint/mocked/based.c b/maint/mocked/based.c index fa5797f0f5d..ab9290cdc87 100644 --- a/maint/mocked/based.c +++ b/maint/mocked/based.c @@ -182,8 +182,7 @@ mock_based_dispatch_command(qb_ipcs_connection_t *c, void *data, size_t size) uint32_t id = 0, flags = 0; int call_options = 0; pcmk__client_t *cib_client = pcmk__find_client(c); - xmlNode *op_request = pcmk__client_data2xml(cib_client, data, size, &id, - &flags); + xmlNode *op_request = pcmk__client_data2xml(cib_client, data, &id, &flags); crm_notice("Got connection %p", c); assert(op_request != NULL); From 0899a165cd12185a32c9c30884e5775ceb9bdc29 Mon Sep 17 00:00:00 2001 From: Ken Gaillot Date: Fri, 21 Feb 2020 18:05:51 -0600 Subject: [PATCH 10/10] Log: controller: improve message routing messages --- daemons/controld/controld_execd.c | 35 +++++----- daemons/controld/controld_messages.c | 99 ++++++++++------------------ daemons/controld/controld_messages.h | 2 - 3 files changed, 55 insertions(+), 81 deletions(-) diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c index 63b4ec74e07..0656457b704 100644 --- a/daemons/controld/controld_execd.c +++ b/daemons/controld/controld_execd.c @@ -954,14 +954,13 @@ delete_rsc_entry(lrm_state_t * lrm_state, ha_msg_input_t * input, const char *rs if (rc == pcmk_ok) { char *rsc_id_copy = strdup(rsc_id); - if (rsc_gIter) + if (rsc_gIter) { g_hash_table_iter_remove(rsc_gIter); - else + } else { g_hash_table_remove(lrm_state->resource_history, rsc_id_copy); - crm_debug("sync: Sending delete op for %s", rsc_id_copy); + } controld_delete_resource_history(rsc_id_copy, lrm_state->node_name, user_name, crmd_cib_smart_opt()); - g_hash_table_foreach_remove(lrm_state->pending_ops, lrm_remove_deleted_op, rsc_id_copy); free(rsc_id_copy); } @@ -1311,23 +1310,23 @@ delete_resource(lrm_state_t * lrm_state, lrmd_rsc_info_t * rsc, GHashTableIter * gIter, const char *sys, - const char *host, const char *user, ha_msg_input_t * request, gboolean unregister) { int rc = pcmk_ok; - crm_info("Removing resource %s for %s (%s) on %s", id, sys, user ? user : "internal", host); + crm_info("Removing resource %s from executor for %s%s%s", + id, sys, (user? " as " : ""), (user? user : "")); if (rsc && unregister) { rc = lrm_state_unregister_rsc(lrm_state, id, 0); } if (rc == pcmk_ok) { - crm_trace("Resource '%s' deleted", id); + crm_trace("Resource %s deleted from executor", id); } else if (rc == -EINPROGRESS) { - crm_info("Deletion of resource '%s' pending", id); + crm_info("Deletion of resource '%s' from executor is pending", id); if (request) { struct pending_deletion_op_s *op = NULL; char *ref = crm_element_value_copy(request->msg, XML_ATTR_REFERENCE); @@ -1339,8 +1338,9 @@ delete_resource(lrm_state_t * lrm_state, } return; } else { - crm_warn("Deletion of resource '%s' for %s (%s) on %s failed: %d", - id, sys, user ? user : "internal", host, rc); + crm_warn("Could not delete '%s' from executor for %s%s%s: %s " + CRM_XS " rc=%d", id, sys, (user? " as " : ""), + (user? user : ""), pcmk_strerror(rc), rc); } delete_rsc_entry(lrm_state, request, id, gIter, rc, user); @@ -1406,7 +1406,7 @@ force_reprobe(lrm_state_t *lrm_state, const char *from_sys, unregister = FALSE; } - delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, from_host, + delete_resource(lrm_state, entry->id, &entry->rsc, &gIter, from_sys, user_name, NULL, unregister); } @@ -1723,7 +1723,7 @@ do_lrm_delete(ha_msg_input_t *input, lrm_state_t *lrm_state, unregister = FALSE; } - delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, from_host, + delete_resource(lrm_state, rsc->id, rsc, NULL, from_sys, user_name, input, unregister); } @@ -1760,7 +1760,6 @@ do_lrm_invoke(long long action, #if ENABLE_ACL user_name = crm_acl_get_set_user(input->msg, F_CRM_USER, NULL); - crm_trace("Executor command from user '%s'", user_name); #endif crm_op = crm_element_value(input->msg, F_CRM_TASK); @@ -1768,7 +1767,13 @@ do_lrm_invoke(long long action, if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { from_host = crm_element_value(input->msg, F_CRM_HOST_FROM); } - crm_trace("Executor %s command from %s", crm_op, from_sys); +#if ENABLE_ACL + crm_trace("Executor %s command from %s as user %s", + crm_op, from_sys, user_name); +#else + crm_trace("Executor %s command from %s", + crm_op, from_sys); +#endif if (safe_str_eq(crm_op, CRM_OP_LRM_DELETE)) { if (safe_str_neq(from_sys, CRM_SYSTEM_TENGINE)) { @@ -2073,7 +2078,7 @@ controld_ack_event_directly(const char *to_host, const char *to_sys, build_operation_update(iter, rsc, op, fsa_our_uname, __FUNCTION__); reply = create_request(CRM_OP_INVOKE_LRM, update, to_host, to_sys, CRM_SYSTEM_LRMD, NULL); - crm_log_xml_trace(update, "ACK Update"); + crm_log_xml_trace(update, "[direct ACK]"); crm_debug("ACK'ing resource op " CRM_OP_FMT " from %s: %s", op->rsc_id, op->op_type, op->interval_ms, op->user_data, diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c index 1065a48f4e4..0a7b079236a 100644 --- a/daemons/controld/controld_messages.c +++ b/daemons/controld/controld_messages.c @@ -31,8 +31,7 @@ static void handle_response(xmlNode *stored_msg); static enum crmd_fsa_input handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause); static enum crmd_fsa_input handle_shutdown_request(xmlNode *stored_msg); - -#define ROUTER_RESULT(x) crm_trace("Router result: %s", x) +static void send_msg_via_ipc(xmlNode * msg, const char *sys); /* debug only, can wrap all it likes */ int last_data_id = 0; @@ -97,9 +96,10 @@ register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, } last_data_id++; - crm_trace("%s %s FSA input %d (%s) (cause=%s) %s data", - raised_from, prepend ? "prepended" : "appended", last_data_id, - fsa_input2string(input), fsa_cause2string(cause), data ? "with" : "without"); + crm_trace("%s %s FSA input %d (%s) due to %s, %s data", + raised_from, (prepend? "prepended" : "appended"), last_data_id, + fsa_input2string(input), fsa_cause2string(cause), + (data? "with" : "without")); fsa_data = calloc(1, sizeof(fsa_data_t)); fsa_data->id = last_data_id; @@ -120,10 +120,10 @@ register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, case C_CRMD_STATUS_CALLBACK: case C_IPC_MESSAGE: case C_HA_MESSAGE: - crm_trace("Copying %s data from %s as a HA msg", - fsa_cause2string(cause), raised_from); CRM_CHECK(((ha_msg_input_t *) data)->msg != NULL, crm_err("Bogus data from %s", raised_from)); + crm_trace("Copying %s data from %s as cluster message data", + fsa_cause2string(cause), raised_from); fsa_data->data = copy_ha_msg_input(data); fsa_data->data_type = fsa_dt_ha_msg; break; @@ -139,23 +139,22 @@ register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, case C_SHUTDOWN: case C_UNKNOWN: case C_STARTUP: - crm_err("Copying %s data (from %s)" - " not yet implemented", fsa_cause2string(cause), raised_from); + crm_crit("Copying %s data (from %s) is not yet implemented", + fsa_cause2string(cause), raised_from); crmd_exit(CRM_EX_SOFTWARE); break; } - crm_trace("%s data copied", fsa_cause2string(fsa_data->fsa_cause)); } /* make sure to free it properly later */ if (prepend) { - crm_trace("Prepending input"); fsa_message_queue = g_list_prepend(fsa_message_queue, fsa_data); } else { fsa_message_queue = g_list_append(fsa_message_queue, fsa_data); } - crm_trace("Queue len: %d", g_list_length(fsa_message_queue)); + crm_trace("FSA message queue length is %d", + g_list_length(fsa_message_queue)); /* fsa_dump_queue(LOG_TRACE); */ @@ -164,7 +163,7 @@ register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, } if (fsa_source && input != I_WAIT_FOR_EVENT) { - crm_trace("Triggering FSA: %s", __FUNCTION__); + crm_trace("Triggering FSA"); mainloop_set_trigger(fsa_source); } return last_data_id; @@ -190,20 +189,11 @@ fsa_dump_queue(int log_level) ha_msg_input_t * copy_ha_msg_input(ha_msg_input_t * orig) { - ha_msg_input_t *copy = NULL; - xmlNodePtr data = NULL; - - if (orig != NULL) { - crm_trace("Copy msg"); - data = copy_xml(orig->msg); + ha_msg_input_t *copy = calloc(1, sizeof(ha_msg_input_t)); - } else { - crm_trace("No message to copy"); - } - copy = new_ha_msg_input(data); - if (orig && orig->msg != NULL) { - CRM_CHECK(copy->msg != NULL, crm_err("copy failed")); - } + CRM_ASSERT(copy != NULL); + copy->msg = (orig && orig->msg)? copy_xml(orig->msg) : NULL; + copy->xml = get_message_xml(copy->msg, F_CRM_DATA); return copy; } @@ -418,39 +408,39 @@ relay_message(xmlNode * msg, gboolean originated_locally) if (is_for_dc || is_for_dcib || is_for_te) { if (AM_I_DC && is_for_te) { - ROUTER_RESULT("Message result: Local relay"); + crm_trace("Message routing: Process locally as transition request"); send_msg_via_ipc(msg, sys_to); } else if (AM_I_DC) { - ROUTER_RESULT("Message result: DC/controller process"); + crm_trace("Message routing: Process locally as DC request"); processing_complete = FALSE; /* more to be done by caller */ } else if (originated_locally && safe_str_neq(sys_from, CRM_SYSTEM_PENGINE) && safe_str_neq(sys_from, CRM_SYSTEM_TENGINE)) { - /* Neither the TE nor the scheduler should be sending messages - * to DCs on other nodes. By definition, if we are no longer the DC, - * then the scheduler's or TE's data should be discarded. - */ - #if SUPPORT_COROSYNC if (is_corosync_cluster()) { dest = text2msg_type(sys_to); } #endif - ROUTER_RESULT("Message result: External relay to DC"); + crm_trace("Message routing: Relay to DC"); send_cluster_message(host_to ? crm_get_peer(0, host_to) : NULL, dest, msg, TRUE); } else { - /* discard */ - ROUTER_RESULT("Message result: Discard, not DC"); + /* Neither the TE nor the scheduler should be sending messages + * to DCs on other nodes. By definition, if we are no longer the DC, + * then the scheduler's or TE's data should be discarded. + */ + crm_trace("Message routing: Discard because we are not DC"); } } else if (is_local && (is_for_crm || is_for_cib)) { - ROUTER_RESULT("Message result: controller process"); + crm_trace("Message routing: Process locally as controller request"); processing_complete = FALSE; /* more to be done by caller */ } else if (is_local) { - ROUTER_RESULT("Message result: Local relay"); + crm_trace("Message routing: Local relay to %s", + (sys_to? sys_to : "unknown client")); + crm_log_xml_trace(msg, "[IPC relay]"); send_msg_via_ipc(msg, sys_to); } else { @@ -472,9 +462,11 @@ relay_message(xmlNode * msg, gboolean originated_locally) crm_err("Cannot route message to unknown node %s", host_to); return TRUE; } + crm_trace("Message routing: Relay to %s", + (node_to->uname? node_to->uname : "peer")); + } else { + crm_trace("Message routing: Relay to all peers"); } - - ROUTER_RESULT("Message result: External relay"); send_cluster_message(host_to ? node_to : NULL, dest, msg, TRUE); } @@ -877,7 +869,7 @@ handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause) /* Optimize this for the DC - it has the most to do */ if (op == NULL) { - crm_log_xml_err(stored_msg, "Bad message"); + crm_log_xml_warn(stored_msg, "[request without " F_CRM_TASK "]"); return I_NULL; } @@ -1133,10 +1125,9 @@ handle_shutdown_request(xmlNode * stored_msg) /* msg is deleted by the time this returns */ extern gboolean process_te_message(xmlNode * msg, xmlNode * xml_data); -gboolean +static void send_msg_via_ipc(xmlNode * msg, const char *sys) { - gboolean send_ok = TRUE; pcmk__client_t *client_channel = pcmk__find_client_by_id(sys); if (crm_element_value(msg, F_CRM_HOST_FROM) == NULL) { @@ -1145,10 +1136,7 @@ send_msg_via_ipc(xmlNode * msg, const char *sys) if (client_channel != NULL) { /* Transient clients such as crmadmin */ - if (pcmk__ipc_send_xml(client_channel, 0, msg, - crm_ipc_server_event) != pcmk_rc_ok) { - send_ok = FALSE; - } + pcmk__ipc_send_xml(client_channel, 0, msg, crm_ipc_server_event); } else if (sys != NULL && strcmp(sys, CRM_SYSTEM_TENGINE) == 0) { xmlNode *data = get_message_xml(msg, F_CRM_DATA); @@ -1170,9 +1158,6 @@ send_msg_via_ipc(xmlNode * msg, const char *sys) fsa_data.origin = __FUNCTION__; fsa_data.data_type = fsa_dt_ha_msg; -#ifdef FSA_TRACE - crm_trace("Invoking action A_LRM_INVOKE (%.16llx)", A_LRM_INVOKE); -#endif do_lrm_invoke(A_LRM_INVOKE, C_IPC_MESSAGE, fsa_state, I_MESSAGE, &fsa_data); } else if (sys != NULL && crmd_is_proxy_session(sys)) { @@ -1180,21 +1165,7 @@ send_msg_via_ipc(xmlNode * msg, const char *sys) } else { crm_debug("Unknown Sub-system (%s)... discarding message.", crm_str(sys)); - send_ok = FALSE; } - - return send_ok; -} - -ha_msg_input_t * -new_ha_msg_input(xmlNode * orig) -{ - ha_msg_input_t *input_copy = NULL; - - input_copy = calloc(1, sizeof(ha_msg_input_t)); - input_copy->msg = orig; - input_copy->xml = get_message_xml(input_copy->msg, F_CRM_DATA); - return input_copy; } void diff --git a/daemons/controld/controld_messages.h b/daemons/controld/controld_messages.h index cd71c0313fd..db3ade3e79f 100644 --- a/daemons/controld/controld_messages.h +++ b/daemons/controld/controld_messages.h @@ -75,8 +75,6 @@ fsa_data_t *get_message(void); extern gboolean relay_message(xmlNode * relay_message, gboolean originated_locally); -extern gboolean send_msg_via_ipc(xmlNode * msg, const char *sys); - gboolean crmd_is_proxy_session(const char *session); void crmd_proxy_send(const char *session, xmlNode *msg);