diff --git a/tools/Makefile.am b/tools/Makefile.am index 37498147c2f..d3d08f394a7 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -12,7 +12,7 @@ if BUILD_SYSTEMD systemdsystemunit_DATA = crm_mon.service endif -noinst_HEADERS = crm_mon.h crm_resource.h +noinst_HEADERS = crm_mon.h crm_resource.h crm_resource_controller.h pcmkdir = $(datadir)/$(PACKAGE) pcmk_DATA = report.common report.collector @@ -110,7 +110,11 @@ crm_attribute_LDADD = $(top_builddir)/lib/cluster/libcrmcluster.la \ $(top_builddir)/lib/cib/libcib.la \ $(top_builddir)/lib/common/libcrmcommon.la -crm_resource_SOURCES = crm_resource.c crm_resource_ban.c crm_resource_runtime.c crm_resource_print.c +crm_resource_SOURCES = crm_resource.c \ + crm_resource_ban.c \ + crm_resource_controller.c \ + crm_resource_print.c \ + crm_resource_runtime.c crm_resource_LDADD = $(top_builddir)/lib/pengine/libpe_rules.la \ $(top_builddir)/lib/fencing/libstonithd.la \ $(top_builddir)/lib/lrmd/liblrmd.la \ diff --git a/tools/crm_resource.c b/tools/crm_resource.c index 62a9e755a1d..b7ce90b6c71 100644 --- a/tools/crm_resource.c +++ b/tools/crm_resource.c @@ -43,50 +43,38 @@ resource_ipc_timeout(gpointer data) } static void -resource_ipc_connection_destroy(gpointer user_data) +handle_controller_reply(pcmk_controld_api_t *controld_api, void *api_data, + void *user_data) { - crm_info("Connection to controller was terminated"); - crm_exit(CRM_EX_DISCONNECT); + fprintf(stderr, "."); + if ((controld_api->replies_expected(controld_api) == 0) + && mainloop && g_main_loop_is_running(mainloop)) { + fprintf(stderr, " OK\n"); + crm_debug("Got all the replies we expected"); + crm_exit(CRM_EX_OK); + } } static void -start_mainloop(void) +handle_controller_drop(pcmk_controld_api_t *controld_api, void *api_data, + void *user_data) { - if (crmd_replies_needed == 0) { - return; - } - - mainloop = g_main_loop_new(NULL, FALSE); - fprintf(stderr, "Waiting for %d %s from the controller", - crmd_replies_needed, - 
pcmk__plural_alt(crmd_replies_needed, "reply", "replies")); - crm_debug("Waiting for %d %s from the controller", - crmd_replies_needed, - pcmk__plural_alt(crmd_replies_needed, "reply", "replies")); - - g_timeout_add(MESSAGE_TIMEOUT_S * 1000, resource_ipc_timeout, NULL); - g_main_loop_run(mainloop); + crm_info("Connection to controller was terminated"); + crm_exit(CRM_EX_DISCONNECT); } -static int -resource_ipc_callback(const char *buffer, ssize_t length, gpointer userdata) +static void +start_mainloop(pcmk_controld_api_t *controld_api) { - xmlNode *msg = string2xml(buffer); - - fprintf(stderr, "."); - crm_log_xml_trace(msg, "[inbound]"); - - crmd_replies_needed--; - if ((crmd_replies_needed == 0) && mainloop - && g_main_loop_is_running(mainloop)) { - - fprintf(stderr, " OK\n"); - crm_debug("Got all the replies we expected"); - crm_exit(CRM_EX_OK); + if (controld_api->replies_expected(controld_api) > 0) { + unsigned int count = controld_api->replies_expected(controld_api); + + fprintf(stderr, "Waiting for %d %s from the controller", + count, pcmk__plural_alt(count, "reply", "replies")); + mainloop = g_main_loop_new(NULL, FALSE); + g_timeout_add(MESSAGE_TIMEOUT_S * 1000, resource_ipc_timeout, NULL); + g_main_loop_run(mainloop); } - - free_xml(msg); - return 0; } static int @@ -115,12 +103,6 @@ build_constraint_list(xmlNode *root) return retval; } -struct ipc_client_callbacks crm_callbacks = { - .dispatch = resource_ipc_callback, - .destroy = resource_ipc_connection_destroy, -}; - - /* short option letters still available: eEJkKXyYZ */ /* *INDENT-OFF* */ @@ -484,13 +466,12 @@ main(int argc, char **argv) GHashTable *override_params = NULL; char *xml_file = NULL; - crm_ipc_t *crmd_channel = NULL; + pcmk_controld_api_t *controld_api = NULL; pe_working_set_t *data_set = NULL; xmlNode *cib_xml_copy = NULL; cib_t *cib_conn = NULL; resource_t *rsc = NULL; bool recursive = FALSE; - char *our_pid = NULL; bool validate_cmdline = FALSE; /* whether we are just validating based on 
command line options */ bool require_resource = TRUE; /* whether command requires that resource be specified */ @@ -924,8 +905,6 @@ main(int argc, char **argv) crm_exit(CRM_EX_USAGE); } - our_pid = crm_getpid_s(); - if (do_force) { crm_debug("Forcing..."); cib_options |= cib_quorum_override; @@ -990,20 +969,26 @@ main(int argc, char **argv) // Establish a connection to the controller if needed if (require_crmd) { - xmlNode *xml = NULL; - mainloop_io_t *source = - mainloop_add_ipc_client(CRM_SYSTEM_CRMD, G_PRIORITY_DEFAULT, 0, NULL, &crm_callbacks); - crmd_channel = mainloop_get_ipc_client(source); - - if (crmd_channel == NULL) { - CMD_ERR("Error connecting to the controller"); - rc = -ENOTCONN; + char *client_uuid; + pcmk_controld_api_cb_t dispatch_cb = { + handle_controller_reply, NULL + }; + pcmk_controld_api_cb_t destroy_cb = { + handle_controller_drop, NULL + }; + + + client_uuid = crm_getpid_s(); + controld_api = pcmk_new_controld_api(crm_system_name, client_uuid); + free(client_uuid); + + rc = controld_api->connect(controld_api, true, &dispatch_cb, + &destroy_cb); + if (rc != pcmk_rc_ok) { + CMD_ERR("Error connecting to the controller: %s", pcmk_rc_str(rc)); + rc = pcmk_rc2legacy(rc); goto bail; } - - xml = create_hello_message(our_pid, crm_system_name, "0", "1"); - crm_ipc_send(crmd_channel, xml, 0, 0, NULL); - free_xml(xml); } /* Handle rsc_cmd appropriately */ @@ -1086,10 +1071,11 @@ main(int argc, char **argv) cli_resource_print_cts_constraints(data_set); } else if (rsc_cmd == 'F') { - rc = cli_resource_fail(crmd_channel, host_uname, rsc_id, data_set); - if (rc == pcmk_ok) { - start_mainloop(); + rc = cli_resource_fail(controld_api, host_uname, rsc_id, data_set); + if (rc == pcmk_rc_ok) { + start_mainloop(controld_api); } + rc = pcmk_rc2legacy(rc); } else if (rsc_cmd == 'O') { rc = cli_resource_print_operations(rsc_id, host_uname, TRUE, data_set); @@ -1283,12 +1269,11 @@ main(int argc, char **argv) if (do_force == FALSE) { rsc = uber_parent(rsc); } - 
crmd_replies_needed = 0; crm_debug("Erasing failures of %s (%s requested) on %s", rsc->id, rsc_id, (host_uname? host_uname: "all nodes")); - rc = cli_resource_delete(crmd_channel, host_uname, rsc, - operation, interval_spec, TRUE, data_set); + rc = cli_resource_delete(controld_api, host_uname, rsc, operation, + interval_spec, TRUE, data_set); if ((rc == pcmk_ok) && !BE_QUIET) { // Show any reasons why resource might stay stopped @@ -1296,26 +1281,25 @@ main(int argc, char **argv) } if (rc == pcmk_ok) { - start_mainloop(); + start_mainloop(controld_api); } } else if (rsc_cmd == 'C') { - rc = cli_cleanup_all(crmd_channel, host_uname, operation, interval_spec, + rc = cli_cleanup_all(controld_api, host_uname, operation, interval_spec, data_set); if (rc == pcmk_ok) { - start_mainloop(); + start_mainloop(controld_api); } } else if ((rsc_cmd == 'R') && rsc) { if (do_force == FALSE) { rsc = uber_parent(rsc); } - crmd_replies_needed = 0; crm_debug("Re-checking the state of %s (%s requested) on %s", rsc->id, rsc_id, (host_uname? host_uname: "all nodes")); - rc = cli_resource_delete(crmd_channel, host_uname, rsc, - NULL, 0, FALSE, data_set); + rc = cli_resource_delete(controld_api, host_uname, rsc, NULL, 0, FALSE, + data_set); if ((rc == pcmk_ok) && !BE_QUIET) { // Show any reasons why resource might stay stopped @@ -1323,13 +1307,11 @@ main(int argc, char **argv) } if (rc == pcmk_ok) { - start_mainloop(); + start_mainloop(controld_api); } } else if (rsc_cmd == 'R') { const char *router_node = host_uname; - xmlNode *msg_data = NULL; - xmlNode *cmd = NULL; int attr_options = pcmk__node_attr_none; if (host_uname) { @@ -1348,35 +1330,24 @@ main(int argc, char **argv) } } - if (crmd_channel == NULL) { + if (controld_api == NULL) { printf("Dry run: skipping clean-up of %s due to CIB_file\n", host_uname? 
host_uname : "all nodes"); rc = pcmk_ok; goto bail; } - msg_data = create_xml_node(NULL, "crm-resource-reprobe-op"); - crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, host_uname); - if (safe_str_neq(router_node, host_uname)) { - crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); - } - - cmd = create_request(CRM_OP_REPROBE, msg_data, router_node, - CRM_SYSTEM_CRMD, crm_system_name, our_pid); - free_xml(msg_data); - crm_debug("Re-checking the state of all resources on %s", host_uname?host_uname:"all nodes"); rc = pcmk_rc2legacy(pcmk__node_attr_request_clear(NULL, host_uname, NULL, NULL, NULL, NULL, attr_options)); - if (crm_ipc_send(crmd_channel, cmd, 0, 0, NULL) > 0) { - start_mainloop(); + if (controld_api->reprobe(controld_api, host_uname, + router_node) == pcmk_rc_ok) { + start_mainloop(controld_api); } - free_xml(cmd); - } else if (rsc_cmd == 'D') { xmlNode *msg_data = NULL; @@ -1398,12 +1369,14 @@ main(int argc, char **argv) bail: - free(our_pid); pe_free_working_set(data_set); if (cib_conn != NULL) { cib_conn->cmds->signoff(cib_conn); cib_delete(cib_conn); } + if (controld_api != NULL) { + pcmk_free_controld_api(controld_api); + } if (is_ocf_rc) { exit_code = rc; diff --git a/tools/crm_resource.h b/tools/crm_resource.h index 84b094b1b6b..d5bfff35c4f 100644 --- a/tools/crm_resource.h +++ b/tools/crm_resource.h @@ -1,5 +1,5 @@ /* - * Copyright 2004-2019 the Pacemaker project contributors + * Copyright 2004-2020 the Pacemaker project contributors * * The version control history for this file may have further details. 
* @@ -21,6 +21,7 @@ #include #include #include +#include "crm_resource_controller.h" extern bool print_pending; @@ -30,12 +31,13 @@ extern bool BE_QUIET; extern int resource_verbose; extern int cib_options; -extern int crmd_replies_needed; extern char *move_lifetime; extern const char *attr_set_type; +extern pcmk_controld_api_cb_t controld_api_cb; + /* ban */ int cli_resource_prefer(const char *rsc_id, const char *host, cib_t * cib_conn); int cli_resource_ban(const char *rsc_id, const char *host, GListPtr allnodes, cib_t * cib_conn); @@ -61,14 +63,16 @@ int cli_resource_print_operations(const char *rsc_id, const char *host_uname, bo /* runtime */ void cli_resource_check(cib_t * cib, resource_t *rsc); -int cli_resource_fail(crm_ipc_t * crmd_channel, const char *host_uname, const char *rsc_id, pe_working_set_t * data_set); +int cli_resource_fail(pcmk_controld_api_t *controld_api, + const char *host_uname, const char *rsc_id, + pe_working_set_t *data_set); int cli_resource_search(resource_t *rsc, const char *requested_name, pe_working_set_t *data_set); -int cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, - resource_t *rsc, const char *operation, - const char *interval_spec, bool just_failures, - pe_working_set_t *data_set); -int cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name, +int cli_resource_delete(pcmk_controld_api_t *controld_api, + const char *host_uname, pe_resource_t *rsc, + const char *operation, const char *interval_spec, + bool just_failures, pe_working_set_t *data_set); +int cli_cleanup_all(pcmk_controld_api_t *controld_api, const char *node_name, const char *operation, const char *interval_spec, pe_working_set_t *data_set); int cli_resource_restart(pe_resource_t *rsc, const char *host, int timeout_ms, diff --git a/tools/crm_resource_controller.c b/tools/crm_resource_controller.c new file mode 100644 index 00000000000..845791632d4 --- /dev/null +++ b/tools/crm_resource_controller.c @@ -0,0 +1,425 @@ +/* + * Copyright 
2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU General Public License version 2 + * or later (GPLv2+) WITHOUT ANY WARRANTY. + */ + +#include +#include +#include +#include "crm_resource.h" + +// API object's private members +struct controller_private { + char *client_name; // Client name to use with IPC + char *client_uuid; // Client UUID to use with IPC + mainloop_io_t *source; // If main loop used, I/O source for IPC + crm_ipc_t *ipc; // IPC connection to controller + int replies_expected; // How many controller replies are expected + pcmk_controld_api_cb_t dispatch_cb; // Caller's registered dispatch callback + pcmk_controld_api_cb_t destroy_cb; // Caller's registered destroy callback +}; + +static void +call_client_callback(pcmk_controld_api_t *api, pcmk_controld_api_cb_t *cb, + void *api_data) +{ + if ((cb != NULL) && (cb->callback != NULL)) { + cb->callback(api, api_data, cb->user_data); + } +} + +/* + * IPC callbacks when used with main loop + */ + +static void +controller_ipc_destroy(gpointer user_data) +{ + pcmk_controld_api_t *api = user_data; + struct controller_private *private = api->private; + + private->ipc = NULL; + private->source = NULL; + call_client_callback(api, &(private->destroy_cb), NULL); +} + +// \return < 0 if connection is no longer required, >= 0 if it is +static int +controller_ipc_dispatch(const char *buffer, ssize_t length, gpointer user_data) +{ + xmlNode *msg = NULL; + pcmk_controld_api_t *api = user_data; + + CRM_CHECK(buffer && api && api->private, return 0); + + msg = string2xml(buffer); + if (msg == NULL) { + crm_warn("Received malformed controller IPC message"); + } else { + struct controller_private *private = api->private; + + crm_log_xml_trace(msg, "controller-reply"); + private->replies_expected--; + call_client_callback(api, &(private->dispatch_cb), + get_message_xml(msg, F_CRM_DATA)); + free_xml(msg); + 
} + return 0; +} + +/* + * IPC utilities + */ + +// \return Standard Pacemaker return code +static int +send_hello(crm_ipc_t *ipc, const char *client_name, const char *client_uuid) +{ + xmlNode *hello = create_hello_message(client_uuid, client_name, "0", "1"); + int rc = crm_ipc_send(ipc, hello, 0, 0, NULL); + + free_xml(hello); + if (rc < 0) { + rc = pcmk_legacy2rc(rc); + crm_info("Could not send IPC hello to %s: %s " CRM_XS " rc=%d", + CRM_SYSTEM_CRMD /* ipc->name */, + pcmk_rc_str(rc), rc); + return rc; + } + crm_debug("Sent IPC hello to %s", CRM_SYSTEM_CRMD /* ipc->name */); + return pcmk_rc_ok; +} + +// \return Standard Pacemaker return code +static int +send_controller_request(pcmk_controld_api_t *api, const char *op, + xmlNode *msg_data, const char *node) +{ + int rc; + struct controller_private *private = api->private; + xmlNode *cmd = create_request(op, msg_data, node, CRM_SYSTEM_CRMD, + private->client_name, private->client_uuid); + const char *reference = crm_element_value(cmd, XML_ATTR_REFERENCE); + + if ((cmd == NULL) || (reference == NULL)) { + free_xml(cmd); + return EINVAL; + } + //@TODO pass as args? 
0=crm_ipc_flags, 0=timeout_ms (default 5s), NULL=reply + crm_log_xml_trace(cmd, "controller-request"); + rc = crm_ipc_send(private->ipc, cmd, 0, 0, NULL); + free_xml(cmd); + if (rc < 0) { + return pcmk_legacy2rc(rc); + } + private->replies_expected++; + return pcmk_rc_ok; +} + +/* + * pcmk_controld_api_t methods + */ + +static int +controller_connect_mainloop(pcmk_controld_api_t *api) +{ + struct controller_private *private = api->private; + struct ipc_client_callbacks callbacks = { + .dispatch = controller_ipc_dispatch, + .destroy = controller_ipc_destroy, + }; + + private->source = mainloop_add_ipc_client(CRM_SYSTEM_CRMD, + G_PRIORITY_DEFAULT, 0, api, + &callbacks); + if (private->source == NULL) { + return ENOTCONN; + } + + private->ipc = mainloop_get_ipc_client(private->source); + if (private->ipc == NULL) { + (void) api->disconnect(api); + return ENOTCONN; + } + + crm_debug("Connected to %s IPC (attaching to main loop)", CRM_SYSTEM_CRMD); + return pcmk_rc_ok; +} + +static int +controller_connect_no_mainloop(pcmk_controld_api_t *api) +{ + struct controller_private *private = api->private; + + private->ipc = crm_ipc_new(CRM_SYSTEM_CRMD, 0); + if (private->ipc == NULL) { + return ENOTCONN; + } + if (!crm_ipc_connect(private->ipc)) { + crm_ipc_close(private->ipc); + crm_ipc_destroy(private->ipc); + private->ipc = NULL; + return errno; + } + /* @TODO caller needs crm_ipc_get_fd(private->ipc); either add method for + * that, or replace use_mainloop with int *fd + */ + crm_debug("Connected to %s IPC", CRM_SYSTEM_CRMD); + return pcmk_rc_ok; +} + +static void +set_callback(pcmk_controld_api_cb_t *dest, pcmk_controld_api_cb_t *source) +{ + if (source) { + dest->callback = source->callback; + dest->user_data = source->user_data; + } +} + +static int +controller_api_connect(pcmk_controld_api_t *api, bool use_mainloop, + pcmk_controld_api_cb_t *dispatch_cb, + pcmk_controld_api_cb_t *destroy_cb) +{ + int rc = pcmk_rc_ok; + struct controller_private *private; + + if (api == 
NULL) { + return EINVAL; + } + private = api->private; + + set_callback(&(private->dispatch_cb), dispatch_cb); + set_callback(&(private->destroy_cb), destroy_cb); + + if (private->ipc != NULL) { + return pcmk_rc_ok; // already connected + } + + if (use_mainloop) { + rc = controller_connect_mainloop(api); + } else { + rc = controller_connect_no_mainloop(api); + } + if (rc != pcmk_rc_ok) { + return rc; + } + + rc = send_hello(private->ipc, private->client_name, private->client_uuid); + if (rc != pcmk_rc_ok) { + (void) api->disconnect(api); + } + return rc; +} + +static int +controller_api_disconnect(pcmk_controld_api_t *api) +{ + struct controller_private *private = api->private; + + if (private->source != NULL) { + // Attached to main loop + mainloop_del_ipc_client(private->source); + private->source = NULL; + private->ipc = NULL; + + } else if (private->ipc != NULL) { + // Not attached to main loop + crm_ipc_t *ipc = private->ipc; + + private->ipc = NULL; + crm_ipc_close(ipc); + crm_ipc_destroy(ipc); + } + crm_debug("Disconnected from %s IPC", CRM_SYSTEM_CRMD /* ipc->name */); + return pcmk_rc_ok; +} + +//@TODO dispatch function for non-mainloop a la stonith_dispatch() +//@TODO convenience retry-connect function a la stonith_api_connect_retry() + +static unsigned int +controller_api_replies_expected(pcmk_controld_api_t *api) +{ + if (api != NULL) { + struct controller_private *private = api->private; + + return private->replies_expected; + } + return 0; +} + +static xmlNode * +create_reprobe_message_data(const char *target_node, const char *router_node) +{ + xmlNode *msg_data; + + msg_data = create_xml_node(NULL, "data_for_" CRM_OP_REPROBE); + crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, target_node); + if ((router_node != NULL) && safe_str_neq(router_node, target_node)) { + crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); + } + return msg_data; +} + +static int +controller_api_reprobe(pcmk_controld_api_t *api, const char *target_node, + const char 
*router_node) +{ + int rc = EINVAL; + + if (api != NULL) { + xmlNode *msg_data; + + crm_debug("Sending %s IPC request to reprobe %s via %s", + CRM_SYSTEM_CRMD, crm_str(target_node), crm_str(router_node)); + msg_data = create_reprobe_message_data(target_node, router_node); + rc = send_controller_request(api, CRM_OP_REPROBE, msg_data, + (router_node? router_node : target_node)); + free_xml(msg_data); + } + return rc; +} + +// \return Standard Pacemaker return code +static int +controller_resource_op(pcmk_controld_api_t *api, const char *op, + const char *target_node, const char *router_node, + bool cib_only, const char *rsc_id, + const char *rsc_long_id, const char *standard, + const char *provider, const char *type) +{ + int rc; + char *key; + xmlNode *msg_data, *xml_rsc, *params; + + if (api == NULL) { + return EINVAL; + } + if (router_node == NULL) { + router_node = target_node; + } + + msg_data = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); + + /* The controller logs the transition key from resource op requests, so we + * need to have *something* for it. 
+ */ + key = generate_transition_key(0, getpid(), 0, + "xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx"); + crm_xml_add(msg_data, XML_ATTR_TRANSITION_KEY, key); + free(key); + + crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, target_node); + if (safe_str_neq(router_node, target_node)) { + crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); + } + + if (cib_only) { + // Indicate that only the CIB needs to be cleaned + crm_xml_add(msg_data, PCMK__XA_MODE, XML_TAG_CIB); + } + + xml_rsc = create_xml_node(msg_data, XML_CIB_TAG_RESOURCE); + crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id); + crm_xml_add(xml_rsc, XML_ATTR_ID_LONG, rsc_long_id); + crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, standard); + crm_xml_add(xml_rsc, XML_AGENT_ATTR_PROVIDER, provider); + crm_xml_add(xml_rsc, XML_ATTR_TYPE, type); + + params = create_xml_node(msg_data, XML_TAG_ATTRS); + crm_xml_add(params, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); + + // The controller parses the timeout from the request + key = crm_meta_name(XML_ATTR_TIMEOUT); + crm_xml_add(params, key, "60000"); /* 1 minute */ //@TODO pass as arg + free(key); + + rc = send_controller_request(api, op, msg_data, router_node); + free_xml(msg_data); + return rc; +} + +static int +controller_api_fail_resource(pcmk_controld_api_t *api, + const char *target_node, const char *router_node, + const char *rsc_id, const char *rsc_long_id, + const char *standard, const char *provider, + const char *type) +{ + crm_debug("Sending %s IPC request to fail %s (a.k.a. 
%s) on %s via %s", + CRM_SYSTEM_CRMD, crm_str(rsc_id), crm_str(rsc_long_id), + crm_str(target_node), crm_str(router_node)); + return controller_resource_op(api, CRM_OP_LRM_FAIL, target_node, + router_node, false, rsc_id, rsc_long_id, + standard, provider, type); +} + +static int +controller_api_refresh_resource(pcmk_controld_api_t *api, + const char *target_node, + const char *router_node, + const char *rsc_id, const char *rsc_long_id, + const char *standard, const char *provider, + const char *type, bool cib_only) +{ + crm_debug("Sending %s IPC request to refresh %s (a.k.a. %s) on %s via %s", + CRM_SYSTEM_CRMD, crm_str(rsc_id), crm_str(rsc_long_id), + crm_str(target_node), crm_str(router_node)); + return controller_resource_op(api, CRM_OP_LRM_DELETE, target_node, + router_node, cib_only, rsc_id, rsc_long_id, + standard, provider, type); +} + +pcmk_controld_api_t * +pcmk_new_controld_api(const char *client_name, const char *client_uuid) +{ + struct controller_private *private; + pcmk_controld_api_t *api = calloc(1, sizeof(pcmk_controld_api_t)); + + CRM_ASSERT(api != NULL); + + api->private = calloc(1, sizeof(struct controller_private)); + CRM_ASSERT(api->private != NULL); + private = api->private; + + if (client_name == NULL) { + client_name = crm_system_name? 
crm_system_name : "client"; + } + private->client_name = strdup(client_name); + CRM_ASSERT(private->client_name != NULL); + + if (client_uuid == NULL) { + private->client_uuid = crm_generate_uuid(); + } else { + private->client_uuid = strdup(client_uuid); + } + CRM_ASSERT(private->client_uuid != NULL); + + api->connect = controller_api_connect; + api->disconnect = controller_api_disconnect; + api->replies_expected = controller_api_replies_expected; + api->reprobe = controller_api_reprobe; + api->fail_resource = controller_api_fail_resource; + api->refresh_resource = controller_api_refresh_resource; + return api; +} + +void +pcmk_free_controld_api(pcmk_controld_api_t *api) +{ + if (api != NULL) { + struct controller_private *private = api->private; + + api->disconnect(api); + free(private->client_name); + free(private->client_uuid); + free(api->private); + free(api); + } +} diff --git a/tools/crm_resource_controller.h b/tools/crm_resource_controller.h new file mode 100644 index 00000000000..50e20b43f79 --- /dev/null +++ b/tools/crm_resource_controller.h @@ -0,0 +1,198 @@ +/* + * Copyright 2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ +#ifndef PCMK__CONTROLD_API_H +#define PCMK__CONTROLD_API_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdbool.h> // bool + +/* This is a demonstration of an abstracted controller IPC API. It is expected + * that this will be improved and moved to libcrmcommon. + * + * @TODO We could consider whether it's reasonable to have a single type for + * all daemons' IPC APIs (e.g. pcmk_ipc_api_t instead of pcmk_*_api_t). They + * could potentially have common connect/disconnect methods and then a void* to + * a group of API-specific methods. 
+ * + * In that case, the callback type would also need to be generic, taking + * (pcmk_ipc_api_t *api, void *api_data, void *user_data), with individual APIs + * having functions for getting useful info from api_data. If all APIs followed + * the call_id model, we could use int call_id instead of api_data. + * + * A major annoyance is that the controller IPC protocol currently does not have + * any way to tie a particular reply to a particular request. The current + * clients (crmadmin, crm_node, and crm_resource) simply know what kind of reply + * to expect for the kind of request they sent. In crm_resource's case, all it + * does is count replies, ignoring their content altogether. + * + * That really forces us to have a single callback for all events rather than a + * per-request callback. That in turn implies that callers can only provide a + * single user data pointer. + * + * @TODO Define protocol version constants to use in hello message. + * @TODO Allow callers to specify timeouts. + * @TODO Define call IDs for controller ops, while somehow maintaining backward + * compatibility, since a client running on a Pacemaker Remote node could + * be older or newer than the controller on the connection's cluster + * node. + * @TODO The controller currently does not respond to hello messages. We should + * establish a common connection handshake protocol for all daemons that + * involves a hello message and acknowledgement. We should support sync + * or async connection (i.e. block until the ack is received, or return + * after the hello is sent and call a connection callback when the hello + * ack is received). + */ + +//! \internal +typedef struct pcmk_controld_api_s pcmk_controld_api_t; + +//! \internal +typedef struct pcmk_controld_api_callback_s { + void (*callback)(pcmk_controld_api_t *api, void *api_data, void *user_data); + void *user_data; +} pcmk_controld_api_cb_t; + +//! \internal +struct pcmk_controld_api_s { + //! \internal + void *private; + + /*! 
+ * \internal + * \brief Connect to the local controller + * + * \param[in] api Controller API instance + * \param[in] use_mainloop If true, attach IPC to main loop + * \param[in] dispatch_cb If not NULL, call this when replies are received + * \param[in] destroy_cb If not NULL, call this if connection drops + * + * \return Standard Pacemaker return code + * \note Only the pointers inside the callback objects need to be + * persistent, not the callback objects themselves. The destroy_cb + * will be called only for unrequested disconnects. + */ + int (*connect)(pcmk_controld_api_t *api, bool use_mainloop, + pcmk_controld_api_cb_t *dispatch_cb, + pcmk_controld_api_cb_t *destroy_cb); + + /*! + * \internal + * \brief Disconnect from the local controller + * + * \param[in] api Controller API instance + * + * \return Standard Pacemaker return code + */ + int (*disconnect)(pcmk_controld_api_t *api); + + /*! + * \internal + * \brief Check number of replies still expected from controller + * + * \param[in] api Controller API instance + * + * \return Number of expected replies + */ + unsigned int (*replies_expected)(pcmk_controld_api_t *api); + + /*! + * \internal + * \brief Send a reprobe controller operation + * + * \param[in] api Controller API instance + * \param[in] target_node Name of node to reprobe + * \param[in] router_node Router node for host + * + * \return Standard Pacemaker return code + */ + int (*reprobe)(pcmk_controld_api_t *api, const char *target_node, + const char *router_node); + + /* @TODO These methods have a lot of arguments. One possibility would be to + * make a struct for agent info (standard/provider/type), which theoretically + * could be used throughout pacemaker code. However that would end up being + * really awkward to use generically, since sometimes you need to allocate + * those strings (char *) and other times you only have references into XML + * (const char *). We could make some structs just for this API. + */ + + /*! 
+ * \internal + * \brief Ask the controller to fail a resource + * + * \param[in] api Controller API instance + * \param[in] target_node Name of node resource is on + * \param[in] router_node Router node for target + * \param[in] rsc_id ID of resource to fail + * \param[in] rsc_long_id Long ID of resource (if any) + * \param[in] standard Standard of resource + * \param[in] provider Provider of resource (if any) + * \param[in] type Type of resource to fail + * + * \return Standard Pacemaker return code + */ + int (*fail_resource)(pcmk_controld_api_t *api, const char *target_node, + const char *router_node, const char *rsc_id, + const char *rsc_long_id, const char *standard, + const char *provider, const char *type); + + /*! + * \internal + * \brief Ask the controller to refresh a resource + * + * \param[in] api Controller API instance + * \param[in] target_node Name of node resource is on + * \param[in] router_node Router node for target + * \param[in] rsc_id ID of resource to refresh + * \param[in] rsc_long_id Long ID of resource (if any) + * \param[in] standard Standard of resource + * \param[in] provider Provider of resource (if any) + * \param[in] type Type of resource + * \param[in] cib_only If true, clean resource from CIB only + * + * \return Standard Pacemaker return code + */ + int (*refresh_resource)(pcmk_controld_api_t *api, const char *target_node, + const char *router_node, const char *rsc_id, + const char *rsc_long_id, const char *standard, + const char *provider, const char *type, + bool cib_only); +}; + +/*! + * \internal + * \brief Create new controller IPC API object for clients + * + * \param[in] client_name Client name to use with IPC + * \param[in] client_uuid Client UUID to use with IPC + * + * \return Newly allocated object + * \note This function asserts on errors, so it will never return NULL. + * The caller is responsible for freeing the result with + * pcmk_free_controld_api(). 
+ */ +pcmk_controld_api_t *pcmk_new_controld_api(const char *client_name, + const char *client_uuid); + +/*! + * \internal + * \brief Free a controller IPC API object + * + * \param[in] api Controller IPC API object to free + */ +void pcmk_free_controld_api(pcmk_controld_api_t *api); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c index 305ec56b5d0..a601e76518c 100644 --- a/tools/crm_resource_runtime.c +++ b/tools/crm_resource_runtime.c @@ -11,7 +11,6 @@ int resource_verbose = 0; bool do_force = FALSE; -int crmd_replies_needed = 1; /* The welcome message */ const char *attr_set_type = XML_TAG_ATTR_SETS; @@ -458,58 +457,56 @@ cli_resource_delete_attribute(resource_t *rsc, const char *requested_name, return rc; } +// \return Standard Pacemaker return code static int -send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, +send_lrm_rsc_op(pcmk_controld_api_t *controld_api, bool do_fail_resource, const char *host_uname, const char *rsc_id, - bool only_failed, pe_working_set_t * data_set) + pe_working_set_t *data_set) { - char *our_pid = NULL; - char *key = NULL; - int rc = -ECOMM; - xmlNode *cmd = NULL; - xmlNode *xml_rsc = NULL; const char *router_node = host_uname; + const char *rsc_api_id = NULL; + const char *rsc_long_id = NULL; const char *rsc_class = NULL; + const char *rsc_provider = NULL; const char *rsc_type = NULL; - xmlNode *params = NULL; - xmlNode *msg_data = NULL; bool cib_only = false; resource_t *rsc = pe_find_resource(data_set->resources, rsc_id); if (rsc == NULL) { CMD_ERR("Resource %s not found", rsc_id); - return -ENXIO; + return ENXIO; } else if (rsc->variant != pe_native) { CMD_ERR("We can only process primitive resources, not %s", rsc_id); - return -EINVAL; + return EINVAL; } rsc_class = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); + rsc_provider = crm_element_value(rsc->xml, XML_AGENT_ATTR_PROVIDER); rsc_type = crm_element_value(rsc->xml, XML_ATTR_TYPE); if ((rsc_class 
== NULL) || (rsc_type == NULL)) { CMD_ERR("Resource %s does not have a class and type", rsc_id); - return -EINVAL; + return EINVAL; } if (host_uname == NULL) { CMD_ERR("Please specify a node name"); - return -EINVAL; + return EINVAL; } else { pe_node_t *node = pe_find_node(data_set->nodes, host_uname); if (node == NULL) { CMD_ERR("Node %s not found", host_uname); - return -pcmk_err_node_unknown; + return pcmk_rc_node_unknown; } if (!(node->details->online)) { - if (strcmp(op, CRM_OP_LRM_DELETE) == 0) { - cib_only = true; - } else { + if (do_fail_resource) { CMD_ERR("Node %s is not online", host_uname); - return -ENOTCONN; + return ENOTCONN; + } else { + cib_only = true; } } if (!cib_only && pe__is_guest_or_remote_node(node)) { @@ -517,67 +514,28 @@ send_lrm_rsc_op(crm_ipc_t * crmd_channel, const char *op, if (node == NULL) { CMD_ERR("No cluster connection to Pacemaker Remote node %s detected", host_uname); - return -ENOTCONN; + return ENOTCONN; } router_node = node->details->uname; } } - msg_data = create_xml_node(NULL, XML_GRAPH_TAG_RSC_OP); - - /* The controller logs the transition key from requests, so we need to have - * *something* for it. 
- */ - key = generate_transition_key(0, getpid(), 0, - "xxxxxxxx-xrsc-opxx-xcrm-resourcexxxx"); - crm_xml_add(msg_data, XML_ATTR_TRANSITION_KEY, key); - free(key); - - crm_xml_add(msg_data, XML_LRM_ATTR_TARGET, host_uname); - if (safe_str_neq(router_node, host_uname)) { - crm_xml_add(msg_data, XML_LRM_ATTR_ROUTER_NODE, router_node); - } - - if (cib_only) { - // Indicate that only the CIB needs to be cleaned - crm_xml_add(msg_data, PCMK__XA_MODE, XML_TAG_CIB); - } - - xml_rsc = create_xml_node(msg_data, XML_CIB_TAG_RESOURCE); if (rsc->clone_name) { - crm_xml_add(xml_rsc, XML_ATTR_ID, rsc->clone_name); - crm_xml_add(xml_rsc, XML_ATTR_ID_LONG, rsc->id); - + rsc_api_id = rsc->clone_name; + rsc_long_id = rsc->id; } else { - crm_xml_add(xml_rsc, XML_ATTR_ID, rsc->id); + rsc_api_id = rsc->id; } - - crm_xml_add(xml_rsc, XML_AGENT_ATTR_CLASS, rsc_class); - crm_copy_xml_element(rsc->xml, xml_rsc, XML_AGENT_ATTR_PROVIDER); - crm_xml_add(xml_rsc, XML_ATTR_TYPE, rsc_type); - - params = create_xml_node(msg_data, XML_TAG_ATTRS); - crm_xml_add(params, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); - - // The controller parses the timeout from the request - key = crm_meta_name(XML_ATTR_TIMEOUT); - crm_xml_add(params, key, "60000"); /* 1 minute */ - free(key); - - our_pid = crm_getpid_s(); - cmd = create_request(op, msg_data, router_node, CRM_SYSTEM_CRMD, crm_system_name, our_pid); - free_xml(msg_data); - - if (crm_ipc_send(crmd_channel, cmd, 0, 0, NULL) > 0) { - rc = 0; - + if (do_fail_resource) { + return controld_api->fail_resource(controld_api, host_uname, + router_node, rsc_api_id, rsc_long_id, + rsc_class, rsc_provider, rsc_type); } else { - crm_debug("Could not send %s op to the controller", op); - rc = -ENOTCONN; + return controld_api->refresh_resource(controld_api, host_uname, + router_node, rsc_api_id, + rsc_long_id, rsc_class, + rsc_provider, rsc_type, cib_only); } - - free_xml(cmd); - return rc; } /*! 
@@ -597,8 +555,9 @@ rsc_fail_name(resource_t *rsc) return is_set(rsc->flags, pe_rsc_unique)? strdup(name) : clone_strip(name); } +// \return Standard Pacemaker return code static int -clear_rsc_history(crm_ipc_t *crmd_channel, const char *host_uname, +clear_rsc_history(pcmk_controld_api_t *controld_api, const char *host_uname, const char *rsc_id, pe_working_set_t *data_set) { int rc = pcmk_ok; @@ -608,27 +567,22 @@ clear_rsc_history(crm_ipc_t *crmd_channel, const char *host_uname, * single operation, we might wind up with a wrong idea of the current * resource state, and we might not re-probe the resource. */ - rc = send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_DELETE, host_uname, rsc_id, - TRUE, data_set); - if (rc != pcmk_ok) { + rc = send_lrm_rsc_op(controld_api, false, host_uname, rsc_id, data_set); + if (rc != pcmk_rc_ok) { return rc; } - crmd_replies_needed++; - crm_trace("Processing %d mainloop inputs", crmd_replies_needed); + crm_trace("Processing %d mainloop inputs", + controld_api->replies_expected(controld_api)); while (g_main_context_iteration(NULL, FALSE)) { crm_trace("Processed mainloop input, %d still remaining", - crmd_replies_needed); - } - - if (crmd_replies_needed < 0) { - crmd_replies_needed = 0; + controld_api->replies_expected(controld_api)); } return rc; } static int -clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name, +clear_rsc_failures(pcmk_controld_api_t *controld_api, const char *node_name, const char *rsc_id, const char *operation, const char *interval_spec, pe_working_set_t *data_set) { @@ -700,9 +654,9 @@ clear_rsc_failures(crm_ipc_t *crmd_channel, const char *node_name, g_hash_table_iter_init(&iter, rscs); while (g_hash_table_iter_next(&iter, (gpointer *) &failed_id, NULL)) { crm_debug("Erasing failures of %s on %s", failed_id, node_name); - rc = clear_rsc_history(crmd_channel, node_name, failed_id, data_set); - if (rc != pcmk_ok) { - return rc; + rc = clear_rsc_history(controld_api, node_name, failed_id, data_set); + if 
(rc != pcmk_rc_ok) { + return pcmk_rc2legacy(rc); } } g_hash_table_destroy(rscs); @@ -728,7 +682,7 @@ clear_rsc_fail_attrs(resource_t *rsc, const char *operation, } int -cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, +cli_resource_delete(pcmk_controld_api_t *controld_api, const char *host_uname, resource_t *rsc, const char *operation, const char *interval_spec, bool just_failures, pe_working_set_t *data_set) @@ -745,7 +699,7 @@ cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, for (lpc = rsc->children; lpc != NULL; lpc = lpc->next) { resource_t *child = (resource_t *) lpc->data; - rc = cli_resource_delete(crmd_channel, host_uname, child, operation, + rc = cli_resource_delete(controld_api, host_uname, child, operation, interval_spec, just_failures, data_set); if (rc != pcmk_ok) { return rc; @@ -779,7 +733,7 @@ cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, node = (node_t *) lpc->data; if (node->details->online) { - rc = cli_resource_delete(crmd_channel, node->details->uname, + rc = cli_resource_delete(controld_api, node->details->uname, rsc, operation, interval_spec, just_failures, data_set); } @@ -807,7 +761,7 @@ cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, return -EOPNOTSUPP; } - if (crmd_channel == NULL) { + if (controld_api == NULL) { printf("Dry run: skipping clean-up of %s on %s due to CIB_file\n", rsc->id, host_uname); return pcmk_ok; @@ -821,10 +775,11 @@ cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, } if (just_failures) { - rc = clear_rsc_failures(crmd_channel, host_uname, rsc->id, operation, + rc = clear_rsc_failures(controld_api, host_uname, rsc->id, operation, interval_spec, data_set); } else { - rc = clear_rsc_history(crmd_channel, host_uname, rsc->id, data_set); + rc = clear_rsc_history(controld_api, host_uname, rsc->id, data_set); + rc = pcmk_rc2legacy(rc); } if (rc != pcmk_ok) { printf("Cleaned %s failures on %s, but unable to clean history: 
%s\n", @@ -836,7 +791,7 @@ cli_resource_delete(crm_ipc_t *crmd_channel, const char *host_uname, } int -cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name, +cli_cleanup_all(pcmk_controld_api_t *controld_api, const char *node_name, const char *operation, const char *interval_spec, pe_working_set_t *data_set) { @@ -844,12 +799,11 @@ cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name, int attr_options = pcmk__node_attr_none; const char *display_name = node_name? node_name : "all nodes"; - if (crmd_channel == NULL) { + if (controld_api == NULL) { printf("Dry run: skipping clean-up of %s due to CIB_file\n", display_name); return pcmk_ok; } - crmd_replies_needed = 0; if (node_name) { node_t *node = pe_find_node(data_set->nodes, node_name); @@ -872,7 +826,7 @@ cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name, } if (node_name) { - rc = clear_rsc_failures(crmd_channel, node_name, NULL, + rc = clear_rsc_failures(controld_api, node_name, NULL, operation, interval_spec, data_set); if (rc != pcmk_ok) { printf("Cleaned all resource failures on %s, but unable to clean history: %s\n", @@ -883,7 +837,7 @@ cli_cleanup_all(crm_ipc_t *crmd_channel, const char *node_name, for (GList *iter = data_set->nodes; iter; iter = iter->next) { pe_node_t *node = (pe_node_t *) iter->data; - rc = clear_rsc_failures(crmd_channel, node->details->uname, NULL, + rc = clear_rsc_failures(controld_api, node->details->uname, NULL, operation, interval_spec, data_set); if (rc != pcmk_ok) { printf("Cleaned all resource failures on all nodes, but unable to clean history: %s\n", @@ -948,12 +902,13 @@ cli_resource_check(cib_t * cib_conn, resource_t *rsc) } } +// \return Standard Pacemaker return code int -cli_resource_fail(crm_ipc_t * crmd_channel, const char *host_uname, - const char *rsc_id, pe_working_set_t * data_set) +cli_resource_fail(pcmk_controld_api_t *controld_api, const char *host_uname, + const char *rsc_id, pe_working_set_t *data_set) { - crm_warn("Failing: %s", 
rsc_id); - return send_lrm_rsc_op(crmd_channel, CRM_OP_LRM_FAIL, host_uname, rsc_id, FALSE, data_set); + crm_notice("Failing %s on %s", rsc_id, host_uname); + return send_lrm_rsc_op(controld_api, true, host_uname, rsc_id, data_set); } static GHashTable *