Skip to content

Commit

Permalink
sync pacemakerd with sbd
Browse files Browse the repository at this point in the history
  • Loading branch information
wenningerk committed Dec 9, 2019
1 parent 33e28dc commit 4a37fbb
Show file tree
Hide file tree
Showing 3 changed files with 173 additions and 43 deletions.
125 changes: 99 additions & 26 deletions daemons/pacemakerd/pacemakerd.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,14 @@ static bool global_keep_tracking = false;
static const char *local_name = NULL;
static uint32_t local_nodeid = 0;
static crm_trigger_t *shutdown_trigger = NULL;
static crm_trigger_t *startup_trigger = NULL;
static const char *pid_file = PCMK_RUN_DIR "/pacemaker.pid";

/* State of pacemakerd as reported to IPC clients: the ping handler copies
 * this string into the XML_PING_ATTR_PACEMAKERDSTATE attribute of its reply.
 * Starts out as XML_PING_ATTR_PACEMAKERDSTATE_INIT and is reassigned to other
 * XML_PING_ATTR_PACEMAKERDSTATE_* constants as startup and shutdown progress.
 */
static const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT;
/* Set to TRUE in main() when pcmk_locate_sbd() returns a positive value.
 * When TRUE, main() registers a startup trigger instead of calling
 * init_children_processes() directly, and pcmk_shutdown_worker() returns
 * early after "Shutdown complete" until that state has been reported via ping.
 */
static gboolean running_with_sbd = FALSE;
/* Set to TRUE by the ping handler each time a CRM_OP_PING request is handled,
 * i.e. it is TRUE once at least one state query has been seen.
 */
static gboolean first_state_query_seen = FALSE;
/* Set to TRUE by the ping handler after it has answered (or tried to answer)
 * a ping while pacemakerd_state was the shutdown-complete value; checked by
 * pcmk_shutdown_worker() together with running_with_sbd.
 */
static gboolean shutdown_complete_state_reported = FALSE;

typedef struct pcmk_child_s {
int pid;
long flag;
Expand Down Expand Up @@ -444,6 +450,7 @@ pcmk_shutdown_worker(gpointer user_data)
if (phase == 0) {
crm_notice("Shutting down Pacemaker");
phase = max;
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN;
}

for (; phase > 0; phase--) {
Expand Down Expand Up @@ -497,6 +504,10 @@ pcmk_shutdown_worker(gpointer user_data)

/* send_cluster_id(); */
crm_notice("Shutdown complete");
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE;
if (running_with_sbd && !shutdown_complete_state_reported) {
return TRUE;
}

{
const char *delay = daemon_option("shutdown_delay");
Expand Down Expand Up @@ -563,35 +574,88 @@ pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
crm_client_t *c = crm_client_get(qbc);
xmlNode *msg = crm_ipcs_recv(c, data, size, &id, &flags);

crm_ipcs_send_ack(c, id, flags, "ack", __FUNCTION__, __LINE__);
if (msg == NULL) {
return 0;
if (msg != NULL) {
task = crm_element_value(msg, F_CRM_TASK);
}

task = crm_element_value(msg, F_CRM_TASK);
if (crm_str_eq(task, CRM_OP_QUIT, TRUE)) {
/* Time to quit */
crm_notice("Shutting down in response to ticket %s (%s)",
crm_element_value(msg, F_CRM_REFERENCE), crm_element_value(msg, F_CRM_ORIGIN));
pcmk_shutdown(15);
if (crm_str_eq(task, CRM_OP_PING, TRUE)) {
const char *value = NULL;
xmlNode *ping = NULL;
xmlNode *reply = NULL;
time_t pinged = time(NULL);

/* Pinged for status */
crm_trace("Pinged from %s.%s",
crm_element_value(msg, F_CRM_ORIGIN),
crm_element_value(msg, F_CRM_SYS_FROM));
first_state_query_seen = TRUE;
ping = create_xml_node(NULL, XML_CRM_TAG_PING);
value = crm_element_value(msg, F_CRM_SYS_TO);
crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value);
crm_xml_add(ping, XML_PING_ATTR_PACEMAKERDSTATE, pacemakerd_state);
crm_xml_add_int(ping, XML_ATTR_TSTAMP, (int) pinged);
crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
reply = create_reply(msg, ping);
free_xml(ping);
if (reply) {
const char *local_name = get_local_node_name();

if ((crm_element_value(reply, F_CRM_HOST_FROM) == NULL) &&
local_name) {
crm_xml_add(reply, F_CRM_HOST_FROM, local_name);
}
if (crm_ipcs_send(c, id, reply, crm_ipc_server_event) <= 0) {
crm_err("Failed sending ping-reply");
}
free_xml(reply);
} else {
crm_err("Failed building ping-reply");
}
if (crm_str_eq(pacemakerd_state,
XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE, TRUE)) {
sleep(5); /* get out message - less ugly alternative? */
shutdown_complete_state_reported = TRUE;
if (shutdown_trigger) {
mainloop_set_trigger(shutdown_trigger);
}
} else if (crm_str_eq(pacemakerd_state,
XML_PING_ATTR_PACEMAKERDSTATE_WAITPING,
TRUE)) {
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
mainloop_set_trigger(startup_trigger);
}
} else {
crm_ipcs_send_ack(c, id, flags, "ack", __FUNCTION__, __LINE__);

} else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) {
/* Send to everyone */
struct iovec *iov;
int id = 0;
const char *name = NULL;
if (msg == NULL) {
return 0;
}

crm_element_value_int(msg, XML_ATTR_ID, &id);
name = crm_element_value(msg, XML_ATTR_UNAME);
crm_notice("Instructing peers to remove references to node %s/%u", name, id);
if (crm_str_eq(task, CRM_OP_QUIT, TRUE)) {
/* Time to quit */
crm_notice("Shutting down in response to ticket %s (%s)",
crm_element_value(msg, F_CRM_REFERENCE),
crm_element_value(msg, F_CRM_ORIGIN));
pcmk_shutdown(15);

iov = calloc(1, sizeof(struct iovec));
iov->iov_base = dump_xml_unformatted(msg);
iov->iov_len = 1 + strlen(iov->iov_base);
send_cpg_iov(iov);
} else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) {
/* Send to everyone */
struct iovec *iov;
int id = 0;
const char *name = NULL;

} else {
update_process_clients(c);
crm_element_value_int(msg, XML_ATTR_ID, &id);
name = crm_element_value(msg, XML_ATTR_UNAME);
crm_notice("Instructing peers to remove references to node %s/%u", name, id);

iov = calloc(1, sizeof(struct iovec));
iov->iov_base = dump_xml_unformatted(msg);
iov->iov_len = 1 + strlen(iov->iov_base);
send_cpg_iov(iov);

} else {
update_process_clients(c);
}
}

free_xml(msg);
Expand Down Expand Up @@ -1051,8 +1115,8 @@ find_and_track_existing_processes(void)
return (tracking > INT_MAX) ? INT_MAX : tracking;
}

static void
init_children_processes(void)
static gboolean
init_children_processes(gpointer user_data)
{
int start_seq = 1, lpc = 0;
static int max = SIZEOF(pcmk_children);
Expand All @@ -1078,6 +1142,8 @@ init_children_processes(void)
* This may be useful for the daemons to know
*/
setenv("PCMK_respawned", "true", 1);
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING;
return TRUE;
}

static void
Expand Down Expand Up @@ -1356,6 +1422,7 @@ main(int argc, char **argv)

if(pcmk_locate_sbd() > 0) {
setenv("PCMK_watchdog", "true", 1);
running_with_sbd = TRUE;
} else {
setenv("PCMK_watchdog", "false", 1);
}
Expand Down Expand Up @@ -1394,7 +1461,13 @@ main(int argc, char **argv)
mainloop_add_signal(SIGTERM, pcmk_shutdown);
mainloop_add_signal(SIGINT, pcmk_shutdown);

init_children_processes();
if (running_with_sbd) {
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING;
startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL);
} else {
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
init_children_processes(NULL);
}

crm_notice("Pacemaker daemon successfully started and accepting connections");
g_main_loop_run(mainloop);
Expand Down
7 changes: 7 additions & 0 deletions include/crm/msg_xml.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,13 @@ extern "C" {
# define XML_PING_ATTR_STATUS "result"
# define XML_PING_ATTR_SYSFROM "crm_subsystem"
# define XML_PING_ATTR_CRMDSTATE "crmd_state"
# define XML_PING_ATTR_PACEMAKERDSTATE "pacemakerd_state"
# define XML_PING_ATTR_PACEMAKERDSTATE_INIT "init"
# define XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS "starting_daemons"
# define XML_PING_ATTR_PACEMAKERDSTATE_WAITPING "wait_for_ping"
# define XML_PING_ATTR_PACEMAKERDSTATE_RUNNING "running"
# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN "shutting_down"
# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE "shutdown_complete"

# define XML_TAG_FRAGMENT "cib_fragment"

Expand Down
84 changes: 67 additions & 17 deletions tools/crmadmin.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/iso8601.h>

#include <crm/common/mainloop.h>

Expand All @@ -33,6 +34,7 @@ static int message_timeout_ms = 30 * 1000;

static GMainLoop *mainloop = NULL;
static crm_ipc_t *crmd_channel = NULL;
static crm_ipc_t *pacemakerd_channel = NULL;
static char *admin_uuid = NULL;

gboolean do_init(void);
Expand All @@ -46,6 +48,7 @@ static gboolean BE_VERBOSE = FALSE;
static int expected_responses = 1;
static gboolean BASH_EXPORT = FALSE;
static gboolean DO_HEALTH = FALSE;
static gboolean DO_PACEMAKERD_HEALTH = FALSE;
static gboolean DO_RESET = FALSE;
static gboolean DO_RESOURCE = FALSE;
static gboolean DO_ELECT_DC = FALSE;
Expand All @@ -70,6 +73,8 @@ static struct crm_option long_options[] = {
/* daemon options */
{"status", 1, 0, 'S', "Display the status of the specified node." },
{"-spacer-", 1, 0, '-', "\n\tResult is the node's internal FSM state which can be useful for debugging\n"},
{"pacemakerd",0, 0, 'P', "Display the status of local pacemakerd."},
{"-spacer-", 1, 0, '-', "\n\tResult is the state of the sub-daemons watched by pacemakerd\n"},
{"dc_lookup", 0, 0, 'D', "Display the uname of the node co-ordinating the cluster."},
{"-spacer-", 1, 0, '-', "\n\tThis is an internal detail and is rarely useful to administrators except when deciding on which node to examine the logs.\n"},
{"nodes", 0, 0, 'N', "\tDisplay the uname of all member nodes"},
Expand Down Expand Up @@ -142,6 +147,9 @@ main(int argc, char **argv)
case 'q':
BE_SILENT = TRUE;
break;
case 'P':
DO_PACEMAKERD_HEALTH = TRUE;
break;
case 'S':
DO_HEALTH = TRUE;
crm_trace("Option %c => %s", flag, optarg);
Expand Down Expand Up @@ -215,19 +223,26 @@ do_work(void)
xmlNode *msg_data = NULL;
gboolean all_is_good = TRUE;

if (DO_HEALTH == TRUE) {
if ((DO_HEALTH == TRUE) || (DO_PACEMAKERD_HEALTH == TRUE)) {
crm_trace("Querying the system");

sys_to = CRM_SYSTEM_DC;

if (dest_node != NULL) {
if ((DO_HEALTH == TRUE) && (dest_node != NULL)) {
sys_to = CRM_SYSTEM_CRMD;
crmd_operation = CRM_OP_PING;

if (BE_VERBOSE) {
expected_responses = 1;
}

} else if (DO_PACEMAKERD_HEALTH == TRUE) {
sys_to = CRM_SYSTEM_MCP;
crmd_operation = CRM_OP_PING;

if (BE_VERBOSE) {
expected_responses = 1;
}
} else {
crm_info("Cluster-wide health not available yet");
all_is_good = FALSE;
Expand Down Expand Up @@ -286,7 +301,7 @@ do_work(void)
}

/* send it */
if (crmd_channel == NULL) {
if ((DO_PACEMAKERD_HEALTH?pacemakerd_channel:crmd_channel) == NULL) {
crm_err("The IPC connection is not valid, cannot send anything");
return -1;
}
Expand All @@ -303,7 +318,8 @@ do_work(void)
xmlNode *cmd = create_request(crmd_operation, msg_data, dest_node, sys_to,
crm_system_name, admin_uuid);

crm_ipc_send(crmd_channel, cmd, 0, 0, NULL);
crm_ipc_send(DO_PACEMAKERD_HEALTH?pacemakerd_channel:crmd_channel,
cmd, 0, 0, NULL);
free_xml(cmd);
}

Expand All @@ -329,21 +345,39 @@ struct ipc_client_callbacks crm_callbacks = {
gboolean
do_init(void)
{
mainloop_io_t *source =
mainloop_io_t *crmd_source =
mainloop_add_ipc_client(CRM_SYSTEM_CRMD, G_PRIORITY_DEFAULT, 0, NULL, &crm_callbacks);
mainloop_io_t *pacemakerd_source =
mainloop_add_ipc_client(CRM_SYSTEM_MCP, G_PRIORITY_DEFAULT, 0, NULL, &crm_callbacks);

admin_uuid = crm_getpid_s();

crmd_channel = mainloop_get_ipc_client(source);
crmd_channel = mainloop_get_ipc_client(crmd_source);
pacemakerd_channel = mainloop_get_ipc_client(pacemakerd_source);

if (DO_RESOURCE || DO_RESOURCE_LIST || DO_NODE_LIST) {
if (DO_RESOURCE || DO_RESOURCE_LIST || DO_NODE_LIST || DO_PACEMAKERD_HEALTH) {
return TRUE;

} else if (crmd_channel != NULL) {
xmlNode *xml = create_hello_message(admin_uuid, crm_system_name, "0", "1");
} else {
int hellos = 0;

if (crmd_channel != NULL) {
xmlNode *xml = create_hello_message(admin_uuid, crm_system_name, "0", "1");

crm_ipc_send(crmd_channel, xml, 0, 0, NULL);
return TRUE;
crm_ipc_send(crmd_channel, xml, 0, 0, NULL);
hellos++;
}
#if 0
if (pacemakerd_channel != NULL) {
xmlNode *xml = create_hello_message(admin_uuid, crm_system_name, "0", "1");

crm_ipc_send(pacemakerd_channel, xml, 0, 0, NULL);
hellos++;
}
#endif
if (hellos == 1) {
return TRUE;
}
}
return FALSE;
}
Expand Down Expand Up @@ -394,15 +428,31 @@ admin_msg_callback(const char *buffer, ssize_t length, gpointer userdata)

} else if (validate_crm_message(xml, crm_system_name, admin_uuid, XML_ATTR_RESPONSE) == FALSE) {
crm_trace("Message was not a CRM response. Discarding.");
printf("Validation of response failed\n");

} else if (DO_HEALTH) {
} else if (DO_HEALTH || DO_PACEMAKERD_HEALTH) {
xmlNode *data = get_message_xml(xml, F_CRM_DATA);
const char *state = crm_element_value(data, XML_PING_ATTR_CRMDSTATE);

printf("Status of %s@%s: %s (%s)\n",
const char *state = DO_PACEMAKERD_HEALTH?
crm_element_value(data, XML_PING_ATTR_PACEMAKERDSTATE):
crm_element_value(data, XML_PING_ATTR_CRMDSTATE);
const char *host_from = crm_element_value(xml, F_CRM_HOST_FROM);
time_t pinged = (time_t) 0;
crm_time_t *crm_when = crm_time_new(NULL);
char *pinged_buf = NULL;

crm_element_value_int(data, XML_ATTR_TSTAMP, (int *) &pinged);
crm_time_set_timet(crm_when, &pinged);
pinged_buf = crm_time_as_string(crm_when,
crm_time_log_date | crm_time_log_timeofday | crm_time_log_with_timezone);
printf("Status of %s@%s: %s (%s%s%s)\n",
crm_element_value(data, XML_PING_ATTR_SYSFROM),
crm_element_value(xml, F_CRM_HOST_FROM),
state, crm_element_value(data, XML_PING_ATTR_STATUS));
host_from?host_from:"local",
state, crm_element_value(data, XML_PING_ATTR_STATUS),
((int) pinged)?" @ ":"",
((int) pinged)?pinged_buf:"");

free(pinged_buf);
crm_time_free(crm_when);

if (BE_SILENT && state != NULL) {
fprintf(stderr, "%s\n", state);
Expand Down

0 comments on commit 4a37fbb

Please sign in to comment.