Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sync pacemakerd with sbd #1957

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 109 additions & 31 deletions daemons/pacemakerd/pacemakerd.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,14 @@ static bool global_keep_tracking = false;
static const char *local_name = NULL;
static uint32_t local_nodeid = 0;
static crm_trigger_t *shutdown_trigger = NULL;
static crm_trigger_t *startup_trigger = NULL;
static const char *pid_file = PCMK_RUN_DIR "/pacemaker.pid";

static const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT;
static gboolean running_with_sbd = FALSE;
static uint shutdown_complete_state_reported_to = 0;
static gboolean shutdown_complete_state_reported_client_closed = FALSE;

typedef struct pcmk_child_s {
int pid;
long flag;
Expand Down Expand Up @@ -435,21 +441,20 @@ escalate_shutdown(gpointer data)
static gboolean
pcmk_shutdown_worker(gpointer user_data)
{
static int phase = 0;
static int phase = SIZEOF(pcmk_children);
static time_t next_log = 0;
static int max = SIZEOF(pcmk_children);

int lpc = 0;

if (phase == 0) {
if (phase == SIZEOF(pcmk_children)) {
crm_notice("Shutting down Pacemaker");
phase = max;
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN;
kgaillot marked this conversation as resolved.
Show resolved Hide resolved
}

for (; phase > 0; phase--) {
/* Don't stop anything with start_seq < 1 */

for (lpc = max - 1; lpc >= 0; lpc--) {
for (lpc = SIZEOF(pcmk_children) - 1; lpc >= 0; lpc--) {
pcmk_child_t *child = &(pcmk_children[lpc]);

if (phase != child->start_seq) {
Expand Down Expand Up @@ -497,6 +502,11 @@ pcmk_shutdown_worker(gpointer user_data)

/* send_cluster_id(); */
crm_notice("Shutdown complete");
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE;
if (!fatal_error && running_with_sbd &&
!shutdown_complete_state_reported_client_closed) {
return TRUE;
kgaillot marked this conversation as resolved.
Show resolved Hide resolved
}

{
const char *delay = daemon_option("shutdown_delay");
Expand Down Expand Up @@ -553,6 +563,50 @@ pcmk_ipc_created(qb_ipcs_connection_t * c)
crm_trace("Connection %p", c);
}

static void
pcmk_handle_ping_request(crm_client_t *c, xmlNode *msg, uint32_t id)
{
const char *value = NULL;
xmlNode *ping = NULL;
xmlNode *reply = NULL;
time_t pinged = time(NULL);
const char *from = crm_element_value(msg, F_CRM_SYS_FROM);

/* Pinged for status */
crm_trace("Pinged from %s.%s",
crm_element_value(msg, F_CRM_ORIGIN),
from?from:"unknown");
ping = create_xml_node(NULL, XML_CRM_TAG_PING);
value = crm_element_value(msg, F_CRM_SYS_TO);
crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value);
crm_xml_add(ping, XML_PING_ATTR_PACEMAKERDSTATE, pacemakerd_state);
crm_xml_add_ll(ping, XML_ATTR_TSTAMP, (long long) pinged);
crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
reply = create_reply(msg, ping);
free_xml(ping);
if (reply) {
if (crm_ipcs_send(c, id, reply, crm_ipc_server_event) <= 0) {
crm_err("Failed sending ping-reply");
}
free_xml(reply);
} else {
crm_err("Failed building ping-reply");
}
/* just proceed state on sbd pinging us */
if (from && strstr(from, "sbd")) {
if (crm_str_eq(pacemakerd_state,
XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE,
TRUE)) {
shutdown_complete_state_reported_to = c->pid;
} else if (crm_str_eq(pacemakerd_state,
XML_PING_ATTR_PACEMAKERDSTATE_WAITPING,
TRUE)) {
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
mainloop_set_trigger(startup_trigger);
}
kgaillot marked this conversation as resolved.
Show resolved Hide resolved
}
}

/* Exit code means? */
static int32_t
pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
Expand All @@ -563,35 +617,44 @@ pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
crm_client_t *c = crm_client_get(qbc);
xmlNode *msg = crm_ipcs_recv(c, data, size, &id, &flags);

crm_ipcs_send_ack(c, id, flags, "ack", __FUNCTION__, __LINE__);
if (msg == NULL) {
return 0;
if (msg != NULL) {
task = crm_element_value(msg, F_CRM_TASK);
}

task = crm_element_value(msg, F_CRM_TASK);
if (crm_str_eq(task, CRM_OP_QUIT, TRUE)) {
/* Time to quit */
crm_notice("Shutting down in response to ticket %s (%s)",
crm_element_value(msg, F_CRM_REFERENCE), crm_element_value(msg, F_CRM_ORIGIN));
pcmk_shutdown(15);
if (crm_str_eq(task, CRM_OP_PING, TRUE)) {
pcmk_handle_ping_request(c, msg, id);
} else {
crm_ipcs_send_ack(c, id, flags, "ack", __FUNCTION__, __LINE__);

} else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) {
/* Send to everyone */
struct iovec *iov;
int id = 0;
const char *name = NULL;
if (msg == NULL) {
return 0;
}

crm_element_value_int(msg, XML_ATTR_ID, &id);
name = crm_element_value(msg, XML_ATTR_UNAME);
crm_notice("Instructing peers to remove references to node %s/%u", name, id);
if (crm_str_eq(task, CRM_OP_QUIT, TRUE)) {
/* Time to quit */
crm_notice("Shutting down in response to ticket %s (%s)",
crm_element_value(msg, F_CRM_REFERENCE),
crm_element_value(msg, F_CRM_ORIGIN));
pcmk_shutdown(15);

iov = calloc(1, sizeof(struct iovec));
iov->iov_base = dump_xml_unformatted(msg);
iov->iov_len = 1 + strlen(iov->iov_base);
send_cpg_iov(iov);
} else if (crm_str_eq(task, CRM_OP_RM_NODE_CACHE, TRUE)) {
/* Send to everyone */
struct iovec *iov;
int id = 0;
const char *name = NULL;

} else {
update_process_clients(c);
crm_element_value_int(msg, XML_ATTR_ID, &id);
name = crm_element_value(msg, XML_ATTR_UNAME);
crm_notice("Instructing peers to remove references to node %s/%u", name, id);

iov = calloc(1, sizeof(struct iovec));
iov->iov_base = dump_xml_unformatted(msg);
iov->iov_len = 1 + strlen(iov->iov_base);
send_cpg_iov(iov);

} else {
update_process_clients(c);
}
}

free_xml(msg);
Expand All @@ -608,6 +671,12 @@ pcmk_ipc_closed(qb_ipcs_connection_t * c)
return 0;
}
crm_trace("Connection %p", c);
if (shutdown_complete_state_reported_to == client->pid) {
shutdown_complete_state_reported_client_closed = TRUE;
if (shutdown_trigger) {
mainloop_set_trigger(shutdown_trigger);
}
}
crm_client_destroy(client);
return 0;
}
Expand Down Expand Up @@ -1051,8 +1120,8 @@ find_and_track_existing_processes(void)
return (tracking > INT_MAX) ? INT_MAX : tracking;
}

static void
init_children_processes(void)
static gboolean
init_children_processes(gpointer user_data)
{
int start_seq = 1, lpc = 0;
static int max = SIZEOF(pcmk_children);
Expand All @@ -1078,6 +1147,8 @@ init_children_processes(void)
* This may be useful for the daemons to know
*/
setenv("PCMK_respawned", "true", 1);
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING;
return TRUE;
}

static void
Expand Down Expand Up @@ -1356,6 +1427,7 @@ main(int argc, char **argv)

if(pcmk_locate_sbd() > 0) {
setenv("PCMK_watchdog", "true", 1);
running_with_sbd = TRUE;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be interesting to s/gboolean running_with_sbd/pid_t sbd_pid/ and add it to the ping response. Non-sbd pingers might get some value out of knowing that pacemaker is relying on sbd.

} else {
setenv("PCMK_watchdog", "false", 1);
}
Expand Down Expand Up @@ -1394,7 +1466,13 @@ main(int argc, char **argv)
mainloop_add_signal(SIGTERM, pcmk_shutdown);
mainloop_add_signal(SIGINT, pcmk_shutdown);

init_children_processes();
if (running_with_sbd) {
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING;
startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL);
} else {
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
init_children_processes(NULL);
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you traced what happens when pacemakerd itself respawns? find_and_track_existing_processes() will "adopt" any child daemons already running, so maybe we shouldn't wait for a ping in that case (the assumption would be that the ping succeeded with the previous pacemakerd instance), but I'm not sure.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was thinking about that but for simplicity and to prevent it from coming up when it shouldn't under some respawn-race I decided to wait this additional second.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

find_and_track_existing_processes() returns the number of existing daemons found, so we can distinguish three cases:

  • 0: as above
  • SIZEOF(pcmk_children): Most likely, pacemakerd itself crashed, was respawned by systemd, and found all the daemons already running. In this case it probably makes sense to go straight to RUNNING just to reflect reality.
  • Any other positive number: Most likely, corosync dropped, all the corosync-based daemons including pacemakerd exited, systemd respawned pacemakerd, and pacemakerd found the non-corosync daemons (scheduler and executor) already running. I suspect WAITPING makes sense here too, but I'm not 100% sure.


crm_notice("Pacemaker daemon successfully started and accepting connections");
g_main_loop_run(mainloop);
Expand Down
2 changes: 1 addition & 1 deletion include/crm/common/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ MAINTAINERCLEANFILES = Makefile.in
headerdir=$(pkgincludedir)/crm/common

header_HEADERS = xml.h ipc.h util.h iso8601.h mainloop.h logging.h results.h \
nvpair.h
nvpair.h pacemakerd_types.h
noinst_HEADERS = cib_secrets.h ipcs.h internal.h alerts_internal.h \
iso8601_internal.h remote_internal.h xml_internal.h \
ipc_internal.h output.h cmdline_internal.h curses_internal.h
70 changes: 70 additions & 0 deletions include/crm/common/pacemakerd_types.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Copyright 2004-2019 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
* This source code is licensed under the GNU Lesser General Public License
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
*/

#ifndef PACEMAKERD_TYPES__H
# define PACEMAKERD_TYPES__H

#ifdef __cplusplus
extern "C" {
#endif

#include <time.h>

enum pacemakerd_conn_state {
pacemakerd_conn_connected,
pacemakerd_conn_disconnected
};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm conflicted about the naming. I'd prefer that new public APIs use the "pcmk_" prefix but I can't think of a good choice for this.

Looking at existing client APIs, we have: crm_/crm_cluster_t, cib/cib_t, attrd_/crm_ipc_t, lrmd_/lrmd_t, and stonith_/stonith_t. I'm not a fan of the "d" prefixes because I'd rather use those within the daemons themselves, and be clearer that these APIs are for clients. Going forward I could see something like pcmk_cib_, pcmk_attr_, pcmk_exec_.

Maybe "pcmk_master_"?

Copy link
Contributor Author

@wenningerk wenningerk Dec 17, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All in all quite a mess as we are ending up with a mixture of the old and the new names.
We've just deprecated master/slave but I'm hesitant using master for this here still.
As we're anyway not getting away from it in the ipc what about MCP (pcmk_mcp_ then)?
For the filenames then as well?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mcp stands for master control process, so that only hides it :)
Maybe "launcher" or "supervisor"? pcmk_launcher_ping() / pcmk_super_ping()

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Jeah I know what it stands for but when you see "mcp" it doesn't pattern-match "master" and after unfolding you get what it really is about.
I'm just not feeling well using a third name for the same thing unless we make pacemakerd pacemaker-launcher but I guess that would cause a lot of issues.
What about something that can't be confused with a component like pcmk_toplevel_api ... not really impressive but as an idea to develop maybe ... or nothing at all pcmk_api ...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pacemaker-launchd was a serious possibility for 2.0 but we decided against it. mcp isn't used at all publicly in 2.0, so we wouldn't be adding a third, we'd just be relabeling it like the other 1.1 names. pcmk_api_ feels redundant. I like "supervisor" as being closest to what it actually does. "global" or "core" might work.


enum pacemakerd_state {
pacemakerd_state_invalid = -1,
pacemakerd_state_init = 0,
pacemakerd_state_starting_daemons,
pacemakerd_state_wait_for_ping,
pacemakerd_state_running,
pacemakerd_state_shutting_down,
pacemakerd_state_shutdown_complete,
pacemakerd_state_max = pacemakerd_state_shutdown_complete,
};

typedef struct pacemakerd_s pacemakerd_t;

typedef struct pacemakerd_api_operations_s {
int (*connect) (pacemakerd_t *pacemakerd, const char *name);
int (*disconnect) (pacemakerd_t *pacemakerd);
void (*free) (pacemakerd_t *pacemakerd);
int (*set_ping_callback) (pacemakerd_t *pacemakerd,
void (*callback) (pacemakerd_t *pacemakerd,
time_t last_good,
enum pacemakerd_state state,
int rc, gpointer userdata),
gpointer userdata);
int (*set_disconnect_callback) (pacemakerd_t *pacemakerd,
void (*callback) (gpointer userdata),
gpointer userdata
);
int (*ping) (pacemakerd_t *pacemakerd, const char *name,
const char *admin_uuid, int call_options);
} pacemakerd_api_operations_t;

struct pacemakerd_s {
enum pacemakerd_conn_state conn_state;
void *pacemakerd_private;
pacemakerd_api_operations_t *cmds;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the methods here are invariant, I'd recommend something more like the crm_ipc_t model:

  • Keep just the typedef in the public header, and have the entire struct definition private
  • Add a getter method for conn_state if needed
  • Use regular functions as the methods rather than function pointers in a struct

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really want to break with that? crm_ipc_t is rather the exception so far while other APIs don't make much use of virtual methods either.
Getters and setters aren't optimized away as when used with C++ - right. But of course the impact is probably minor compared to the flexibility they offer over direct access.
Not seeing that we are actually using the possibilities so far but there are probably a lot of things that could be done with methods called via pointers.
Things coming to my mind is handling of deprecated stuff like on the fly insertion of a wrapper, add a new prototype of a method at the end while keeping the deprecated original with an altered name at the old place so that things compiled against the old types will still work while stuff compiled at the new versions will see the new prototype - all without version-handling and stuff.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see the value for a case like this, but I'm fine with keeping pointers if you want them. I'd still hide conn_state (callers shouldn't change it), and I'd put the pointers in the main struct rather than use a separate struct for them (since we're hiding everything else anyway).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we decide to walk away from the pointers on the longer run and with new APIs for now I have nothing against it. Just wanted to be sure we aren't overlooking some potential of what we already have before we alter it or proceed in an inconsistent way.
Having the connection-state visible was a decision for consistency and a little bit of performance. Was as well not sure if it could make sense to put a timestamp into it as well having some kind of default ping-handler that just updates this timestamp and does the
pinging/reconnecting stuff autonomously. But of course we can read that timestamp with a
getter as well - with single-threaded pacemaker we probably won't need a wrapper for atomic-access but who knows what the future will bring.
Of course there is no need for having the pointers in an extra allocated memory once we don't
want to grow the data in the base-structure and the function-pointer-list both in a compatible manner any more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I didn't think of being able to add to the bottom of each separately. Though even then, it would just be for visual organization -- you could mix new members and methods at the end of a single struct.

I like hiding the entire type partly because we have complete freedom over the implementation (don't have to add things at the end, can change types of existing members, etc.) and partly because it's impossible for callers to misuse it by statically declaring a struct or using sizeof() on it. Using the void* private technique gets most of that though.

};

pacemakerd_t * pacemakerd_api_new(void);
void pacemakerd_api_delete(pacemakerd_t * pacemakerd);
enum pacemakerd_state pacemakerd_state_text2enum(const char *state);
const char *pacemakerd_state_enum2text(enum pacemakerd_state state);

#ifdef __cplusplus
}
#endif

#endif // PACEMAKERD_TYPES__H
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

missing newline

7 changes: 7 additions & 0 deletions include/crm/msg_xml.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,13 @@ extern "C" {
# define XML_PING_ATTR_STATUS "result"
# define XML_PING_ATTR_SYSFROM "crm_subsystem"
# define XML_PING_ATTR_CRMDSTATE "crmd_state"
# define XML_PING_ATTR_PACEMAKERDSTATE "pacemakerd_state"
# define XML_PING_ATTR_PACEMAKERDSTATE_INIT "init"
# define XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS "starting_daemons"
# define XML_PING_ATTR_PACEMAKERDSTATE_WAITPING "wait_for_ping"
# define XML_PING_ATTR_PACEMAKERDSTATE_RUNNING "running"
# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN "shutting_down"
# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE "shutdown_complete"

# define XML_TAG_FRAGMENT "cib_fragment"

Expand Down
1 change: 1 addition & 0 deletions lib/common/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ libcrmcommon_la_SOURCES += utils.c
libcrmcommon_la_SOURCES += watchdog.c
libcrmcommon_la_SOURCES += xml.c
libcrmcommon_la_SOURCES += xpath.c
libcrmcommon_la_SOURCES += pacemakerd_client.c

# It's possible to build the library adding ../gnu/md5.c directly to SOURCES,
# but distclean chokes on that because it tries to include the source's .Plo
Expand Down
Loading