Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix [#873]: Don't notify if parents down #995

35 changes: 18 additions & 17 deletions base/checks.c
Original file line number Diff line number Diff line change
Expand Up @@ -1208,6 +1208,8 @@ int handle_async_service_check_result(service *svc, check_result *cr)

host * hst = NULL;

int notification_type = NOTIFICATION_NORMAL;

log_debug_info(DEBUGL_FUNCTIONS, 0, "handle_async_service_check_result()\n");

if (svc == NULL) {
Expand Down Expand Up @@ -1590,10 +1592,16 @@ int handle_async_service_check_result(service *svc, check_result *cr)
handle_event = TRUE;
}

// Recovery notification was not sent
if(svc->notified_on != 0 && svc->current_state == STATE_OK && svc->state_type == HARD_STATE) {
notification_type = NOTIFICATION_RECOVERY;
send_notification = TRUE;
}

if (send_notification == TRUE) {

/* send notification */
if (service_notification(svc, NOTIFICATION_NORMAL, NULL, NULL, NOTIFICATION_OPTION_NONE) == OK) {
if (service_notification(svc, notification_type, NULL, NULL, NOTIFICATION_OPTION_NONE) == OK) {

/* log state due to notification event when stalking_options N is set */
if (should_stalk_notifications(svc)) {
Expand All @@ -1602,13 +1610,6 @@ int handle_async_service_check_result(service *svc, check_result *cr)
}
}

/* the service recovered, so reset the current notification number and state flags (after the recovery notification has gone out) */
/* We don't want to reset notifications if the service is currently flapping because we want recovery notifications */
if(svc->current_state == STATE_OK && svc->state_type == HARD_STATE && hard_state_change == TRUE && svc->is_flapping == FALSE) {
svc->current_notification_number = 0;
svc->notified_on = 0;
}

if (obsess_over_services == TRUE) {
obsessive_compulsive_service_check_processor(svc);
}
Expand All @@ -1634,7 +1635,6 @@ int handle_async_service_check_result(service *svc, check_result *cr)

/* Reset attempts */
if (hard_state_change == TRUE) {
svc->current_notification_number = 0;
svc->host_problem_at_last_check = FALSE;
}

Expand Down Expand Up @@ -2255,6 +2255,8 @@ int handle_async_host_check_result(host *hst, check_result *cr)

char * old_plugin_output = NULL;

int notification_type = NOTIFICATION_NORMAL;

log_debug_info(DEBUGL_FUNCTIONS, 0, "handle_async_host_check_result()\n");

if (is_valid_check_result_data(hst, cr) == FALSE) {
Expand Down Expand Up @@ -2482,10 +2484,16 @@ int handle_async_host_check_result(host *hst, check_result *cr)
hst->current_attempt = 1;
}

// Recovery notification was not sent
if(hst->notified_on != 0 && hst->current_state == HOST_UP && hst->state_type == HARD_STATE) {
notification_type = NOTIFICATION_RECOVERY;
send_notification = TRUE;
}

if (send_notification == TRUE) {

/* send notifications */
if (host_notification(hst, NOTIFICATION_NORMAL, NULL, NULL, NOTIFICATION_OPTION_NONE) == OK) {
if (host_notification(hst, notification_type, NULL, NULL, NOTIFICATION_OPTION_NONE) == OK) {

/* log state due to notification event when stalking_options N is set */
if (should_stalk_notifications(hst)) {
Expand All @@ -2494,13 +2502,6 @@ int handle_async_host_check_result(host *hst, check_result *cr)
}
}

/* the host recovered, so reset the current notification number and state flags (after the recovery notification has gone out) */
/* We don't want to reset notifications if the host is currently flapping because we want recovery notifications */
if(hst->current_state == HOST_UP && hst->state_type == HARD_STATE && hard_state_change == TRUE && hst->is_flapping == FALSE) {
hst->current_notification_number = 0;
hst->notified_on = 0;
}

if (obsess_over_hosts == TRUE) {
obsessive_compulsive_host_check_processor(hst);
}
Expand Down
10 changes: 2 additions & 8 deletions base/commands.c
Original file line number Diff line number Diff line change
Expand Up @@ -5390,10 +5390,7 @@ void clear_host_flapping_state(host *hst) {

/* should we send a recovery notification? */
if (hst->current_state == HOST_UP && hst->check_flapping_recovery_notification == TRUE) {
host_notification(hst, NOTIFICATION_NORMAL, NULL, NULL, NOTIFICATION_OPTION_NONE);
/* Similar to what happens when a host recovers in handle_async_host_check_result */
hst->current_notification_number = 0;
hst->notified_on = 0;
host_notification(hst, NOTIFICATION_RECOVERY, NULL, NULL, NOTIFICATION_OPTION_NONE);
}
}

Expand Down Expand Up @@ -5450,10 +5447,7 @@ void clear_service_flapping_state(service *svc) {

/* should we send a recovery notification? */
if (svc->current_state == STATE_OK && svc->check_flapping_recovery_notification == TRUE) {
service_notification(svc, NOTIFICATION_NORMAL, NULL, NULL, NOTIFICATION_OPTION_NONE);
/* Similar to what happens when a service recovers in handle_async_service_check_result */
svc->current_notification_number = 0;
svc->notified_on = 0;
service_notification(svc, NOTIFICATION_RECOVERY, NULL, NULL, NOTIFICATION_OPTION_NONE);
}
}

Expand Down
20 changes: 4 additions & 16 deletions base/flapping.c
Original file line number Diff line number Diff line change
Expand Up @@ -375,10 +375,7 @@ void clear_service_flap(service *svc, double percent_change, double high_thresho

/* should we send a recovery notification? */
if(svc->check_flapping_recovery_notification == TRUE && svc->current_state == STATE_OK) {
service_notification(svc, NOTIFICATION_NORMAL, NULL, NULL, NOTIFICATION_OPTION_NONE);
/* Similar to what happens when a service recovers in handle_async_service_check_result */
svc->current_notification_number = 0;
svc->notified_on = 0;
service_notification(svc, NOTIFICATION_RECOVERY, NULL, NULL, NOTIFICATION_OPTION_NONE);
}
}

Expand Down Expand Up @@ -470,10 +467,7 @@ void clear_host_flap(host *hst, double percent_change, double high_threshold, do

/* should we send a recovery notification? */
if(hst->check_flapping_recovery_notification == TRUE && hst->current_state == HOST_UP) {
host_notification(hst, NOTIFICATION_NORMAL, NULL, NULL, NOTIFICATION_OPTION_NONE);
/* Similar to what happens when a host recovers in handle_async_host_check_result */
hst->current_notification_number = 0;
hst->notified_on = 0;
host_notification(hst, NOTIFICATION_RECOVERY, NULL, NULL, NOTIFICATION_OPTION_NONE);
}
}

Expand Down Expand Up @@ -663,10 +657,7 @@ void handle_host_flap_detection_disabled(host *hst) {

/* should we send a recovery notification? */
if(hst->check_flapping_recovery_notification == TRUE && hst->current_state == HOST_UP) {
host_notification(hst, NOTIFICATION_NORMAL, NULL, NULL, NOTIFICATION_OPTION_NONE);
/* Similar to what happens when a host recovers in handle_async_host_check_result */
hst->current_notification_number = 0;
hst->notified_on = 0;
host_notification(hst, NOTIFICATION_RECOVERY, NULL, NULL, NOTIFICATION_OPTION_NONE);
}

/* clear the recovery notification flag */
Expand Down Expand Up @@ -781,10 +772,7 @@ void handle_service_flap_detection_disabled(service *svc) {

/* should we send a recovery notification? */
if(svc->check_flapping_recovery_notification == TRUE && svc->current_state == STATE_OK) {
service_notification(svc, NOTIFICATION_NORMAL, NULL, NULL, NOTIFICATION_OPTION_NONE);
/* Similar to what happens when a service recovers in handle_async_service_check_result */
svc->current_notification_number = 0;
svc->notified_on = 0;
service_notification(svc, NOTIFICATION_RECOVERY, NULL, NULL, NOTIFICATION_OPTION_NONE);
}

/* clear the recovery notification flag */
Expand Down
88 changes: 65 additions & 23 deletions base/notifications.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,14 +98,19 @@ int service_notification(service *svc, int type, char *not_author, char *not_dat
/* check the viability of sending out a service notification */
if(check_service_notification_viability(svc, type, options) == ERROR) {
log_debug_info(DEBUGL_NOTIFICATIONS, 0, "Notification viability test failed. No notification will be sent out.\n");
// Recovery notification wasn't sent because they don't want it. Still have to modify values
if(type == NOTIFICATION_RECOVERY && ! flag_isset(svc->notification_options, OPT_RECOVERY)) {
svc->notified_on = 0;
svc->current_notification_number = 0;
}
/* Set next_notification time if we're in a downtime and
notification_interval = zero, otherwise if the service
doesn't recover after downtime ends, it will never send
the notification */
if (svc->notification_interval == 0.0) {
if (temp_host->scheduled_downtime_depth > 0 || svc->scheduled_downtime_depth > 0)
svc->next_notification = current_time;
}
if (svc->notification_interval == 0.0) {
if (temp_host->scheduled_downtime_depth > 0 || svc->scheduled_downtime_depth > 0)
svc->next_notification = current_time;
}
return ERROR;
}

Expand Down Expand Up @@ -212,7 +217,7 @@ int service_notification(service *svc, int type, char *not_author, char *not_dat
mac.x[MACRO_NOTIFICATIONTYPE] = "DOWNTIMECANCELLED";
else if(type == NOTIFICATION_CUSTOM)
mac.x[MACRO_NOTIFICATIONTYPE] = "CUSTOM";
else if(svc->current_state == STATE_OK)
else if(type == NOTIFICATION_RECOVERY)
mac.x[MACRO_NOTIFICATIONTYPE] = "RECOVERY";
else
mac.x[MACRO_NOTIFICATIONTYPE] = "PROBLEM";
Expand Down Expand Up @@ -276,7 +281,7 @@ int service_notification(service *svc, int type, char *not_author, char *not_dat
clear_servicegroup_macros_r(&mac);
clear_datetime_macros_r(&mac);

if(type == NOTIFICATION_NORMAL) {
if(type == NOTIFICATION_NORMAL || type == NOTIFICATION_RECOVERY) {

/* adjust last/next notification time and notification flags if we notified someone */
if(contacts_notified > 0) {
Expand All @@ -289,8 +294,14 @@ int service_notification(service *svc, int type, char *not_author, char *not_dat
/* update the last notification time for this service (this is needed for rescheduling later notifications) */
svc->last_notification = current_time;

/* update notifications flags */
add_notified_on(svc, svc->current_state);
/* update notifications flags, make sure to consider recovery */
if (type == NOTIFICATION_NORMAL) {
add_notified_on(svc, svc->current_state);
}
else {
svc->notified_on = 0;
svc->current_notification_number = 0;
}
}

/* we didn't end up notifying anyone */
Expand Down Expand Up @@ -603,6 +614,20 @@ int check_service_notification_viability(service *svc, int type, int options) {
return ERROR;
}

/* If any of the parents are down, don't notify */
if (temp_host->parent_hosts != NULL) {
hostsmember *temp_hostsmember = NULL;
host *parent_host = NULL;

for(temp_hostsmember = temp_host->parent_hosts; temp_hostsmember != NULL; temp_hostsmember = temp_hostsmember->next) {
parent_host = temp_hostsmember->host_ptr;
if (parent_host->current_state != HOST_UP) {
log_debug_info(DEBUGL_NOTIFICATIONS, 1, "At least one parent (%s) is down, so we won't notify about this service.\n", parent_host->name);
return ERROR;
}
}
}

/* don't notify if we haven't waited long enough since the last time (and the service is not marked as being volatile) */
if((current_time < svc->next_notification) && svc->is_volatile == FALSE) {
log_debug_info(DEBUGL_NOTIFICATIONS, 1, "We haven't waited long enough to re-notify contacts about this service.\n");
Expand Down Expand Up @@ -1065,6 +1090,11 @@ int host_notification(host *hst, int type, char *not_author, char *not_data, int

/* check viability of sending out a host notification */
if(check_host_notification_viability(hst, type, options) == ERROR) {
// Recovery notification wasn't sent because they don't want it. Still have to modify values
if(type == NOTIFICATION_RECOVERY && ! flag_isset(hst->notification_options, OPT_RECOVERY)) {
hst->notified_on = 0;
hst->current_notification_number = 0;
}
log_debug_info(DEBUGL_NOTIFICATIONS, 0, "Notification viability test failed. No notification will be sent out.\n");
return ERROR;
}
Expand Down Expand Up @@ -1169,7 +1199,7 @@ int host_notification(host *hst, int type, char *not_author, char *not_data, int
mac.x[MACRO_NOTIFICATIONTYPE] = "DOWNTIMECANCELLED";
else if(type == NOTIFICATION_CUSTOM)
mac.x[MACRO_NOTIFICATIONTYPE] = "CUSTOM";
else if(hst->current_state == HOST_UP)
else if(type == NOTIFICATION_RECOVERY)
mac.x[MACRO_NOTIFICATIONTYPE] = "RECOVERY";
else
mac.x[MACRO_NOTIFICATIONTYPE] = "PROBLEM";
Expand Down Expand Up @@ -1233,7 +1263,7 @@ int host_notification(host *hst, int type, char *not_author, char *not_data, int
clear_hostgroup_macros_r(&mac);
clear_datetime_macros_r(&mac);

if(type == NOTIFICATION_NORMAL) {
if(type == NOTIFICATION_NORMAL || type == NOTIFICATION_RECOVERY) {

/* adjust last/next notification time and notification flags if we notified someone */
if(contacts_notified > 0) {
Expand All @@ -1245,7 +1275,13 @@ int host_notification(host *hst, int type, char *not_author, char *not_data, int
hst->last_notification = current_time;

/* update notifications flags */
add_notified_on(hst, hst->current_state);
if(type == NOTIFICATION_NORMAL) {
add_notified_on(hst, hst->current_state);
}
else {
hst->notified_on = 0;
hst->current_notification_number = 0;
}

log_debug_info(DEBUGL_NOTIFICATIONS, 0, "%d contacts were notified. Next possible notification time: %s", contacts_notified, ctime(&hst->next_notification));
}
Expand Down Expand Up @@ -1449,21 +1485,13 @@ int check_host_notification_viability(host *hst, int type, int options) {
}

/* see if we should notify about problems with this host */
if((hst->notification_options & (1 << hst->current_state)) == FALSE) {
if(should_notify(hst) == FALSE) {
log_debug_info(DEBUGL_NOTIFICATIONS, 1, "We shouldn't notify about %s status for this host.\n", host_state_name(hst->current_state));
return ERROR;
}
if(hst->current_state == HOST_UP) {

if((hst->notification_options & OPT_RECOVERY) == FALSE) {
log_debug_info(DEBUGL_NOTIFICATIONS, 1, "We shouldn't notify about RECOVERY states for this host.\n");
return ERROR;
}
if(!hst->notified_on) {
log_debug_info(DEBUGL_NOTIFICATIONS, 1, "We shouldn't notify about this recovery.\n");
return ERROR;
}

if(hst->current_state == HOST_UP && hst->notified_on == 0) {
log_debug_info(DEBUGL_NOTIFICATIONS, 1, "We shouldn't notify about this recovery.\n");
return ERROR;
}

/* see if enough time has elapsed for first notification (Mathias Sundman) */
Expand Down Expand Up @@ -1510,6 +1538,20 @@ int check_host_notification_viability(host *hst, int type, int options) {
return ERROR;
}

/* If any of the parents are down, don't notify */
if (hst->parent_hosts != NULL) {
hostsmember *temp_hostsmember = NULL;
host *parent_host = NULL;

for(temp_hostsmember = hst->parent_hosts; temp_hostsmember != NULL; temp_hostsmember = temp_hostsmember->next) {
parent_host = temp_hostsmember->host_ptr;
if (parent_host->current_state != HOST_UP) {
log_debug_info(DEBUGL_NOTIFICATIONS, 1, "At least one parent (%s) is down, so we won't notify about this host.\n", parent_host->name);
return ERROR;
}
}
}

return OK;
}

Expand Down
1 change: 1 addition & 0 deletions include/nagios.h
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ extern struct load_control loadctl;
#define NOTIFICATION_DOWNTIMEEND 6
#define NOTIFICATION_DOWNTIMECANCELLED 7
#define NOTIFICATION_CUSTOM 8
#define NOTIFICATION_RECOVERY 16
tsadpbb marked this conversation as resolved.
Show resolved Hide resolved



Expand Down
1 change: 1 addition & 0 deletions t-tap/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ test_xsddefault
test_commands
test_downtime
test_strtoul
test_notifications
*.dSYM
4 changes: 4 additions & 0 deletions t-tap/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ TESTS += test_downtime
TESTS += test_nagios_config
TESTS += test_timeperiods
TESTS += test_macros
TESTS += test_notifications

XSD_OBJS = $(BLD_CGI)/statusdata-cgi.o $(BLD_CGI)/xstatusdata-cgi.o
XSD_OBJS += $(BLD_CGI)/objects-cgi.o $(BLD_CGI)/xobjects-cgi.o
Expand Down Expand Up @@ -104,6 +105,9 @@ test_timeperiods: test_timeperiods.o $(TP_OBJS) $(TAPOBJ)
test_macros: test_macros.o $(TP_OBJS) $(TAPOBJ)
$(CC) $(CFLAGS) -o $@ $^ $(BLD_BASE)/checks.o $(BROKER_LDFLAGS) $(LDFLAGS) $(MATHLIBS) $(SOCKETLIBS) $(LIBS)

test_notifications: test_notifications.o $(BLD_BASE)/notifications.o $(BLD_BASE)/checks.o $(BLD_BASE)/utils.o $(BLD_COMMON)/shared.o $(BLD_BASE)/objects-base.o $(TAPOBJ)
$(CC) $(CFLAGS) -o $@ $^ $(LIBS) $(MATHLIBS)

test_xsddefault: test_xsddefault.o $(XSD_OBJS) $(TAPOBJ)
$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)

Expand Down
9 changes: 9 additions & 0 deletions t-tap/stub_broker.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,12 @@ void broker_timed_event(int type, int flags, int attr, timed_event *event, struc

void broker_flapping_data(int type, int flags, int attr, int flapping_type, void *data, double percent_change, double high_threshold, double low_threshold, struct timeval *timestamp)
{ }

int broker_contact_notification_data(int type, int flags, int attr, int notification_type, int reason_type, struct timeval start_time, struct timeval end_time, void *data, contact *cntct, char *ack_author, char *ack_data, int escalated, struct timeval *timestamp)
{ return OK; }

int broker_contact_notification_method_data(int type, int flags, int attr, int notification_type, int reason_type, struct timeval start_time, struct timeval end_time, void *data, contact *cntct, char *cmd, char *ack_author, char *ack_data, int escalated, struct timeval *timestamp)
{ return OK; }

int broker_notification_data(int type, int flags, int attr, int notification_type, int reason_type, struct timeval start_time, struct timeval end_time, void *data, char *ack_author, char *ack_data, int escalated, int contacts_notified, struct timeval *timestamp)
{ return OK; }
Loading