[icinga-checkins] icinga.org: icinga-core/dev/core: * core: avoid insane looping through event list when rescheduling checks ( Mathias Kettner, Andreas Ericsson) #2182

git at icinga.org git at icinga.org
Fri Jan 27 19:57:26 CET 2012


Module: icinga-core
Branch: dev/core
Commit: 809e6bb3bda07df3ba084a5dca2f8267213bf09d
URL:    https://git.icinga.org/?p=icinga-core.git;a=commit;h=809e6bb3bda07df3ba084a5dca2f8267213bf09d

Author: Michael Friedrich <michael.friedrich at univie.ac.at>
Date:   Fri Jan 27 19:35:50 2012 +0100

* core: avoid insane looping through event list when rescheduling checks (Mathias Kettner, Andreas Ericsson) #2182

please refer to https://dev.icinga.org/issues/2182
for a detailed analysis of the problem, the used
algorithms and the revamped logic.

kudos to Mathias and Andreas.

refs #2182

---

 Changelog         |    1 +
 base/checks.c     |   67 +++++++++++++++++++++++++++--------------------------
 include/objects.h |   12 ++++++++-
 3 files changed, 45 insertions(+), 35 deletions(-)

diff --git a/Changelog b/Changelog
index 8f56dec..a688776 100644
--- a/Changelog
+++ b/Changelog
@@ -10,6 +10,7 @@ ENHANCEMENTS
 * core: notifications: Create contact list after eventbroker callbacks (Andreas Ericsson) #2110
 * core: fix event removal from queues with O(1) removal from doubly linked lists (Andreas Ericsson) #2183
 * core: avoid senseless looping when free()'ing macros (Andreas Ericsson) #2184
+* core: avoid insane looping through event list when rescheduling checks (Mathias Kettner, Andreas Ericsson) #2182
 
 FIXES
 * core: Plug some macro leaks triggered when sending notifications (Andreas Ericsson) #2109
diff --git a/base/checks.c b/base/checks.c
index f23b469..f288466 100644
--- a/base/checks.c
+++ b/base/checks.c
@@ -404,6 +404,13 @@ int run_scheduled_service_check(service *svc, int check_options, double latency)
 	log_debug_info(DEBUGL_FUNCTIONS, 0, "run_scheduled_service_check() start\n");
 	log_debug_info(DEBUGL_CHECKS, 0, "Attempting to run scheduled check of service '%s' on host '%s': check options=%d, latency=%lf\n", svc->description, svc->host_name, check_options, latency);
 
+	/*
+	 * reset the next_check_event so we know it's
+	 * no longer in the scheduling queue
+	 * and can't conflict
+	 */
+	svc->next_check_event = NULL;
+	
 	/* attempt to run the check */
 	result = run_async_service_check(svc, check_options, latency, TRUE, TRUE, &time_is_valid, &preferred_time);
 
@@ -1858,7 +1865,6 @@ int handle_async_service_check_result(service *temp_service, check_result *queue
 void schedule_service_check(service *svc, time_t check_time, int options) {
 	timed_event *temp_event = NULL;
 	timed_event *new_event = NULL;
-	int found = FALSE;
 	int use_original_event = TRUE;
 
 	log_debug_info(DEBUGL_FUNCTIONS, 0, "schedule_service_check()\n");
@@ -1885,24 +1891,16 @@ void schedule_service_check(service *svc, time_t check_time, int options) {
 
 	/* default is to use the new event */
 	use_original_event = FALSE;
-	found = FALSE;
 
-#ifdef PERFORMANCE_INCREASE_BUT_VERY_BAD_IDEA_INDEED
-	/* WARNING! 1/19/07 on-demand async service checks will end up causing mutliple scheduled checks of a service to appear in the queue if the code below is skipped */
-	/* if(use_large_installation_tweaks==FALSE)... skip code below */
-#endif
-
-	/* see if there are any other scheduled checks of this service in the queue */
-	for (temp_event = event_list_low; temp_event != NULL; temp_event = temp_event->next) {
+	/* fetch possible saved next check event */
+	temp_event = (timed_event *)svc->next_check_event;
 
-		if (temp_event->event_type == EVENT_SERVICE_CHECK && svc == (service *)temp_event->event_data) {
-			found = TRUE;
-			break;
-		}
-	}
-
-	/* we found another service check event for this service in the queue - what should we do? */
-	if (found == TRUE && temp_event != NULL) {
+	/*
+	 * if the service already has a check scheduled
+	 * we need to decide wether the original or the
+	 * new event will be used
+	 */
+	if (temp_event != NULL) {
 
 		log_debug_info(DEBUGL_CHECKS, 2, "Found another service check event for this service @ %s", ctime(&temp_event->run_time));
 
@@ -1948,6 +1946,8 @@ void schedule_service_check(service *svc, time_t check_time, int options) {
 		/* else we're using the new event, so remove the old one */
 		else {
 			remove_event(temp_event, &event_list_low, &event_list_low_tail);
+			/* save new event for later */
+			svc->next_check_event = new_event;
 			my_free(temp_event);
 		}
 	}
@@ -2343,7 +2343,6 @@ int perform_scheduled_host_check(host *hst, int check_options, double latency) {
 void schedule_host_check(host *hst, time_t check_time, int options) {
 	timed_event *temp_event = NULL;
 	timed_event *new_event = NULL;
-	int found = FALSE;
 	int use_original_event = TRUE;
 
 
@@ -2370,23 +2369,16 @@ void schedule_host_check(host *hst, time_t check_time, int options) {
 
 	/* default is to use the new event */
 	use_original_event = FALSE;
-	found = FALSE;
 
-#ifdef PERFORMANCE_INCREASE_BUT_VERY_BAD_IDEA_INDEED
-	/* WARNING! 1/19/07 on-demand async host checks will end up causing mutliple scheduled checks of a host to appear in the queue if the code below is skipped */
-	/* if(use_large_installation_tweaks==FALSE)... skip code below */
-#endif
-
-	/* see if there are any other scheduled checks of this host in the queue */
-	for (temp_event = event_list_low; temp_event != NULL; temp_event = temp_event->next) {
-		if (temp_event->event_type == EVENT_HOST_CHECK && hst == (host *)temp_event->event_data) {
-			found = TRUE;
-			break;
-		}
-	}
+        /* fetch possible saved next check event */
+	temp_event = (timed_event *)hst->next_check_event;
 
-	/* we found another host check event for this host in the queue - what should we do? */
-	if (found == TRUE && temp_event != NULL) {
+	/*
+	 * if the service already has a check scheduled
+	 * we need to decide wether the original or the
+	 * new event will be used
+	 */
+	if (temp_event != NULL) {
 
 		log_debug_info(DEBUGL_CHECKS, 2, "Found another host check event for this host @ %s", ctime(&temp_event->run_time));
 
@@ -2432,6 +2424,8 @@ void schedule_host_check(host *hst, time_t check_time, int options) {
 		/* else use the new event, so remove the old */
 		else {
 			remove_event(temp_event, &event_list_low, &event_list_low_tail);
+			/* save new event for later */
+			hst->next_check_event = new_event;
 			my_free(temp_event);
 		}
 	}
@@ -3037,6 +3031,13 @@ int run_scheduled_host_check_3x(host *hst, int check_options, double latency) {
 
 	log_debug_info(DEBUGL_CHECKS, 0, "Attempting to run scheduled check of host '%s': check options=%d, latency=%lf\n", hst->name, check_options, latency);
 
+	/*
+	 * reset the next_check_event so we know it's
+	 * no longer in the scheduling queue
+	 * and can't conflict
+	 */
+	hst->next_check_event = NULL;
+	
 	/* attempt to run the check */
 	result = run_async_host_check_3x(hst, check_options, latency, TRUE, TRUE, &time_is_valid, &preferred_time);
 
diff --git a/include/objects.h b/include/objects.h
index 5d1b06f..e1635cc 100644
--- a/include/objects.h
+++ b/include/objects.h
@@ -401,7 +401,11 @@ struct host_struct{
 	objectlist *hostgroups_ptr;
 #endif
 	struct  host_struct *next;
-	struct  host_struct *nexthash;
+	/* recycle this currently unused attribute
+		struct  host_struct *nexthash;
+	 * see #2182 for more info
+	 */
+	void *next_check_event;
 	/* 2011-02-07 MF: added for keeping the command for NEB callback
 	   PROCESSED state on host|service checks  */
 	char	*processed_command;
@@ -544,7 +548,11 @@ struct service_struct{
 	objectlist *servicegroups_ptr;
 #endif
 	struct service_struct *next;
-	struct service_struct *nexthash;
+	/* recycle this currently unused attribute
+		struct service_struct *nexthash;
+	 * see #2182 for more info
+	 */
+	 void *next_check_event;
 	/* 2011-02-07 MF: added for keeping the command for NEB callback
 	   PROCESSED state on host|service checks  */
 	char	*processed_command;





More information about the icinga-checkins mailing list