summaryrefslogtreecommitdiffstats
path: root/health/health.c
diff options
context:
space:
mode:
authorMarkos Fountoulakis <44345837+mfundul@users.noreply.github.com>2020-05-14 11:57:20 +0300
committerGitHub <noreply@github.com>2020-05-14 11:57:20 +0300
commit6393b2f535c993de9f341d2245ad8ba327694281 (patch)
treed53a5b854c4075908c4c3921c372b9d75ba9617d /health/health.c
parenta606a27f164b1c704d850c838a7b89d6c6e0c17c (diff)
Improve the impact of health code on netdata scalability (#8407)
* Add support for spawning processes without pipes. * Port health_alarm_execute() from mypopen() to netdata_spawn() * Make alarm notifications asynchronous within a single health thread iteration * Initial version of spawn server. * preliminary integration of spawn client with health
Diffstat (limited to 'health/health.c')
-rw-r--r--health/health.c86
1 files changed, 72 insertions, 14 deletions
diff --git a/health/health.c b/health/health.c
index cbc7554cb4..0bf79e27d1 100644
--- a/health/health.c
+++ b/health/health.c
@@ -11,6 +11,46 @@ struct health_cmdapi_thread_status {
unsigned int default_health_enabled = 1;
char *silencers_filename;
+// the queue of executed alarm notifications that haven't been waited for yet
+static struct {
+ ALARM_ENTRY *head; // oldest
+ ALARM_ENTRY *tail; // latest
+} alarm_notifications_in_progress = {NULL, NULL};
+
+static inline void enqueue_alarm_notify_in_progress(ALARM_ENTRY *ae)
+{
+ ae->prev_in_progress = NULL;
+ ae->next_in_progress = NULL;
+
+ if (NULL != alarm_notifications_in_progress.tail) {
+ ae->prev_in_progress = alarm_notifications_in_progress.tail;
+ alarm_notifications_in_progress.tail->next_in_progress = ae;
+ }
+ if (NULL == alarm_notifications_in_progress.head) {
+ alarm_notifications_in_progress.head = ae;
+ }
+ alarm_notifications_in_progress.tail = ae;
+
+}
+
+static inline void unlink_alarm_notify_in_progress(ALARM_ENTRY *ae)
+{
+ struct alarm_entry *prev = ae->prev_in_progress;
+ struct alarm_entry *next = ae->next_in_progress;
+
+ if (NULL != prev) {
+ prev->next_in_progress = next;
+ }
+ if (NULL != next) {
+ next->prev_in_progress = prev;
+ }
+ if (ae == alarm_notifications_in_progress.head) {
+ alarm_notifications_in_progress.head = next;
+ }
+ if (ae == alarm_notifications_in_progress.tail) {
+ alarm_notifications_in_progress.tail = prev;
+ }
+}
// ----------------------------------------------------------------------------
// health initialization
@@ -265,7 +305,6 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
}
static char command_to_run[ALARM_EXEC_COMMAND_LENGTH + 1];
- pid_t command_pid;
const char *exec = (ae->exec) ? ae->exec : host->health_default_exec;
const char *recipient = (ae->recipient) ? ae->recipient : host->health_default_recipient;
@@ -321,25 +360,30 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
);
ae->flags |= HEALTH_ENTRY_FLAG_EXEC_RUN;
- ae->exec_run_timestamp = now_realtime_sec();
+ ae->exec_run_timestamp = now_realtime_sec(); /* will be updated by real time after spawning */
debug(D_HEALTH, "executing command '%s'", command_to_run);
- FILE *fp = mypopen(command_to_run, &command_pid);
- if(!fp) {
- error("HEALTH: Cannot popen(\"%s\", \"r\").", command_to_run);
- goto done;
- }
- debug(D_HEALTH, "HEALTH reading from command (discarding command's output)");
- char buffer[100 + 1];
- while(fgets(buffer, 100, fp) != NULL) ;
- ae->exec_code = mypclose(fp, command_pid);
+ ae->flags |= HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS;
+ ae->exec_spawn_serial = spawn_enq_cmd(command_to_run);
+ enqueue_alarm_notify_in_progress(ae);
+
+ return; //health_alarm_wait_for_execution
+done:
+ health_alarm_log_save(host, ae);
+}
+
+static inline void health_alarm_wait_for_execution(ALARM_ENTRY *ae) {
+ if (!(ae->flags & HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS))
+ return;
+
+ spawn_wait_cmd(ae->exec_spawn_serial, &ae->exec_code, &ae->exec_run_timestamp);
debug(D_HEALTH, "done executing command - returned with code %d", ae->exec_code);
+ ae->flags &= ~HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS;
if(ae->exec_code != 0)
ae->flags |= HEALTH_ENTRY_FLAG_EXEC_FAILED;
-done:
- health_alarm_log_save(host, ae);
+ unlink_alarm_notify_in_progress(ae);
}
static inline void health_process_notifications(RRDHOST *host, ALARM_ENTRY *ae) {
@@ -401,6 +445,7 @@ static inline void health_alarm_log_process(RRDHOST *host) {
ALARM_ENTRY *t = ae->next;
if(likely(!alarm_entry_isrepeating(host, ae))) {
+ health_alarm_wait_for_execution(ae);
health_alarm_log_free_one_nochecks_nounlink(ae);
host->health_log.count--;
}
@@ -945,6 +990,7 @@ void *health_main(void *ptr) {
rc->rrdcalc_flags |= RRDCALC_FLAG_RUN_ONCE;
health_process_notifications(host, ae);
debug(D_HEALTH, "Notification sent for the repeating alarm %u.", ae->alarm_id);
+ health_alarm_wait_for_execution(ae);
health_alarm_log_free_one_nochecks_nounlink(ae);
}
}
@@ -959,11 +1005,23 @@ void *health_main(void *ptr) {
// and cleanup
health_alarm_log_process(host);
- if (unlikely(netdata_exit))
+ if (unlikely(netdata_exit)) {
+ // wait for all notifications to finish before allowing health to be cleaned up
+ ALARM_ENTRY *ae;
+ while (NULL != (ae = alarm_notifications_in_progress.head)) {
+ health_alarm_wait_for_execution(ae);
+ }
break;
+ }
} /* rrdhost_foreach */
+ // wait for all notifications to finish before allowing health to be cleaned up
+ ALARM_ENTRY *ae;
+ while (NULL != (ae = alarm_notifications_in_progress.head)) {
+ health_alarm_wait_for_execution(ae);
+ }
+
rrd_unlock();