diff options
author | Markos Fountoulakis <44345837+mfundul@users.noreply.github.com> | 2020-05-14 11:57:20 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-05-14 11:57:20 +0300 |
commit | 6393b2f535c993de9f341d2245ad8ba327694281 (patch) | |
tree | d53a5b854c4075908c4c3921c372b9d75ba9617d /health/health.c | |
parent | a606a27f164b1c704d850c838a7b89d6c6e0c17c (diff) |
Improve the impact of health code on netdata scalability (#8407)
* Add support for spawning processes without pipes.
* Port health_alarm_execute() from mypopen() to netdata_spawn()
* Make alarm notifications asynchronous within a single health thread iteration
* Initial version of spawn server.
* Preliminary integration of spawn client with health.
Diffstat (limited to 'health/health.c')
-rw-r--r-- | health/health.c | 86 |
1 files changed, 72 insertions, 14 deletions
diff --git a/health/health.c b/health/health.c index cbc7554cb4..0bf79e27d1 100644 --- a/health/health.c +++ b/health/health.c @@ -11,6 +11,46 @@ struct health_cmdapi_thread_status { unsigned int default_health_enabled = 1; char *silencers_filename; +// the queue of executed alarm notifications that haven't been waited for yet +static struct { + ALARM_ENTRY *head; // oldest + ALARM_ENTRY *tail; // latest +} alarm_notifications_in_progress = {NULL, NULL}; + +static inline void enqueue_alarm_notify_in_progress(ALARM_ENTRY *ae) +{ + ae->prev_in_progress = NULL; + ae->next_in_progress = NULL; + + if (NULL != alarm_notifications_in_progress.tail) { + ae->prev_in_progress = alarm_notifications_in_progress.tail; + alarm_notifications_in_progress.tail->next_in_progress = ae; + } + if (NULL == alarm_notifications_in_progress.head) { + alarm_notifications_in_progress.head = ae; + } + alarm_notifications_in_progress.tail = ae; + +} + +static inline void unlink_alarm_notify_in_progress(ALARM_ENTRY *ae) +{ + struct alarm_entry *prev = ae->prev_in_progress; + struct alarm_entry *next = ae->next_in_progress; + + if (NULL != prev) { + prev->next_in_progress = next; + } + if (NULL != next) { + next->prev_in_progress = prev; + } + if (ae == alarm_notifications_in_progress.head) { + alarm_notifications_in_progress.head = next; + } + if (ae == alarm_notifications_in_progress.tail) { + alarm_notifications_in_progress.tail = prev; + } +} // ---------------------------------------------------------------------------- // health initialization @@ -265,7 +305,6 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) { } static char command_to_run[ALARM_EXEC_COMMAND_LENGTH + 1]; - pid_t command_pid; const char *exec = (ae->exec) ? ae->exec : host->health_default_exec; const char *recipient = (ae->recipient) ? 
ae->recipient : host->health_default_recipient; @@ -321,25 +360,30 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) { ); ae->flags |= HEALTH_ENTRY_FLAG_EXEC_RUN; - ae->exec_run_timestamp = now_realtime_sec(); + ae->exec_run_timestamp = now_realtime_sec(); /* will be updated by real time after spawning */ debug(D_HEALTH, "executing command '%s'", command_to_run); - FILE *fp = mypopen(command_to_run, &command_pid); - if(!fp) { - error("HEALTH: Cannot popen(\"%s\", \"r\").", command_to_run); - goto done; - } - debug(D_HEALTH, "HEALTH reading from command (discarding command's output)"); - char buffer[100 + 1]; - while(fgets(buffer, 100, fp) != NULL) ; - ae->exec_code = mypclose(fp, command_pid); + ae->flags |= HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS; + ae->exec_spawn_serial = spawn_enq_cmd(command_to_run); + enqueue_alarm_notify_in_progress(ae); + + return; //health_alarm_wait_for_execution +done: + health_alarm_log_save(host, ae); +} + +static inline void health_alarm_wait_for_execution(ALARM_ENTRY *ae) { + if (!(ae->flags & HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS)) + return; + + spawn_wait_cmd(ae->exec_spawn_serial, &ae->exec_code, &ae->exec_run_timestamp); debug(D_HEALTH, "done executing command - returned with code %d", ae->exec_code); + ae->flags &= ~HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS; if(ae->exec_code != 0) ae->flags |= HEALTH_ENTRY_FLAG_EXEC_FAILED; -done: - health_alarm_log_save(host, ae); + unlink_alarm_notify_in_progress(ae); } static inline void health_process_notifications(RRDHOST *host, ALARM_ENTRY *ae) { @@ -401,6 +445,7 @@ static inline void health_alarm_log_process(RRDHOST *host) { ALARM_ENTRY *t = ae->next; if(likely(!alarm_entry_isrepeating(host, ae))) { + health_alarm_wait_for_execution(ae); health_alarm_log_free_one_nochecks_nounlink(ae); host->health_log.count--; } @@ -945,6 +990,7 @@ void *health_main(void *ptr) { rc->rrdcalc_flags |= RRDCALC_FLAG_RUN_ONCE; health_process_notifications(host, ae); debug(D_HEALTH, "Notification sent 
for the repeating alarm %u.", ae->alarm_id); + health_alarm_wait_for_execution(ae); health_alarm_log_free_one_nochecks_nounlink(ae); } } @@ -959,11 +1005,23 @@ void *health_main(void *ptr) { // and cleanup health_alarm_log_process(host); - if (unlikely(netdata_exit)) + if (unlikely(netdata_exit)) { + // wait for all notifications to finish before allowing health to be cleaned up + ALARM_ENTRY *ae; + while (NULL != (ae = alarm_notifications_in_progress.head)) { + health_alarm_wait_for_execution(ae); + } break; + } } /* rrdhost_foreach */ + // wait for all notifications to finish before allowing health to be cleaned up + ALARM_ENTRY *ae; + while (NULL != (ae = alarm_notifications_in_progress.head)) { + health_alarm_wait_for_execution(ae); + } + rrd_unlock(); |