summary refs log tree commit diff stats
path: root/health/health_log.c
diff options
context:
space:
mode:
author Emmanuel Vasilakis <mrzammler@mm.st> 2021-09-19 14:11:17 +0300
committer GitHub <noreply@github.com> 2021-09-19 14:11:17 +0300
commit b87473c481283bbd936661fb15c3bd63fa74dbca (patch)
tree 62743e2236957dbb5ed3a0c1076ce6f89b76098d /health/health_log.c
parent 686e5782d109fe79f2c5ec6aa44850f3e449b679 (diff)
Use sqlite to store the health log and alert configurations. (#11399)
* Rebased * use sql health log if it exists * store alert config in sqlite * move unlock before loop * fix warnings * remove hash message * check return from counting health log * remove check of hostname when reading log * try to create the health log table to catch accidental removals of it * fix warnings, cast values, report config_hash_id * use snprintfz, add info logging * remove unnecessary strdup and free * check if stored config hash is null * return if prepare statement fails * replace with static variables * remove replace info, free edit_command * remove setting cfg entries to NULL * change uuid_copy * check return of uuid_parse, and exit if it is not valid * also free cfg * use address * removed health_alarm_entry_sql2json and sql_health_alarm_log_select_all * remove check for is_valid_alarm_id * replace lengths with GUID_LEN * use uuid_unparse_lower_fix * removed web API endpoint to get alert config * check for non-null values for name, chart and family * include a date_updated field in alert_hash * for config hash, digest NULL string if value to digest is null * Use empty string instead of null
Diffstat (limited to 'health/health_log.c')
-rw-r--r-- health/health_log.c 57
1 files changed, 33 insertions, 24 deletions
diff --git a/health/health_log.c b/health/health_log.c
index bb9416fd2f..e5d0e0fa96 100644
--- a/health/health_log.c
+++ b/health/health_log.c
@@ -38,39 +38,41 @@ static inline void health_log_rotate(RRDHOST *host) {
}
if(unlikely(host->health_log_entries_written > rotate_every)) {
- health_alarm_log_close(host);
+ if(unlikely(host->health_log_fp)) {
+ health_alarm_log_close(host);
- char old_filename[FILENAME_MAX + 1];
- snprintfz(old_filename, FILENAME_MAX, "%s.old", host->health_log_filename);
+ char old_filename[FILENAME_MAX + 1];
+ snprintfz(old_filename, FILENAME_MAX, "%s.old", host->health_log_filename);
- if(unlink(old_filename) == -1 && errno != ENOENT)
- error("HEALTH [%s]: cannot remove old alarms log file '%s'", host->hostname, old_filename);
+ if(unlink(old_filename) == -1 && errno != ENOENT)
+ error("HEALTH [%s]: cannot remove old alarms log file '%s'", host->hostname, old_filename);
- if(link(host->health_log_filename, old_filename) == -1 && errno != ENOENT)
- error("HEALTH [%s]: cannot move file '%s' to '%s'.", host->hostname, host->health_log_filename, old_filename);
+ if(link(host->health_log_filename, old_filename) == -1 && errno != ENOENT)
+ error("HEALTH [%s]: cannot move file '%s' to '%s'.", host->hostname, host->health_log_filename, old_filename);
- if(unlink(host->health_log_filename) == -1 && errno != ENOENT)
- error("HEALTH [%s]: cannot remove old alarms log file '%s'", host->hostname, host->health_log_filename);
+ if(unlink(host->health_log_filename) == -1 && errno != ENOENT)
+ error("HEALTH [%s]: cannot remove old alarms log file '%s'", host->hostname, host->health_log_filename);
- // open it with truncate
- host->health_log_fp = fopen(host->health_log_filename, "w");
+ // open it with truncate
+ host->health_log_fp = fopen(host->health_log_filename, "w");
- if(host->health_log_fp)
- fclose(host->health_log_fp);
- else
- error("HEALTH [%s]: cannot truncate health log '%s'", host->hostname, host->health_log_filename);
+ if(host->health_log_fp)
+ fclose(host->health_log_fp);
+ else
+ error("HEALTH [%s]: cannot truncate health log '%s'", host->hostname, host->health_log_filename);
- host->health_log_fp = NULL;
+ host->health_log_fp = NULL;
- host->health_log_entries_written = 0;
- health_alarm_log_open(host);
+ host->health_log_entries_written = 0;
+ health_alarm_log_open(host);
+ }
}
}
inline void health_label_log_save(RRDHOST *host) {
health_log_rotate(host);
- if(likely(host->health_log_fp)) {
+ if(unlikely(host->health_log_fp)) {
BUFFER *wb = buffer_create(1024);
rrdhost_check_rdlock(host);
netdata_rwlock_rdlock(&host->labels.labels_rwlock);
@@ -101,7 +103,7 @@ inline void health_label_log_save(RRDHOST *host) {
inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) {
health_log_rotate(host);
- if(likely(host->health_log_fp)) {
+ if(unlikely(host->health_log_fp)) {
if(unlikely(fprintf(host->health_log_fp
, "%c\t%s"
"\t%08x\t%08x\t%08x\t%08x\t%08x"
@@ -155,7 +157,9 @@ inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) {
ae->flags |= HEALTH_ENTRY_FLAG_SAVED;
host->health_log_entries_written++;
}
- }
+ }else
+ sql_health_alarm_log_save(host, ae);
+
#ifdef ENABLE_ACLK
if (netdata_cloud_setting) {
if ((ae->new_status == RRDCALC_STATUS_WARNING || ae->new_status == RRDCALC_STATUS_CRITICAL) ||
@@ -392,9 +396,13 @@ static inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char
if(unlikely(*pointers[0] == 'A')) {
ae->next = host->health_log.alarms;
host->health_log.alarms = ae;
+ sql_health_alarm_log_insert(host, ae);
loaded++;
}
- else updated++;
+ else {
+ sql_health_alarm_log_update(host, ae);
+ updated++;
+ }
if(unlikely(ae->unique_id > host->health_max_unique_id))
host->health_max_unique_id = ae->unique_id;
@@ -444,8 +452,6 @@ inline void health_alarm_log_load(RRDHOST *host) {
health_alarm_log_read(host, fp, host->health_log_filename);
fclose(fp);
}
-
- health_alarm_log_open(host);
}
@@ -456,6 +462,7 @@ inline ALARM_ENTRY* health_create_alarm_entry(
RRDHOST *host,
uint32_t alarm_id,
uint32_t alarm_event_id,
+ uuid_t config_hash_id,
time_t when,
const char *name,
const char *chart,
@@ -487,6 +494,8 @@ inline ALARM_ENTRY* health_create_alarm_entry(
ae->hash_chart = simple_hash(ae->chart);
}
+ uuid_copy(ae->config_hash_id, *((uuid_t *) config_hash_id));
+
if(family)
ae->family = strdupz(family);