summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Emmanuel Vasilakis <mrzammler@mm.st> 2023-06-29 17:20:38 +0300
committer: GitHub <noreply@github.com> 2023-06-29 17:20:38 +0300
commit: f29145fe2b45096dd802ed3e9326c6b4f21062da (patch)
tree: 8cbb18c134982785955dfe85d54785b631ef8242
parent: 9e58153a5a0577970ca185b8d65f031703001157 (diff)
Misc alert fixes (#15274)
* rebase
* proper pointer
-rw-r--r-- database/rrdcalc.c 8
-rw-r--r-- database/rrdcalc.h 2
-rw-r--r-- database/sqlite/sqlite_aclk_alert.c 10
-rw-r--r-- database/sqlite/sqlite_health.c 11
-rw-r--r-- database/sqlite/sqlite_health.h 2
-rw-r--r-- health/health.c 26
6 files changed, 32 insertions, 27 deletions
diff --git a/database/rrdcalc.c b/database/rrdcalc.c
index f808602427..97db28d2eb 100644
--- a/database/rrdcalc.c
+++ b/database/rrdcalc.c
@@ -61,13 +61,13 @@ inline const char *rrdcalc_status2string(RRDCALC_STATUS status) {
}
}
-uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id) {
+uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id) {
netdata_rwlock_rdlock(&host->health_log.alarm_log_rwlock);
// re-use old IDs, by looking them up in the alarm log
ALARM_ENTRY *ae = NULL;
for(ae = host->health_log.alarms; ae ;ae = ae->next) {
- if(unlikely(name == ae->name && chart == ae->chart)) {
+ if(unlikely(name == ae->name && chart == ae->chart && !uuid_memcmp(&ae->config_hash_id, config_hash_id))) {
if(next_event_id) *next_event_id = ae->alarm_event_id + 1;
break;
}
@@ -79,7 +79,7 @@ uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint3
alarm_id = ae->alarm_id;
else {
- alarm_id = sql_get_alarm_id(host, chart, name, next_event_id);
+ alarm_id = sql_get_alarm_id(host, chart, name, next_event_id, config_hash_id);
if (!alarm_id) {
if (unlikely(!host->health_log.next_alarm_id))
@@ -531,7 +531,7 @@ static void rrdcalc_rrdhost_insert_callback(const DICTIONARY_ITEM *item __maybe_
;
}
- rc->id = rrdcalc_get_unique_id(host, rc->chart, rc->name, &rc->next_event_id);
+ rc->id = rrdcalc_get_unique_id(host, rc->chart, rc->name, &rc->next_event_id, &rc->config_hash_id);
if(rc->calculation) {
rc->calculation->status = &rc->status;
diff --git a/database/rrdcalc.h b/database/rrdcalc.h
index 2ec6551a97..2081452c7a 100644
--- a/database/rrdcalc.h
+++ b/database/rrdcalc.h
@@ -240,7 +240,7 @@ const char *rrdcalc_status2string(RRDCALC_STATUS status);
void rrdcalc_free_unused_rrdcalc_loaded_from_config(RRDCALC *rc);
-uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id);
+uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id);
void rrdcalc_add_from_rrdcalctemplate(RRDHOST *host, RRDCALCTEMPLATE *rt, RRDSET *st, const char *overwrite_alert_name, const char *overwrite_dimensions);
int rrdcalc_add_from_config(RRDHOST *host, RRDCALC *rc);
diff --git a/database/sqlite/sqlite_aclk_alert.c b/database/sqlite/sqlite_aclk_alert.c
index 7ef9ef8fb1..71836d7370 100644
--- a/database/sqlite/sqlite_aclk_alert.c
+++ b/database/sqlite/sqlite_aclk_alert.c
@@ -69,7 +69,7 @@ int should_send_to_cloud(RRDHOST *host, ALARM_ENTRY *ae)
return 0;
}
- if (unlikely(uuid_is_null(ae->config_hash_id)))
+ if (unlikely(uuid_is_null(ae->config_hash_id)))
return 0;
char sql[ACLK_SYNC_QUERY_SIZE];
@@ -101,7 +101,6 @@ int should_send_to_cloud(RRDHOST *host, ALARM_ENTRY *ae)
if (sqlite3_column_type(res, 1) != SQLITE_NULL)
uuid_copy(config_hash_id, *((uuid_t *) sqlite3_column_blob(res, 1)));
unique_id = (uint32_t) sqlite3_column_int64(res, 2);
-
} else {
send = 1;
goto done;
@@ -118,11 +117,8 @@ int should_send_to_cloud(RRDHOST *host, ALARM_ENTRY *ae)
}
//same status, same config
- if (ae->new_status == RRDCALC_STATUS_CLEAR || ae->new_status == RRDCALC_STATUS_UNDEFINED) {
- send = 0;
- update_filtered(ae, unique_id, uuid_str);
- goto done;
- }
+ send = 0;
+ update_filtered(ae, unique_id, uuid_str);
done:
rc = sqlite3_finalize(res);
diff --git a/database/sqlite/sqlite_health.c b/database/sqlite/sqlite_health.c
index 837e27d563..bde4b0c3a1 100644
--- a/database/sqlite/sqlite_health.c
+++ b/database/sqlite/sqlite_health.c
@@ -1612,9 +1612,9 @@ int health_migrate_old_health_log_table(char *table) {
return 1;
}
-#define SQL_GET_ALARM_ID "select alarm_id, health_log_id from health_log where host_id = @host_id and chart = @chart and name = @name"
+#define SQL_GET_ALARM_ID "select alarm_id, health_log_id from health_log where host_id = @host_id and chart = @chart and name = @name and config_hash_id = @config_hash_id"
#define SQL_GET_EVENT_ID "select max(alarm_event_id) + 1 from health_log_detail where health_log_id = @health_log_id and alarm_id = @alarm_id"
-uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id)
+uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id)
{
int rc = 0;
sqlite3_stmt *res = NULL;
@@ -1648,6 +1648,13 @@ uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *
return alarm_id;
}
+ rc = sqlite3_bind_blob(res, 4, config_hash_id, sizeof(*config_hash_id), SQLITE_STATIC);
+ if (unlikely(rc != SQLITE_OK)) {
+ error_report("Failed to bind config_hash_id parameter for SQL_GET_ALARM_ID.");
+ sqlite3_finalize(res);
+ return alarm_id;
+ }
+
while (sqlite3_step_monitored(res) == SQLITE_ROW) {
alarm_id = (uint32_t) sqlite3_column_int64(res, 0);
health_log_id = (uint64_t) sqlite3_column_int64(res, 1);
diff --git a/database/sqlite/sqlite_health.h b/database/sqlite/sqlite_health.h
index 29ed42707c..d92204ada1 100644
--- a/database/sqlite/sqlite_health.h
+++ b/database/sqlite/sqlite_health.h
@@ -16,7 +16,7 @@ void sql_aclk_alert_clean_dead_entries(RRDHOST *host);
int sql_health_get_last_executed_event(RRDHOST *host, ALARM_ENTRY *ae, RRDCALC_STATUS *last_executed_status);
void sql_health_alarm_log2json(RRDHOST *host, BUFFER *wb, uint32_t after, char *chart);
int health_migrate_old_health_log_table(char *table);
-uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id);
+uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id);
void sql_health_alarm_log2json_v3(BUFFER *wb, DICTIONARY *alert_instances, time_t after, time_t before, const char *transition, uint32_t max, bool debug);
bool sql_find_alert_transition(const char *transition, void (*cb)(const char *machine_guid, const char *context, time_t alert_id, void *data), void *data);
#endif //NETDATA_SQLITE_HEALTH_H
diff --git a/health/health.c b/health/health.c
index 424f091909..ca0b564d5a 100644
--- a/health/health.c
+++ b/health/health.c
@@ -448,7 +448,7 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
if (likely(ret == 1)) {
// we have executed this alarm notification in the past
- if(last_executed_status == ae->new_status) {
+ if(last_executed_status == ae->new_status && !(ae->flags & HEALTH_ENTRY_FLAG_IS_REPEATING)) {
// don't send the notification for the same status again
debug(D_HEALTH, "Health not sending again notification for alarm '%s.%s' status %s", ae_chart_name(ae), ae_name(ae)
, rrdcalc_status2string(ae->new_status));
@@ -563,7 +563,7 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
ae->old_value,
ae->source?ae_source(ae):"UNKNOWN",
(uint32_t)ae->duration,
- (uint32_t)ae->non_clear_duration,
+ (ae->flags & HEALTH_ENTRY_FLAG_IS_REPEATING && ae->new_status >= RRDCALC_STATUS_WARNING) ? (uint32_t)ae->duration : (uint32_t)ae->non_clear_duration,
ae_units(ae),
ae_info(ae),
ae_new_value_string(ae),
@@ -636,17 +636,15 @@ static inline void health_alarm_log_process(RRDHOST *host) {
ALARM_ENTRY *ae;
for(ae = host->health_log.alarms; ae && ae->unique_id >= host->health_last_processed_id; ae = ae->next) {
- if(likely(!(ae->flags & HEALTH_ENTRY_FLAG_IS_REPEATING))) {
- if(unlikely(
+ if(unlikely(
!(ae->flags & HEALTH_ENTRY_FLAG_PROCESSED) &&
!(ae->flags & HEALTH_ENTRY_FLAG_UPDATED)
- )) {
- if(unlikely(ae->unique_id < first_waiting))
- first_waiting = ae->unique_id;
+ )) {
+ if(unlikely(ae->unique_id < first_waiting))
+ first_waiting = ae->unique_id;
- if(likely(now >= ae->delay_up_to_timestamp))
- health_process_notifications(host, ae);
- }
+ if(likely(now >= ae->delay_up_to_timestamp))
+ health_process_notifications(host, ae);
}
}
@@ -1431,6 +1429,12 @@ void *health_main(void *ptr) {
rc->old_status = rc->status;
rc->status = status;
rc->ae = ae;
+
+ if(unlikely(rrdcalc_isrepeating(rc))) {
+ rc->last_repeat = now;
+ if (rc->status == RRDCALC_STATUS_CLEAR)
+ rc->run_flags |= RRDCALC_FLAG_RUN_ONCE;
+ }
}
rc->last_updated = now;
@@ -1471,7 +1475,6 @@ void *health_main(void *ptr) {
worker_is_busy(WORKER_HEALTH_JOB_ALARM_LOG_ENTRY);
rc->last_repeat = now;
if (likely(rc->times_repeat < UINT32_MAX)) rc->times_repeat++;
-
ALARM_ENTRY *ae = health_create_alarm_entry(
host,
rc->id,
@@ -1508,7 +1511,6 @@ void *health_main(void *ptr) {
ae->flags |= HEALTH_ENTRY_RUN_ONCE;
}
rc->run_flags |= RRDCALC_FLAG_RUN_ONCE;
- rc->ae = ae;
health_process_notifications(host, ae);
debug(D_HEALTH, "Notification sent for the repeating alarm %u.", ae->alarm_id);
health_alarm_wait_for_execution(ae);