diff options
author | Emmanuel Vasilakis <mrzammler@mm.st> | 2023-06-29 17:20:38 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-29 17:20:38 +0300 |
commit | f29145fe2b45096dd802ed3e9326c6b4f21062da (patch) | |
tree | 8cbb18c134982785955dfe85d54785b631ef8242 | |
parent | 9e58153a5a0577970ca185b8d65f031703001157 (diff) |
Misc alert fixes (#15274)
* rebase
* proper pointer
-rw-r--r-- | database/rrdcalc.c | 8 | ||||
-rw-r--r-- | database/rrdcalc.h | 2 | ||||
-rw-r--r-- | database/sqlite/sqlite_aclk_alert.c | 10 | ||||
-rw-r--r-- | database/sqlite/sqlite_health.c | 11 | ||||
-rw-r--r-- | database/sqlite/sqlite_health.h | 2 | ||||
-rw-r--r-- | health/health.c | 26 |
6 files changed, 32 insertions, 27 deletions
diff --git a/database/rrdcalc.c b/database/rrdcalc.c index f808602427..97db28d2eb 100644 --- a/database/rrdcalc.c +++ b/database/rrdcalc.c @@ -61,13 +61,13 @@ inline const char *rrdcalc_status2string(RRDCALC_STATUS status) { } } -uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id) { +uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id) { netdata_rwlock_rdlock(&host->health_log.alarm_log_rwlock); // re-use old IDs, by looking them up in the alarm log ALARM_ENTRY *ae = NULL; for(ae = host->health_log.alarms; ae ;ae = ae->next) { - if(unlikely(name == ae->name && chart == ae->chart)) { + if(unlikely(name == ae->name && chart == ae->chart && !uuid_memcmp(&ae->config_hash_id, config_hash_id))) { if(next_event_id) *next_event_id = ae->alarm_event_id + 1; break; } @@ -79,7 +79,7 @@ uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint3 alarm_id = ae->alarm_id; else { - alarm_id = sql_get_alarm_id(host, chart, name, next_event_id); + alarm_id = sql_get_alarm_id(host, chart, name, next_event_id, config_hash_id); if (!alarm_id) { if (unlikely(!host->health_log.next_alarm_id)) @@ -531,7 +531,7 @@ static void rrdcalc_rrdhost_insert_callback(const DICTIONARY_ITEM *item __maybe_ ; } - rc->id = rrdcalc_get_unique_id(host, rc->chart, rc->name, &rc->next_event_id); + rc->id = rrdcalc_get_unique_id(host, rc->chart, rc->name, &rc->next_event_id, &rc->config_hash_id); if(rc->calculation) { rc->calculation->status = &rc->status; diff --git a/database/rrdcalc.h b/database/rrdcalc.h index 2ec6551a97..2081452c7a 100644 --- a/database/rrdcalc.h +++ b/database/rrdcalc.h @@ -240,7 +240,7 @@ const char *rrdcalc_status2string(RRDCALC_STATUS status); void rrdcalc_free_unused_rrdcalc_loaded_from_config(RRDCALC *rc); -uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id); +uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id); void rrdcalc_add_from_rrdcalctemplate(RRDHOST *host, RRDCALCTEMPLATE *rt, RRDSET *st, const char *overwrite_alert_name, const char *overwrite_dimensions); int rrdcalc_add_from_config(RRDHOST *host, RRDCALC *rc); diff --git a/database/sqlite/sqlite_aclk_alert.c b/database/sqlite/sqlite_aclk_alert.c index 7ef9ef8fb1..71836d7370 100644 --- a/database/sqlite/sqlite_aclk_alert.c +++ b/database/sqlite/sqlite_aclk_alert.c @@ -69,7 +69,7 @@ int should_send_to_cloud(RRDHOST *host, ALARM_ENTRY *ae) return 0; } - if (unlikely(uuid_is_null(ae->config_hash_id))) + if (unlikely(uuid_is_null(ae->config_hash_id))) return 0; char sql[ACLK_SYNC_QUERY_SIZE]; @@ -101,7 +101,6 @@ int should_send_to_cloud(RRDHOST *host, ALARM_ENTRY *ae) if (sqlite3_column_type(res, 1) != SQLITE_NULL) uuid_copy(config_hash_id, *((uuid_t *) sqlite3_column_blob(res, 1))); unique_id = (uint32_t) sqlite3_column_int64(res, 2); - } else { send = 1; goto done; @@ -118,11 +117,8 @@ int should_send_to_cloud(RRDHOST *host, ALARM_ENTRY *ae) } //same status, same config - if (ae->new_status == RRDCALC_STATUS_CLEAR || ae->new_status == RRDCALC_STATUS_UNDEFINED) { - send = 0; - update_filtered(ae, unique_id, uuid_str); - goto done; - } + send = 0; + update_filtered(ae, unique_id, uuid_str); done: rc = sqlite3_finalize(res); diff --git a/database/sqlite/sqlite_health.c b/database/sqlite/sqlite_health.c index 837e27d563..bde4b0c3a1 100644 --- a/database/sqlite/sqlite_health.c +++ b/database/sqlite/sqlite_health.c @@ -1612,9 +1612,9 @@ int health_migrate_old_health_log_table(char *table) { return 1; } -#define SQL_GET_ALARM_ID "select alarm_id, health_log_id from health_log where host_id = @host_id and chart = @chart and name = @name" +#define SQL_GET_ALARM_ID "select alarm_id, health_log_id from health_log where host_id = @host_id and chart = @chart and name = @name and config_hash_id = @config_hash_id" #define SQL_GET_EVENT_ID "select max(alarm_event_id) + 1 from health_log_detail where health_log_id = @health_log_id and alarm_id = @alarm_id" -uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id) +uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id) { int rc = 0; sqlite3_stmt *res = NULL; @@ -1648,6 +1648,13 @@ uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t * return alarm_id; } + rc = sqlite3_bind_blob(res, 4, config_hash_id, sizeof(*config_hash_id), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to bind config_hash_id parameter for SQL_GET_ALARM_ID."); + sqlite3_finalize(res); + return alarm_id; + } + while (sqlite3_step_monitored(res) == SQLITE_ROW) { alarm_id = (uint32_t) sqlite3_column_int64(res, 0); health_log_id = (uint64_t) sqlite3_column_int64(res, 1); diff --git a/database/sqlite/sqlite_health.h b/database/sqlite/sqlite_health.h index 29ed42707c..d92204ada1 100644 --- a/database/sqlite/sqlite_health.h +++ b/database/sqlite/sqlite_health.h @@ -16,7 +16,7 @@ void sql_aclk_alert_clean_dead_entries(RRDHOST *host); int sql_health_get_last_executed_event(RRDHOST *host, ALARM_ENTRY *ae, RRDCALC_STATUS *last_executed_status); void sql_health_alarm_log2json(RRDHOST *host, BUFFER *wb, uint32_t after, char *chart); int health_migrate_old_health_log_table(char *table); -uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id); +uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id); void sql_health_alarm_log2json_v3(BUFFER *wb, DICTIONARY *alert_instances, time_t after, time_t before, const char *transition, uint32_t max, bool debug); bool sql_find_alert_transition(const char *transition, void (*cb)(const char *machine_guid, const char *context, time_t alert_id, void *data), void *data); #endif //NETDATA_SQLITE_HEALTH_H diff --git a/health/health.c b/health/health.c index 424f091909..ca0b564d5a 100644 --- a/health/health.c +++ b/health/health.c @@ -448,7 +448,7 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) { if (likely(ret == 1)) { // we have executed this alarm notification in the past - if(last_executed_status == ae->new_status) { + if(last_executed_status == ae->new_status && !(ae->flags & HEALTH_ENTRY_FLAG_IS_REPEATING)) { // don't send the notification for the same status again debug(D_HEALTH, "Health not sending again notification for alarm '%s.%s' status %s", ae_chart_name(ae), ae_name(ae) , rrdcalc_status2string(ae->new_status)); @@ -563,7 +563,7 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) { ae->old_value, ae->source?ae_source(ae):"UNKNOWN", (uint32_t)ae->duration, - (uint32_t)ae->non_clear_duration, + (ae->flags & HEALTH_ENTRY_FLAG_IS_REPEATING && ae->new_status >= RRDCALC_STATUS_WARNING) ? (uint32_t)ae->duration : (uint32_t)ae->non_clear_duration, ae_units(ae), ae_info(ae), ae_new_value_string(ae), @@ -636,17 +636,15 @@ static inline void health_alarm_log_process(RRDHOST *host) { ALARM_ENTRY *ae; for(ae = host->health_log.alarms; ae && ae->unique_id >= host->health_last_processed_id; ae = ae->next) { - if(likely(!(ae->flags & HEALTH_ENTRY_FLAG_IS_REPEATING))) { - if(unlikely( + if(unlikely( !(ae->flags & HEALTH_ENTRY_FLAG_PROCESSED) && !(ae->flags & HEALTH_ENTRY_FLAG_UPDATED) - )) { - if(unlikely(ae->unique_id < first_waiting)) - first_waiting = ae->unique_id; + )) { + if(unlikely(ae->unique_id < first_waiting)) + first_waiting = ae->unique_id; - if(likely(now >= ae->delay_up_to_timestamp)) - health_process_notifications(host, ae); - } + if(likely(now >= ae->delay_up_to_timestamp)) + health_process_notifications(host, ae); } } @@ -1431,6 +1429,12 @@ void *health_main(void *ptr) { rc->old_status = rc->status; rc->status = status; rc->ae = ae; + + if(unlikely(rrdcalc_isrepeating(rc))) { + rc->last_repeat = now; + if (rc->status == RRDCALC_STATUS_CLEAR) + rc->run_flags |= RRDCALC_FLAG_RUN_ONCE; + } } rc->last_updated = now; @@ -1471,7 +1475,6 @@ void *health_main(void *ptr) { worker_is_busy(WORKER_HEALTH_JOB_ALARM_LOG_ENTRY); rc->last_repeat = now; if (likely(rc->times_repeat < UINT32_MAX)) rc->times_repeat++; - ALARM_ENTRY *ae = health_create_alarm_entry( host, rc->id, @@ -1508,7 +1511,6 @@ void *health_main(void *ptr) { ae->flags |= HEALTH_ENTRY_RUN_ONCE; } rc->run_flags |= RRDCALC_FLAG_RUN_ONCE; - rc->ae = ae; health_process_notifications(host, ae); debug(D_HEALTH, "Notification sent for the repeating alarm %u.", ae->alarm_id); health_alarm_wait_for_execution(ae); |