summaryrefslogtreecommitdiffstats
path: root/database
diff options
context:
space:
mode:
authorEmmanuel Vasilakis <mrzammler@mm.st>2023-06-21 15:39:43 +0300
committerGitHub <noreply@github.com>2023-06-21 15:39:43 +0300
commit6e1e97c5e8d136162368e16d27d7aa09b047f8c6 (patch)
tree4d20904c201c87d8a1670a83018aabaf0d4f3398 /database
parentd8661b8e0ce7648037bfa5dc7f535261d98ac7c9 (diff)
Use a single health log table (#15157)
* move old health log tables to one * change table in sqlite_health * remove check for off period of agent * changes in aclk_alert * fixes * add new field insert_mark_timestamp * cleanup * remove hostname, create the health log table during sqlite init * create the health_log during migration * move source from health_log to alert_hash. Remove class, component and type field from health_log * Register now_usec sqlite function * use global_id instead of insert_mark_timestamp. Use function now_usec to populate it * create functions earlier to have them during migration * small unit test fix * create additional health_log_detail table. Do the insert of an alert event on both * do the update on health_log_detail * change more queries * more indexes, fix inject removed * change last executed and select health log queries * random uuid for sqlite * do migration from old tables * queries to send alerts to cloud * cleanup queries * get an alarm id from db if not found in memory * small fix on query * add info when migration completes * dont pick health_log_detail during migration * check proper old health_log table * safer migration * proper log sent alerts. small fix in claimed cleanup * cleanups * extra check for cleanup * also get an alarm_event_id from sql * check for empty source * remove cleanup of main health log table --------- Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com>
Diffstat (limited to 'database')
-rw-r--r--database/rrdcalc.c10
-rw-r--r--database/rrdcalc.h1
-rw-r--r--database/sqlite/sqlite_aclk.h12
-rw-r--r--database/sqlite/sqlite_aclk_alert.c235
-rw-r--r--database/sqlite/sqlite_db_migration.c73
-rw-r--r--database/sqlite/sqlite_functions.c79
-rw-r--r--database/sqlite/sqlite_functions.h2
-rw-r--r--database/sqlite/sqlite_health.c897
-rw-r--r--database/sqlite/sqlite_health.h4
9 files changed, 890 insertions, 423 deletions
diff --git a/database/rrdcalc.c b/database/rrdcalc.c
index 948ebe8a55..69b749dd00 100644
--- a/database/rrdcalc.c
+++ b/database/rrdcalc.c
@@ -52,10 +52,14 @@ uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint3
alarm_id = ae->alarm_id;
else {
- if (unlikely(!host->health_log.next_alarm_id))
- host->health_log.next_alarm_id = (uint32_t)now_realtime_sec();
+ alarm_id = sql_get_alarm_id(host, chart, name, next_event_id);
- alarm_id = host->health_log.next_alarm_id++;
+ if (!alarm_id) {
+ if (unlikely(!host->health_log.next_alarm_id))
+ host->health_log.next_alarm_id = (uint32_t)now_realtime_sec();
+
+ alarm_id = host->health_log.next_alarm_id++;
+ }
}
netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock);
diff --git a/database/rrdcalc.h b/database/rrdcalc.h
index 3b48d74ec2..b9a9f627dc 100644
--- a/database/rrdcalc.h
+++ b/database/rrdcalc.h
@@ -211,6 +211,7 @@ struct alert_config {
STRING *repeat;
STRING *host_labels;
STRING *chart_labels;
+ STRING *source;
STRING *p_db_lookup_dimensions;
STRING *p_db_lookup_method;
diff --git a/database/sqlite/sqlite_aclk.h b/database/sqlite/sqlite_aclk.h
index d555a0cef6..705102d741 100644
--- a/database/sqlite/sqlite_aclk.h
+++ b/database/sqlite/sqlite_aclk.h
@@ -27,12 +27,20 @@ static inline void uuid_unparse_lower_fix(uuid_t *uuid, char *out)
out[23] = '_';
}
+static inline int uuid_parse_fix(char *in, uuid_t uuid)
+{
+ in[8] = '-';
+ in[13] = '-';
+ in[18] = '-';
+ in[23] = '-';
+ return uuid_parse(in, uuid);
+}
+
static inline int claimed()
{
return localhost->aclk_state.claimed_id != NULL;
}
-
#define TABLE_ACLK_ALERT "CREATE TABLE IF NOT EXISTS aclk_alert_%s (sequence_id INTEGER PRIMARY KEY, " \
"alert_unique_id, date_created, date_submitted, date_cloud_ack, filtered_alert_unique_id NOT NULL, " \
"unique(alert_unique_id));"
@@ -79,6 +87,8 @@ struct aclk_sync_host_config {
char uuid_str[UUID_STR_LEN];
char node_id[UUID_STR_LEN];
char *alerts_snapshot_uuid; // will contain the snapshot_uuid value if snapshot was requested
+ uint64_t alerts_log_first_sequence_id;
+ uint64_t alerts_log_last_sequence_id;
};
extern sqlite3 *db_meta;
diff --git a/database/sqlite/sqlite_aclk_alert.c b/database/sqlite/sqlite_aclk_alert.c
index 52d343acba..c58bf4bd47 100644
--- a/database/sqlite/sqlite_aclk_alert.c
+++ b/database/sqlite/sqlite_aclk_alert.c
@@ -7,37 +7,7 @@
#include "../../aclk/aclk_alarm_api.h"
#endif
-#define SQL_GET_ALERT_REMOVE_TIME "SELECT when_key FROM health_log_%s WHERE alarm_id = %u " \
- "AND unique_id > %u AND unique_id < %u " \
- "AND new_status = -2;"
-
-time_t removed_when(uint32_t alarm_id, uint32_t before_unique_id, uint32_t after_unique_id, char *uuid_str) {
- sqlite3_stmt *res = NULL;
- time_t when = 0;
- char sql[ACLK_SYNC_QUERY_SIZE];
-
- snprintfz(sql,ACLK_SYNC_QUERY_SIZE-1, SQL_GET_ALERT_REMOVE_TIME, uuid_str, alarm_id, after_unique_id, before_unique_id);
-
- int rc = sqlite3_prepare_v2(db_meta, sql, -1, &res, 0);
- if (rc != SQLITE_OK) {
- error_report("Failed to prepare statement when trying to find removed gap.");
- return 0;
- }
-
- rc = sqlite3_step_monitored(res);
- if (likely(rc == SQLITE_ROW)) {
- when = (time_t) sqlite3_column_int64(res, 0);
- }
-
- rc = sqlite3_finalize(res);
- if (unlikely(rc != SQLITE_OK))
- error_report("Failed to finalize statement when trying to find removed gap, rc = %d", rc);
-
- return when;
-}
-
#define SQL_UPDATE_FILTERED_ALERT "UPDATE aclk_alert_%s SET filtered_alert_unique_id = %u where filtered_alert_unique_id = %u"
-
void update_filtered(ALARM_ENTRY *ae, uint32_t unique_id, char *uuid_str) {
char sql[ACLK_SYNC_QUERY_SIZE];
snprintfz(sql, ACLK_SYNC_QUERY_SIZE-1, SQL_UPDATE_FILTERED_ALERT, uuid_str, ae->unique_id, unique_id);
@@ -45,17 +15,16 @@ void update_filtered(ALARM_ENTRY *ae, uint32_t unique_id, char *uuid_str) {
ae->flags |= HEALTH_ENTRY_FLAG_ACLK_QUEUED;
}
-#define SQL_SELECT_ALERT_BY_UNIQUE_ID "SELECT hl.unique_id FROM health_log_%s hl, alert_hash ah WHERE hl.unique_id = %u " \
- "AND hl.config_hash_id = ah.hash_id " \
+#define SQL_SELECT_VARIABLE_ALERT_BY_UNIQUE_ID "SELECT hld.unique_id FROM health_log hl, alert_hash ah, health_log_detail hld WHERE hld.unique_id = %u " \
+ "AND hl.config_hash_id = ah.hash_id AND hld.health_log_id = hl.health_log_id AND host_id = @host_id " \
"AND ah.warn IS NULL AND ah.crit IS NULL;"
-
-static inline bool is_event_from_alert_variable_config(uint32_t unique_id, char *uuid_str) {
+static inline bool is_event_from_alert_variable_config(uint32_t unique_id, uuid_t *host_id) {
sqlite3_stmt *res = NULL;
int rc = 0;
bool ret = false;
char sql[ACLK_SYNC_QUERY_SIZE];
- snprintfz(sql,ACLK_SYNC_QUERY_SIZE-1, SQL_SELECT_ALERT_BY_UNIQUE_ID, uuid_str, unique_id);
+ snprintfz(sql,ACLK_SYNC_QUERY_SIZE-1, SQL_SELECT_VARIABLE_ALERT_BY_UNIQUE_ID, unique_id);
rc = sqlite3_prepare_v2(db_meta, sql, -1, &res, 0);
if (rc != SQLITE_OK) {
@@ -63,6 +32,13 @@ static inline bool is_event_from_alert_variable_config(uint32_t unique_id, char
return false;
}
+ rc = sqlite3_bind_blob(res, 1, host_id, sizeof(*host_id), SQLITE_STATIC);
+ if (unlikely(rc != SQLITE_OK)) {
+ error_report("Failed to bind host_id for checking alert variable.");
+ sqlite3_finalize(res);
+ return false;
+ }
+
rc = sqlite3_step_monitored(res);
if (likely(rc == SQLITE_ROW)) {
ret = true;
@@ -76,13 +52,12 @@ static inline bool is_event_from_alert_variable_config(uint32_t unique_id, char
}
#define MAX_REMOVED_PERIOD 604800 //a week
-//decide if some events should be sent or not
-
-#define SQL_SELECT_ALERT_BY_ID "SELECT hl.new_status, hl.config_hash_id, hl.unique_id FROM health_log_%s hl, aclk_alert_%s aa " \
- "WHERE hl.unique_id = aa.filtered_alert_unique_id " \
- "AND hl.alarm_id = %u " \
- "ORDER BY alarm_event_id DESC LIMIT 1;"
+//decide if some events should be sent or not
+#define SQL_SELECT_ALERT_BY_ID "SELECT hld.new_status, hl.config_hash_id, hld.unique_id FROM health_log hl, aclk_alert_%s aa, health_log_detail hld " \
+ "WHERE hld.unique_id = aa.filtered_alert_unique_id " \
+ "AND hld.alarm_id = %u AND hl.host_id = @host_id AND hl.health_log_id = hld.health_log_id " \
+ "ORDER BY hld.alarm_event_id DESC LIMIT 1;"
int should_send_to_cloud(RRDHOST *host, ALARM_ENTRY *ae)
{
sqlite3_stmt *res = NULL;
@@ -104,7 +79,7 @@ int should_send_to_cloud(RRDHOST *host, ALARM_ENTRY *ae)
//get the previous sent event of this alarm_id
//base the search on the last filtered event
- snprintfz(sql,ACLK_SYNC_QUERY_SIZE-1, SQL_SELECT_ALERT_BY_ID, uuid_str, uuid_str, ae->alarm_id);
+ snprintfz(sql,ACLK_SYNC_QUERY_SIZE-1, SQL_SELECT_ALERT_BY_ID, uuid_str, ae->alarm_id);
int rc = sqlite3_prepare_v2(db_meta, sql, -1, &res, 0);
if (rc != SQLITE_OK) {
@@ -113,6 +88,13 @@ int should_send_to_cloud(RRDHOST *host, ALARM_ENTRY *ae)
return send;
}
+ rc = sqlite3_bind_blob(res, 1, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC);
+ if (unlikely(rc != SQLITE_OK)) {
+ error_report("Failed to bind host_id for checking alert variable.");
+ sqlite3_finalize(res);
+ return false;
+ }
+
rc = sqlite3_step_monitored(res);
if (likely(rc == SQLITE_ROW)) {
status = (RRDCALC_STATUS) sqlite3_column_int(res, 0);
@@ -142,20 +124,6 @@ int should_send_to_cloud(RRDHOST *host, ALARM_ENTRY *ae)
goto done;
}
- //detect a long off period of the agent, TODO make global
- if (ae->new_status == RRDCALC_STATUS_WARNING || ae->new_status == RRDCALC_STATUS_CRITICAL) {
- time_t when = removed_when(ae->alarm_id, ae->unique_id, unique_id, uuid_str);
-
- if (when && (when + (time_t)MAX_REMOVED_PERIOD) < ae->when) {
- send = 1;
- goto done;
- } else {
- send = 0;
- update_filtered(ae, unique_id, uuid_str);
- goto done;
- }
- }
-
done:
rc = sqlite3_finalize(res);
if (unlikely(rc != SQLITE_OK))
@@ -164,12 +132,8 @@ done:
return send;
}
-// will replace call to aclk_update_alarm in health/health_log.c
-// and handle both cases
-
#define SQL_QUEUE_ALERT_TO_CLOUD "INSERT INTO aclk_alert_%s (alert_unique_id, date_created, filtered_alert_unique_id) " \
"VALUES (@alert_unique_id, unixepoch(), @alert_unique_id) ON CONFLICT (alert_unique_id) do nothing;"
-
int sql_queue_alarm_to_aclk(RRDHOST *host, ALARM_ENTRY *ae, int skip_filter)
{
if(!service_running(SERVICE_ACLK))
@@ -193,7 +157,7 @@ int sql_queue_alarm_to_aclk(RRDHOST *host, ALARM_ENTRY *ae, int skip_filter)
char uuid_str[UUID_STR_LEN];
uuid_unparse_lower_fix(&host->host_uuid, uuid_str);
- if (is_event_from_alert_variable_config(ae->unique_id, uuid_str))
+ if (is_event_from_alert_variable_config(ae->unique_id, &host->host_uuid))
return 0;
sqlite3_stmt *res_alert = NULL;
@@ -305,21 +269,18 @@ void aclk_push_alert_event(struct aclk_sync_host_config *wc)
sqlite3_stmt *res = NULL;
- buffer_sprintf(sql, "select aa.sequence_id, hl.unique_id, hl.alarm_id, hl.config_hash_id, hl.updated_by_id, hl.when_key, " \
- " hl.duration, hl.non_clear_duration, hl.flags, hl.exec_run_timestamp, hl.delay_up_to_timestamp, hl.name, " \
- " hl.chart, hl.family, hl.exec, hl.recipient, hl.source, hl.units, hl.info, hl.exec_code, hl.new_status, " \
- " hl.old_status, hl.delay, hl.new_value, hl.old_value, hl.last_repeat, hl.chart_context, hl.transition_id, hl.alarm_event_id " \
- " from health_log_%s hl, aclk_alert_%s aa " \
- " where hl.unique_id = aa.alert_unique_id and aa.date_submitted is null " \
- " order by aa.sequence_id asc limit %d;", wc->uuid_str, wc->uuid_str, limit);
+ buffer_sprintf(sql, "select aa.sequence_id, hld.unique_id, hld.alarm_id, hl.config_hash_id, hld.updated_by_id, hld.when_key, " \
+ " hld.duration, hld.non_clear_duration, hld.flags, hld.exec_run_timestamp, hld.delay_up_to_timestamp, hl.name, " \
+ " hl.chart, hl.family, hl.exec, hl.recipient, ha.source, hl.units, hld.info, hld.exec_code, hld.new_status, " \
+ " hld.old_status, hld.delay, hld.new_value, hld.old_value, hld.last_repeat, hl.chart_context, hld.transition_id, hld.alarm_event_id " \
+ " from health_log hl, aclk_alert_%s aa, alert_hash ha, health_log_detail hld " \
+ " where hld.unique_id = aa.alert_unique_id and hl.config_hash_id = ha.hash_id and aa.date_submitted is null " \
+ " and hl.host_id = @host_id and hl.health_log_id = hld.health_log_id " \
+ " order by aa.sequence_id asc limit %d;", wc->uuid_str, limit);
rc = sqlite3_prepare_v2(db_meta, buffer_tostring(sql), -1, &res, 0);
if (rc != SQLITE_OK) {
- // Try to create tables
- if (wc->host)
- sql_create_health_log_table(wc->host);
-
BUFFER *sql_fix = buffer_create(1024, &netdata_buffers_statistics.buffers_sqlite);
buffer_sprintf(sql_fix, TABLE_ACLK_ALERT, wc->uuid_str);
rc = db_execute(db_meta, buffer_tostring(sql_fix));
@@ -344,10 +305,15 @@ void aclk_push_alert_event(struct aclk_sync_host_config *wc)
}
}
- uint64_t first_sequence_id = 0;
- uint64_t last_sequence_id = 0;
- static __thread uint64_t log_first_sequence_id = 0;
- static __thread uint64_t log_last_sequence_id = 0;
+ rc = sqlite3_bind_blob(res, 1, &wc->host->host_uuid, sizeof(wc->host->host_uuid), SQLITE_STATIC);
+ if (unlikely(rc != SQLITE_OK)) {
+ error_report("Failed to bind host_id for pushing alert event.");
+ sqlite3_finalize(res);
+ return;
+ }
+
+ uint64_t first_sequence_id = 0;
+ uint64_t last_sequence_id = 0;
while (sqlite3_step_monitored(res) == SQLITE_ROW) {
struct alarm_log_entry alarm_log;
@@ -371,7 +337,8 @@ void aclk_push_alert_event(struct aclk_sync_host_config *wc)
alarm_log.timezone = strdupz(rrdhost_abbrev_timezone(wc->host));
alarm_log.exec_path = sqlite3_column_bytes(res, 14) > 0 ? strdupz((char *)sqlite3_column_text(res, 14)) :
strdupz((char *)string2str(wc->host->health.health_default_exec));
- alarm_log.conf_source = strdupz((char *)sqlite3_column_text(res, 16));
+
+ alarm_log.conf_source = sqlite3_column_bytes(res, 16) > 0 ? strdupz((char *)sqlite3_column_text(res, 16)) : strdupz("");
char *edit_command = sqlite3_column_bytes(res, 16) > 0 ?
health_edit_command_from_source((char *)sqlite3_column_text(res, 16)) :
@@ -420,11 +387,11 @@ void aclk_push_alert_event(struct aclk_sync_host_config *wc)
if (first_sequence_id == 0)
first_sequence_id = (uint64_t) sqlite3_column_int64(res, 0);
- if (log_first_sequence_id == 0)
- log_first_sequence_id = (uint64_t) sqlite3_column_int64(res, 0);
+ if (wc->alerts_log_first_sequence_id == 0)
+ wc->alerts_log_first_sequence_id = (uint64_t) sqlite3_column_int64(res, 0);
last_sequence_id = (uint64_t) sqlite3_column_int64(res, 0);
- log_last_sequence_id = (uint64_t) sqlite3_column_int64(res, 0);
+ wc->alerts_log_last_sequence_id = (uint64_t) sqlite3_column_int64(res, 0);
destroy_alarm_log_entry(&alarm_log);
freez(edit_command);
@@ -443,15 +410,15 @@ void aclk_push_alert_event(struct aclk_sync_host_config *wc)
rrdhost_flag_set(wc->host, RRDHOST_FLAG_ACLK_STREAM_ALERTS);
} else {
- if (log_first_sequence_id)
+ if (wc->alerts_log_first_sequence_id)
log_access(
"ACLK RES [%s (%s)]: ALERTS SENT from %" PRIu64 " to %" PRIu64 "",
wc->node_id,
wc->host ? rrdhost_hostname(wc->host) : "N/A",
- log_first_sequence_id,
- log_last_sequence_id);
- log_first_sequence_id = 0;
- log_last_sequence_id = 0;
+ wc->alerts_log_first_sequence_id,
+ wc->alerts_log_last_sequence_id);
+ wc->alerts_log_first_sequence_id = 0;
+ wc->alerts_log_last_sequence_id = 0;
}
rc = sqlite3_finalize(res);
@@ -486,17 +453,49 @@ void sql_queue_existing_alerts_to_aclk(RRDHOST *host)
char uuid_str[UUID_STR_LEN];
uuid_unparse_lower_fix(&host->host_uuid, uuid_str);
BUFFER *sql = buffer_create(1024, &netdata_buffers_statistics.buffers_sqlite);
+ sqlite3_stmt *res = NULL;
+ int rc;
- buffer_sprintf(sql,"delete from aclk_alert_%s; " \
- "insert into aclk_alert_%s (alert_unique_id, date_created, filtered_alert_unique_id) " \
- "select unique_id alert_unique_id, unixepoch(), unique_id alert_unique_id from health_log_%s " \
- "where new_status <> 0 and new_status <> -2 and config_hash_id is not null and updated_by_id = 0 " \
- "order by unique_id asc on conflict (alert_unique_id) do nothing;", uuid_str, uuid_str, uuid_str);
+ netdata_rwlock_wrlock(&host->health_log.alarm_log_rwlock);
- netdata_rwlock_rdlock(&host->health_log.alarm_log_rwlock);
+ buffer_sprintf(sql, "delete from aclk_alert_%s; ", uuid_str);
+ if (unlikely(db_execute(db_meta, buffer_tostring(sql)))) {
+ netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock);
+ buffer_free(sql);
+ return;
+ }
- if (unlikely(db_execute(db_meta, buffer_tostring(sql))))
- error_report("Failed to queue existing ACLK alert events for host %s", rrdhost_hostname(host));
+ buffer_flush(sql);
+ buffer_sprintf(sql, "insert into aclk_alert_%s (alert_unique_id, date_created, filtered_alert_unique_id) " \
+ "select hld.unique_id alert_unique_id, unixepoch(), hld.unique_id alert_unique_id from health_log_detail hld, health_log hl " \
+ "where hld.new_status <> 0 and hld.new_status <> -2 and hl.health_log_id = hld.health_log_id and hl.config_hash_id is not null " \
+ "and hld.updated_by_id = 0 and hl.host_id = @host_id order by hld.unique_id asc on conflict (alert_unique_id) do nothing;", uuid_str);
+
+ rc = sqlite3_prepare_v2(db_meta, buffer_tostring(sql), -1, &res, 0);
+ if (rc != SQLITE_OK) {
+ error_report("Failed to prepare statement when trying to queue existing alerts.");
+ netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock);
+ buffer_free(sql);
+ return;
+ }
+
+ rc = sqlite3_bind_blob(res, 1, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC);
+ if (unlikely(rc != SQLITE_OK)) {
+ error_report("Failed to bind host_id for when trying to queue existing alerts.");
+ sqlite3_finalize(res);
+ netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock);
+ buffer_free(sql);
+ return;
+ }
+
+ rc = execute_insert(res);
+ if (unlikely(rc != SQLITE_DONE)) {
+ error_report("Failed to queue existing alerts, rc = %d", rc);
+ }
+
+ rc = sqlite3_finalize(res);
+ if (unlikely(rc != SQLITE_OK))
+ error_report("Failed to finalize statement to queue existing alerts, rc = %d", rc);
netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock);
@@ -523,7 +522,6 @@ void aclk_send_alarm_configuration(char *config_hash)
"module, charts, families, lookup, every, units, green, red, calc, warn, crit, to_key, exec, delay, repeat, info," \
"options, host_labels, p_db_lookup_dimensions, p_db_lookup_method, p_db_lookup_options, p_db_lookup_after," \
"p_db_lookup_before, p_update_every FROM alert_hash WHERE hash_id = @hash_id;"
-
int aclk_push_alert_config_event(char *node_id __maybe_unused, char *config_hash __maybe_unused)
{
int rc = 0;
@@ -685,13 +683,11 @@ void aclk_start_alert_streaming(char *node_id, bool resets)
}
#define SQL_QUEUE_REMOVE_ALERTS "INSERT INTO aclk_alert_%s (alert_unique_id, date_created, filtered_alert_unique_id) " \
- "SELECT unique_id alert_unique_id, UNIXEPOCH(), unique_id alert_unique_id FROM health_log_%s " \
- "WHERE new_status = -2 AND updated_by_id = 0 AND unique_id NOT IN " \
- "(SELECT alert_unique_id FROM aclk_alert_%s) " \
- "AND config_hash_id NOT IN (select hash_id from alert_hash where warn is null and crit is null) " \
- "ORDER BY unique_id ASC " \
- "ON CONFLICT (alert_unique_id) DO NOTHING;"
-
+ "SELECT hld.unique_id alert_unique_id, UNIXEPOCH(), hld.unique_id alert_unique_id FROM health_log hl, health_log_detail hld " \
+ "WHERE hl.host_id = @host_id AND hl.health_log_id = hld.health_log_id AND hld.new_status = -2 AND hld.updated_by_id = 0 " \
+ "AND hld.unique_id NOT IN (SELECT alert_unique_id FROM aclk_alert_%s) " \
+ "AND hl.config_hash_id NOT IN (select hash_id from alert_hash where warn is null and crit is null) " \
+ "ORDER BY hld.unique_id ASC ON CONFLICT (alert_unique_id) DO NOTHING;"
void sql_process_queue_removed_alerts_to_aclk(char *node_id)
{
struct aclk_sync_host_config *wc;
@@ -702,15 +698,35 @@ void sql_process_queue_removed_alerts_to_aclk(char *node_id)
return;
char sql[ACLK_SYNC_QUERY_SIZE * 2];
+ sqlite3_stmt *res = NULL;
- snprintfz(sql,ACLK_SYNC_QUERY_SIZE * 2 - 1, SQL_QUEUE_REMOVE_ALERTS, wc->uuid_str, wc->uuid_str, wc->uuid_str);
+ snprintfz(sql, ACLK_SYNC_QUERY_SIZE * 2 - 1, SQL_QUEUE_REMOVE_ALERTS, wc->uuid_str, wc->uuid_str);
- if (unlikely(db_execute(db_meta, sql))) {
- log_access("ACLK STA [%s (%s)]: QUEUED REMOVED ALERTS FAILED", wc->node_id, rrdhost_hostname(wc->host));
- error_report("Failed to queue ACLK alert removed entries for host %s", rrdhost_hostname(wc->host));
+ int rc = sqlite3_prepare_v2(db_meta, sql, -1, &res, 0);
+ if (rc != SQLITE_OK) {
+ error_report("Failed to prepare statement when trying to queue removed alerts.");
+ return;
+ }
+
+ rc = sqlite3_bind_blob(res, 1, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC);
+ if (unlikely(rc != SQLITE_OK)) {
+ error_report("Failed to bind host_id for when trying to queue remvoed alerts.");
+ sqlite3_finalize(res);
+ return;
+ }
+
+ rc = execute_insert(res);
+ if (unlikely(rc != SQLITE_DONE)) {
+ sqlite3_finalize(res);
+ error_report("Failed to queue removed alerts, rc = %d", rc);
+ return;
}
- else
- log_access("ACLK STA [%s (%s)]: QUEUED REMOVED ALERTS", wc->node_id, rrdhost_hostname(wc->host));
+
+ rc = sqlite3_finalize(res);
+ if (unlikely(rc != SQLITE_OK))
+ error_report("Failed to finalize statement to queue removed alerts, rc = %d", rc);
+
+ log_access("ACLK STA [%s (%s)]: QUEUED REMOVED ALERTS", wc->node_id, rrdhost_hostname(wc->host));
rrdhost_flag_set(wc->host, RRDHOST_FLAG_ACLK_STREAM_ALERTS);
wc->alert_queue_removed = 0;
@@ -886,7 +902,7 @@ void aclk_push_alert_snapshot_event(char *node_id __maybe_unused)
if (have_recent_alarm(host, ae->alarm_id, ae->unique_id))
continue;
- if (is_event_from_alert_variable_config(ae->unique_id, uuid_str))
+ if (is_event_from_alert_variable_config(ae->unique_id, &host->host_uuid))
continue;
cnt++;
@@ -918,7 +934,7 @@ void aclk_push_alert_snapshot_event(char *node_id __maybe_unused)
if (have_recent_alarm(host, ae->alarm_id, ae->unique_id))
continue;
- if (is_event_from_alert_variable_config(ae->unique_id, uuid_str))
+ if (is_event_from_alert_variable_config(ae->unique_id, &host->host_uuid))
continue;
cnt++;
@@ -984,7 +1000,6 @@ void sql_aclk_alert_clean_dead_entries(RRDHOST *host)
#define SQL_GET_MIN_MAX_ALERT_SEQ "SELECT MIN(sequence_id), MAX(sequence_id), " \
"(SELECT MAX(sequence_id) FROM aclk_alert_%s WHERE date_submitted IS NOT NULL) " \
"FROM aclk_alert_%s WHERE date_submitted IS NULL;"
-
int get_proto_alert_status(RRDHOST *host, struct proto_alert_status *proto_alert_status)
{
int rc;
diff --git a/database/sqlite/sqlite_db_migration.c b/database/sqlite/sqlite_db_migration.c
index 9c7235fdb6..50c0837672 100644
--- a/database/sqlite/sqlite_db_migration.c
+++ b/database/sqlite/sqlite_db_migration.c
@@ -11,7 +11,6 @@ static int return_int_cb(void *data, int argc, char **argv, char **column)
return 0;
}
-
int table_exists_in_database(const char *table)
{
char *err_msg = NULL;
@@ -214,6 +213,77 @@ static int do_migration_v7_v8(sqlite3 *database, const char *name)
return 0;
}
+static int do_migration_v8_v9(sqlite3 *database, const char *name)
+{
+ info("Running database migration %s", name);
+
+ char sql[2048];
+ int rc;
+ sqlite3_stmt *res = NULL;
+
+ //create the health_log table and it's index
+ snprintfz(sql, 2047, "CREATE TABLE IF NOT EXISTS health_log (health_log_id INTEGER PRIMARY KEY, host_id blob, alarm_id int, " \
+ "config_hash_id blob, name text, chart text, family text, recipient text, units text, exec text, " \
+ "chart_context text, last_transition_id blob, UNIQUE (host_id, alarm_id)) ;");
+ sqlite3_exec_monitored(database, sql, 0, 0, NULL);
+
+ //TODO indexes
+ snprintfz(sql, 2047, "CREATE INDEX IF NOT EXISTS health_log_ind_1 ON health_log (host_id);");
+ sqlite3_exec_monitored(database, sql, 0, 0, NULL);
+
+ snprintfz(sql, 2047, "CREATE TABLE IF NOT EXISTS health_log_detail (health_log_id int, unique_id int, alarm_id int, alarm_event_id int, " \
+ "updated_by_id int, updates_id int, when_key int, duration int, non_clear_duration int, " \
+ "flags int, exec_run_timestamp int, delay_up_to_timestamp int, " \
+ "info text, exec_code int, new_status real, old_status real, delay int, " \
+ "new_value double, old_value double, last_repeat int, transition_id blob, global_id int, host_id blob);");
+ sqlite3_exec_monitored(database, sql, 0, 0, NULL);
+
+ snprintfz(sql, 2047, "CREATE INDEX IF NOT EXISTS health_log_d_ind_1 ON health_log_detail (unique_id);");
+ sqlite3_exec_monitored(database, sql, 0, 0, NULL);
+ snprintfz(sql, 2047, "CREATE INDEX IF NOT EXISTS health_log_d_ind_2 ON health_log_detail (global_id);");
+ sqlite3_exec_monitored(database, sql, 0, 0, NULL);
+ snprintfz(sql, 2047, "CREATE INDEX IF NOT EXISTS health_log_d_ind_3 ON health_log_detail (transition_id);");
+ sqlite3_exec_monitored(database, sql, 0, 0, NULL);
+
+ snprintfz(sql, 2047, "ALTER TABLE alert_hash ADD source text;");
+ sqlite3_exec_monitored(database, sql, 0, 0, NULL);
+
+ snprintfz(sql, 2047, "CREATE INDEX IF NOT EXISTS alert_hash_index ON alert_hash (hash_id);");
+ sqlite3_exec_monitored(database, sql, 0, 0, NULL);
+
+ snprintfz(sql, 2047, "SELECT name FROM sqlite_schema WHERE type ='table' AND name LIKE 'health_log_%%' AND name <> 'health_log_detail';");
+ rc = sqlite3_prepare_v2(database, sql, -1, &res, 0);
+ if (rc != SQLITE_OK) {
+ error_report("Failed to prepare statement to alter health_log tables");
+ return 1;
+ }
+
+ DICTIONARY *dict_tables = dictionary_create(DICT_OPTION_NONE);
+
+ while (sqlite3_step_monitored(res) == SQLITE_ROW) {
+ char *table = strdupz((char *) sqlite3_column_text(res, 0));
+ if (health_migrate_old_health_log_table(table)) {
+ dictionary_set(dict_tables, table, NULL, 0);
+ }
+ freez(table);
+ }
+
+ rc = sqlite3_finalize(res);
+ if (unlikely(rc != SQLITE_OK))
+ error_report("Failed to finalize statement when copying health_log tables, rc = %d", rc);
+
+ char *table = NULL;
+ dfe_start_read(dict_tables, table) {
+ sql_drop_table(table_dfe.name);
+ }
+ dfe_done(table);
+ dictionary_destroy(dict_tables);
+
+ snprintfz(sql, 2047, "ALTER TABLE health_log_detail DROP COLUMN host_id;");
+ sqlite3_exec_monitored(database, sql, 0, 0, NULL);
+
+ return 0;
+}
static int do_migration_noop(sqlite3 *database, const char *name)
{
@@ -266,6 +336,7 @@ DATABASE_FUNC_MIGRATION_LIST migration_action[] = {
{.name = "v5 to v6", .func = do_migration_v5_v6},
{.name = "v6 to v7", .func = do_migration_v6_v7},
{.name = "v7 to v8", .func = do_migration_v7_v8},
+ {.name = "v8 to v9", .func = do_migration_v8_v9},
// the terminator of this array
{.name = NULL, .func = NULL}
};
diff --git a/database/sqlite/sqlite_functions.c b/database/sqlite/sqlite_functions.c
index 555db10117..3f6b5bb6f5 100644
--- a/database/sqlite/sqlite_functions.c
+++ b/database/sqlite/sqlite_functions.c
@@ -3,7 +3,7 @@
#include "sqlite_functions.h"
#include "sqlite_db_migration.h"
-#define DB_METADATA_VERSION 8
+#define DB_METADATA_VERSION 9
const char *database_config[] = {
"CREATE TABLE IF NOT EXISTS host(host_id BLOB PRIMARY KEY, hostname TEXT NOT NULL, "
@@ -32,7 +32,9 @@ const char *database_config[] = {
"every text, units text, calc text, families text, plugin text, module text, charts text, green text, "
"red text, warn text, crit text, exec text, to_key text, info text, delay text, options text, "
"repeat text, host_labels text, p_db_lookup_dimensions text, p_db_lookup_method text, p_db_lookup_options int, "
- "p_db_lookup_after int, p_db_lookup_before int, p_update_every int);",
+ "p_db_lookup_after int, p_db_lookup_before int, p_update_every int, source text);",
+
+ "CREATE INDEX IF NOT EXISTS alert_hash_index ON alert_hash (hash_id);",
"CREATE TABLE IF NOT EXISTS host_info(host_id blob, system_key text NOT NULL, system_value text NOT NULL, "
"date_created INT, PRIMARY KEY(host_id, system_key));",
@@ -43,6 +45,24 @@ const char *database_config[] = {
"CREATE TRIGGER IF NOT EXISTS ins_host AFTER INSERT ON host BEGIN INSERT INTO node_instance (host_id, date_created)"
" SELECT new.host_id, unixepoch() WHERE new.host_id NOT IN (SELECT host_id FROM node_instance); END;",
+ "CREATE TABLE IF NOT EXISTS health_log (health_log_id INTEGER PRIMARY KEY, host_id blob, alarm_id int, "
+ "config_hash_id blob, name text, chart text, family text, recipient text, units text, exec text, "
+ "chart_context text, last_transition_id blob, UNIQUE (host_id, alarm_id)) ;",
+
+ //TODO indexes
+ "CREATE INDEX IF NOT EXISTS health_log_ind_1 ON health_log (host_id);",
+
+ "CREATE TABLE IF NOT EXISTS health_log_detail (health_log_id int, unique_id int, alarm_id int, alarm_event_id int, "
+ "updated_by_id int, updates_id int, when_key int, duration int, non_clear_duration int, "
+ "flags int, exec_run_timestamp int, delay_up_to_timestamp int, "
+ "info text, exec_code int, new_status real, old_status real, delay int, "
+ "new_value double, old_value double, last_repeat int, transition_id blob, global_id int);",
+
+ "CREATE INDEX IF NOT EXISTS health_log_d_ind_1 ON health_log_detail (unique_id);",
+ "CREATE INDEX IF NOT EXISTS health_log_d_ind_2 ON health_log_detail (global_id);",
+ "CREATE INDEX IF NOT EXISTS health_log_d_ind_3 ON health_log_detail (transition_id);",
+ //TODO more indexes
+
NULL
};
@@ -336,6 +356,30 @@ static void sqlite_uuid_parse(sqlite3_context *context, int argc, sqlite3_value
sqlite3_result_blob(context, &uuid, sizeof(uuid_t), SQLITE_TRANSIENT);
}
+void sqlite_now_usec(sqlite3_context *context, int argc, sqlite3_value **argv)
+{
+ if (argc != 1 ){
+ sqlite3_result_null(context);
+ return ;
+ }
+
+ if (sqlite3_value_int(argv[0]) != 0) {
+ struct timespec req = {.tv_sec = 0, .tv_nsec = 1};
+ nanosleep(&req, NULL);
+ }
+
+ sqlite3_result_int64(context, (sqlite_int64) now_realtime_usec());
+}
+
+void sqlite_uuid_random(sqlite3_context *context, int argc, sqlite3_value **argv)
+{
+ (void)argc;
+ (void)argv;
+
+ uuid_t uuid;
+ uuid_generate_random(uuid);
+ sqlite3_result_blob(context, &uuid, sizeof(uuid_t), SQLITE_TRANSIENT);
+}
/*
* Initialize the SQLite database
@@ -405,6 +449,18 @@ int sql_init_database(db_check_action_type_t rebuild, int memory)
char buf[1024 + 1] = "";
const char *list[2] = { buf, NULL };
+ rc = sqlite3_create_function(db_meta, "u2h", 1, SQLITE_ANY | SQLITE_DETERMINISTIC, 0, sqlite_uuid_parse, 0, 0);
+ if (unlikely(rc != SQLITE_OK))
+ error_report("Failed to register internal u2h function");
+
+ rc = sqlite3_create_function(db_meta, "now_usec", 1, SQLITE_ANY, 0, sqlite_now_usec, 0, 0);
+ if (unlikely(rc != SQLITE_OK))
+ error_report("Failed to register internal now_usec function");
+
+ rc = sqlite3_create_function(db_meta, "uuid_random", 0, SQLITE_ANY, 0, sqlite_uuid_random, 0, 0);
+ if (unlikely(rc != SQLITE_OK))
+ error_report("Failed to register internal uuid_random function");
+
int target_version = DB_METADATA_VERSION;
if (likely(!memory))
@@ -454,9 +510,6 @@ int sql_init_database(db_check_action_type_t rebuild, int memory)
initialize_thread_key_pool();
- rc = sqlite3_create_function(db_meta, "u2h", 1, SQLITE_ANY | SQLITE_DETERMINISTIC, 0, sqlite_uuid_parse, 0, 0);
- if (unlikely(rc != SQLITE_OK))
- error_report("Failed to register internal u2h function");
return 0;
}
@@ -927,3 +980,19 @@ int sql_metadata_cache_stats(int op)
netdata_thread_enable_cancelability();
return count;
}
+
+#define SQL_DROP_TABLE "DROP table %s;"
+
+void sql_drop_table(const char *table)
+{
+ if (!table)
+ return;
+
+ char wstr[255];
+ snprintfz(wstr, 254, SQL_DROP_TABLE, table);
+
+ int rc = sqlite3_exec_monitored(db_meta, wstr, 0, 0, NULL);
+ if (rc != SQLITE_OK) {
+ error_report("DES SQLite error during drop table operation for %s, rc = %d", table, rc);
+ }
+}
diff --git a/database/sqlite/sqlite_functions.h b/database/sqlite/sqlite_functions.h
index ee63a397cf..407ed1eff7 100644
--- a/database/sqlite/sqlite_functions.h
+++ b/database/sqlite/sqlite_functions.h
@@ -77,4 +77,6 @@ void invalidate_node_instances(uuid_t *host_id, uuid_t *claim_id);
// Provide statistics
int sql_metadata_cache_stats(int op);
+void sql_drop_table(const char *table);
+void sqlite_now_usec(sqlite3_context *context, int argc, sqlite3_value **argv);
#endif //NETDATA_SQLITE_FUNCTIONS_H
diff --git a/database/sqlite/sqlite_health.c b/database/sqlite/sqlite_health.c
index 6f8a47e6a7..c51add0179 100644
--- a/database/sqlite/sqlite_health.c
+++ b/database/sqlite/sqlite_health.c
@@ -8,45 +8,12 @@
#define sqlite3_bind_string_or_null(res,key,param) ((key) ? sqlite3_bind_text(res, param, string2str(key), -1, SQLITE_STATIC) : sqlite3_bind_null(res, param))
/* Health related SQL queries
- Creates a health log table in sqlite, one per host guid