summaryrefslogtreecommitdiffstats
path: root/database/sqlite/sqlite_aclk.c
diff options
context:
space:
mode:
authorStelios Fragkakis <52996999+stelfrag@users.noreply.github.com>2023-11-23 23:56:34 +0200
committerGitHub <noreply@github.com>2023-11-23 23:56:34 +0200
commit85f359fc2677752ef7501fbee92d9caecc924538 (patch)
treee874de2346c715e9b5762517f9c7faf08af61da8 /database/sqlite/sqlite_aclk.c
parenta721fcb61f9c50762a92d2939091fdfc02bf0014 (diff)
Handle ephemeral hosts (#16381)
* Handle ephemeral hosts * Node ephemeral removal timeout 86400 seconds (1 day) * Move config from health to global section * Set a node to queryable false when it is ephemeral and is removed * Log queryable. Send queryable=0 only when forcing host deletion (the node is ephemeral) * Switch to "is ephemeral node" Document stream.conf * Unregister node id
Diffstat (limited to 'database/sqlite/sqlite_aclk.c')
-rw-r--r--database/sqlite/sqlite_aclk.c146
1 files changed, 118 insertions, 28 deletions
diff --git a/database/sqlite/sqlite_aclk.c b/database/sqlite/sqlite_aclk.c
index 3a702738aa..83203b22e3 100644
--- a/database/sqlite/sqlite_aclk.c
+++ b/database/sqlite/sqlite_aclk.c
@@ -71,6 +71,7 @@ enum {
IDX_ENTRIES,
IDX_HEALTH_ENABLED,
IDX_LAST_CONNECTED,
+ IDX_IS_EPHEMERAL,
};
static int create_host_callback(void *data, int argc, char **argv, char **column)
@@ -79,9 +80,26 @@ static int create_host_callback(void *data, int argc, char **argv, char **column
UNUSED(argc);
UNUSED(column);
+ time_t last_connected = (time_t) (argv[IDX_LAST_CONNECTED] ? str2uint64_t(argv[IDX_LAST_CONNECTED], NULL) : 0);
+ time_t age = now_realtime_sec() - last_connected;
+ int is_ephemeral = 0;
+
+ if (argv[IDX_IS_EPHEMERAL])
+ is_ephemeral = str2i(argv[IDX_IS_EPHEMERAL]);
+
char guid[UUID_STR_LEN];
uuid_unparse_lower(*(uuid_t *)argv[IDX_HOST_ID], guid);
+ if (is_ephemeral && age > rrdhost_free_ephemeral_time_s) {
+ netdata_log_info(
+ "Skipping ephemeral hostname \"%s\" with GUID \"%s\", age = %ld seconds (limit %ld seconds)",
+ (const char *)argv[IDX_HOSTNAME],
+ guid,
+ age,
+ rrdhost_free_ephemeral_time_s);
+ return 0;
+ }
+
struct rrdhost_system_info *system_info = callocz(1, sizeof(struct rrdhost_system_info));
__atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_allocations_size, sizeof(struct rrdhost_system_info), __ATOMIC_RELAXED);
@@ -90,33 +108,47 @@ static int create_host_callback(void *data, int argc, char **argv, char **column
sql_build_host_system_info((uuid_t *)argv[IDX_HOST_ID], system_info);
RRDHOST *host = rrdhost_find_or_create(
- (const char *) argv[IDX_HOSTNAME]
- , (const char *) argv[IDX_REGISTRY]
- , guid
- , (const char *) argv[IDX_OS]
- , (const char *) argv[IDX_TIMEZONE]
- , (const char *) argv[IDX_ABBREV_TIMEZONE]
- , (int32_t) (argv[IDX_UTC_OFFSET] ? str2uint32_t(argv[IDX_UTC_OFFSET], NULL) : 0)
- , (const char *) argv[IDX_TAGS]
- , (const char *) (argv[IDX_PROGRAM_NAME] ? argv[IDX_PROGRAM_NAME] : "unknown")
- , (const char *) (argv[IDX_PROGRAM_VERSION] ? argv[IDX_PROGRAM_VERSION] : "unknown")
- , argv[IDX_UPDATE_EVERY] ? str2i(argv[IDX_UPDATE_EVERY]) : 1
- , argv[IDX_ENTRIES] ? str2i(argv[IDX_ENTRIES]) : 0
- , default_rrd_memory_mode
- , 0 // health
- , 0 // rrdpush enabled
- , NULL //destination
- , NULL // api key
- , NULL // send charts matching
- , false // rrdpush_enable_replication
- , 0 // rrdpush_seconds_to_replicate
- , 0 // rrdpush_replication_step
- , system_info
- , 1
- );
+ (const char *)argv[IDX_HOSTNAME],
+ (const char *)argv[IDX_REGISTRY],
+ guid,
+ (const char *)argv[IDX_OS],
+ (const char *)argv[IDX_TIMEZONE],
+ (const char *)argv[IDX_ABBREV_TIMEZONE],
+ (int32_t)(argv[IDX_UTC_OFFSET] ? str2uint32_t(argv[IDX_UTC_OFFSET], NULL) : 0),
+ (const char *)argv[IDX_TAGS],
+ (const char *)(argv[IDX_PROGRAM_NAME] ? argv[IDX_PROGRAM_NAME] : "unknown"),
+ (const char *)(argv[IDX_PROGRAM_VERSION] ? argv[IDX_PROGRAM_VERSION] : "unknown"),
+ argv[IDX_UPDATE_EVERY] ? str2i(argv[IDX_UPDATE_EVERY]) : 1,
+ argv[IDX_ENTRIES] ? str2i(argv[IDX_ENTRIES]) : 0,
+ default_rrd_memory_mode,
+ 0 // health
+ ,
+ 0 // rrdpush enabled
+ ,
+ NULL //destination
+ ,
+ NULL // api key
+ ,
+ NULL // send charts matching
+ ,
+ false // rrdpush_enable_replication
+ ,
+ 0 // rrdpush_seconds_to_replicate
+ ,
+ 0 // rrdpush_replication_step
+ ,
+ system_info,
+ 1);
+
if (likely(host)) {
+ if (is_ephemeral)
+ rrdhost_option_set(host, RRDHOST_OPTION_EPHEMERAL_HOST);
+
+ if (is_ephemeral)
+ host->child_disconnected_time = now_realtime_sec();
+
host->rrdlabels = sql_load_host_labels((uuid_t *)argv[IDX_HOST_ID]);
- host->last_connected = (time_t) (argv[IDX_LAST_CONNECTED] ? str2uint64_t(argv[IDX_LAST_CONNECTED], NULL) : 0);
+ host->last_connected = last_connected;
}
(*number_of_chidren)++;
@@ -125,7 +157,7 @@ static int create_host_callback(void *data, int argc, char **argv, char **column
char node_str[UUID_STR_LEN] = "<none>";
if (likely(host->node_id))
uuid_unparse_lower(*host->node_id, node_str);
- internal_error(true, "Adding archived host \"%s\" with GUID \"%s\" node id = \"%s\"", rrdhost_hostname(host), host->machine_guid, node_str);
+ internal_error(true, "Adding archived host \"%s\" with GUID \"%s\" node id = \"%s\" ephemeral=%d", rrdhost_hostname(host), host->machine_guid, node_str, is_ephemeral);
#endif
return 0;
}
@@ -215,6 +247,43 @@ fail:
buffer_free(sql);
}
+// OPCODE: ACLK_DATABASE_NODE_UNREGISTER -- sets node_id to NULL in node_instance for the host identified by a GUID string
+static void sql_unregister_node(char *machine_guid) // machine_guid: textual UUID; freed in here with freez() when non-NULL
+{
+ int rc;
+ uuid_t host_uuid;
+
+ if (unlikely(!machine_guid))
+ return;
+
+ rc = uuid_parse(machine_guid, host_uuid);
+ freez(machine_guid); // released before rc is checked, so both the parse-ok and parse-failed paths free it
+ if (rc) // non-zero rc from uuid_parse: no host_id to match, skip the update
+ return;
+
+ sqlite3_stmt *res = NULL;
+
+ rc = sqlite3_prepare_v2(db_meta, "UPDATE node_instance SET node_id = NULL WHERE host_id = @host_id", -1, &res, 0);
+ if (unlikely(rc != SQLITE_OK)) {
+ error_report("Failed to prepare statement remote node id for a host");
+ return; // returns without going through the finalize at "failed:"
+ }
+
+ rc = sqlite3_bind_blob(res, 1, &host_uuid, sizeof(host_uuid), SQLITE_STATIC); // binds the local host_uuid; the statement is finalized below before this function returns
+ if (unlikely(rc != SQLITE_OK)) {
+ error_report("Failed to bind host_id parameter to remove node id");
+ goto failed;
+ }
+ rc = sqlite3_step_monitored(res);
+ if (unlikely(rc != SQLITE_DONE)) // anything other than SQLITE_DONE is only reported; nothing is returned to the caller
+ error_report("Failed to execute command to remove node id");
+
+failed: // reached by fall-through after the step as well as by the bind-failure goto
+ if (unlikely(sqlite3_finalize(res) != SQLITE_OK))
+ error_report("Failed to finalize statement to remove node id");
+}
+
+
static int sql_check_aclk_table(void *data __maybe_unused, int argc __maybe_unused, char **argv __maybe_unused, char **column __maybe_unused)
{
struct aclk_database_cmd cmd;
@@ -375,7 +444,10 @@ static void aclk_synchronization(void *arg __maybe_unused)
struct aclk_sync_cfg_t *ahc = host->aclk_config;
if (unlikely(!ahc))
sql_create_aclk_table(host, &host->host_uuid, host->node_id);
- aclk_host_state_update(host, live);
+ aclk_host_state_update(host, live, 1);
+ break;
+ case ACLK_DATABASE_NODE_UNREGISTER:
+ sql_unregister_node(cmd.param[0]);
break;
// ALERTS
case ACLK_DATABASE_PUSH_ALERT_CONFIG:
@@ -472,7 +544,10 @@ void sql_create_aclk_table(RRDHOST *host __maybe_unused, uuid_t *host_uuid __may
#define SQL_FETCH_ALL_HOSTS \
"SELECT host_id, hostname, registry_hostname, update_every, os, " \
"timezone, tags, hops, memory_mode, abbrev_timezone, utc_offset, program_name, " \
- "program_version, entries, health_enabled, last_connected FROM host WHERE hops >0;"
+ "program_version, entries, health_enabled, last_connected, " \
+ "(SELECT CASE WHEN hl.label_value = 'true' THEN 1 ELSE 0 END FROM " \
+ "host_label hl WHERE hl.host_id = h.host_id AND hl.label_key = '_is_ephemeral') " \
+ "FROM host h WHERE hops > 0"
#define SQL_FETCH_ALL_INSTANCES \
"SELECT ni.host_id, ni.node_id FROM host h, node_instance ni " \
@@ -572,3 +647,18 @@ void schedule_node_info_update(RRDHOST *host __maybe_unused)
aclk_database_enq_cmd(&cmd);
#endif
}
+
+#ifdef ENABLE_ACLK
+void unregister_node(const char *machine_guid) // enqueues an ACLK_DATABASE_NODE_UNREGISTER command carrying a copy of machine_guid
+{
+ if (unlikely(!machine_guid)) // NULL GUID: nothing is enqueued
+ return;
+
+ struct aclk_database_cmd cmd;
+ memset(&cmd, 0, sizeof(cmd)); // start from an all-zero command before filling the fields below
+ cmd.opcode = ACLK_DATABASE_NODE_UNREGISTER;
+ cmd.param[0] = strdupz(machine_guid); // private copy; the opcode handler hands param[0] to sql_unregister_node(), which frees it
+ cmd.completion = NULL;
+ aclk_database_enq_cmd(&cmd);
+}
+#endif \ No newline at end of file