diff options
author | Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com> | 2023-11-23 23:56:34 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-23 23:56:34 +0200 |
commit | 85f359fc2677752ef7501fbee92d9caecc924538 (patch) | |
tree | e874de2346c715e9b5762517f9c7faf08af61da8 /database/sqlite/sqlite_aclk.c | |
parent | a721fcb61f9c50762a92d2939091fdfc02bf0014 (diff) |
Handle ephemeral hosts (#16381)
* Handle ephemeral hosts
* Node ephemeral removal timeout 86400 seconds (1 day)
* Move config from health to global section
* Set a node to queryable false when it is ephemeral and is removed
* Log queryable. Send queryable=0 only when forcing host deletion (the node is ephemeral)
* Switch to "is ephemeral node"
Document stream.conf
* Unregister node id
Diffstat (limited to 'database/sqlite/sqlite_aclk.c')
-rw-r--r-- | database/sqlite/sqlite_aclk.c | 146 |
1 files changed, 118 insertions, 28 deletions
diff --git a/database/sqlite/sqlite_aclk.c b/database/sqlite/sqlite_aclk.c index 3a702738aa..83203b22e3 100644 --- a/database/sqlite/sqlite_aclk.c +++ b/database/sqlite/sqlite_aclk.c @@ -71,6 +71,7 @@ enum { IDX_ENTRIES, IDX_HEALTH_ENABLED, IDX_LAST_CONNECTED, + IDX_IS_EPHEMERAL, }; static int create_host_callback(void *data, int argc, char **argv, char **column) @@ -79,9 +80,26 @@ static int create_host_callback(void *data, int argc, char **argv, char **column UNUSED(argc); UNUSED(column); + time_t last_connected = (time_t) (argv[IDX_LAST_CONNECTED] ? str2uint64_t(argv[IDX_LAST_CONNECTED], NULL) : 0); + time_t age = now_realtime_sec() - last_connected; + int is_ephemeral = 0; + + if (argv[IDX_IS_EPHEMERAL]) + is_ephemeral = str2i(argv[IDX_IS_EPHEMERAL]); + char guid[UUID_STR_LEN]; uuid_unparse_lower(*(uuid_t *)argv[IDX_HOST_ID], guid); + if (is_ephemeral && age > rrdhost_free_ephemeral_time_s) { + netdata_log_info( + "Skipping ephemeral hostname \"%s\" with GUID \"%s\", age = %ld seconds (limit %ld seconds)", + (const char *)argv[IDX_HOSTNAME], + guid, + age, + rrdhost_free_ephemeral_time_s); + return 0; + } + struct rrdhost_system_info *system_info = callocz(1, sizeof(struct rrdhost_system_info)); __atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_allocations_size, sizeof(struct rrdhost_system_info), __ATOMIC_RELAXED); @@ -90,33 +108,47 @@ static int create_host_callback(void *data, int argc, char **argv, char **column sql_build_host_system_info((uuid_t *)argv[IDX_HOST_ID], system_info); RRDHOST *host = rrdhost_find_or_create( - (const char *) argv[IDX_HOSTNAME] - , (const char *) argv[IDX_REGISTRY] - , guid - , (const char *) argv[IDX_OS] - , (const char *) argv[IDX_TIMEZONE] - , (const char *) argv[IDX_ABBREV_TIMEZONE] - , (int32_t) (argv[IDX_UTC_OFFSET] ? str2uint32_t(argv[IDX_UTC_OFFSET], NULL) : 0) - , (const char *) argv[IDX_TAGS] - , (const char *) (argv[IDX_PROGRAM_NAME] ? 
argv[IDX_PROGRAM_NAME] : "unknown") - , (const char *) (argv[IDX_PROGRAM_VERSION] ? argv[IDX_PROGRAM_VERSION] : "unknown") - , argv[IDX_UPDATE_EVERY] ? str2i(argv[IDX_UPDATE_EVERY]) : 1 - , argv[IDX_ENTRIES] ? str2i(argv[IDX_ENTRIES]) : 0 - , default_rrd_memory_mode - , 0 // health - , 0 // rrdpush enabled - , NULL //destination - , NULL // api key - , NULL // send charts matching - , false // rrdpush_enable_replication - , 0 // rrdpush_seconds_to_replicate - , 0 // rrdpush_replication_step - , system_info - , 1 - ); + (const char *)argv[IDX_HOSTNAME], + (const char *)argv[IDX_REGISTRY], + guid, + (const char *)argv[IDX_OS], + (const char *)argv[IDX_TIMEZONE], + (const char *)argv[IDX_ABBREV_TIMEZONE], + (int32_t)(argv[IDX_UTC_OFFSET] ? str2uint32_t(argv[IDX_UTC_OFFSET], NULL) : 0), + (const char *)argv[IDX_TAGS], + (const char *)(argv[IDX_PROGRAM_NAME] ? argv[IDX_PROGRAM_NAME] : "unknown"), + (const char *)(argv[IDX_PROGRAM_VERSION] ? argv[IDX_PROGRAM_VERSION] : "unknown"), + argv[IDX_UPDATE_EVERY] ? str2i(argv[IDX_UPDATE_EVERY]) : 1, + argv[IDX_ENTRIES] ? str2i(argv[IDX_ENTRIES]) : 0, + default_rrd_memory_mode, + 0 // health + , + 0 // rrdpush enabled + , + NULL //destination + , + NULL // api key + , + NULL // send charts matching + , + false // rrdpush_enable_replication + , + 0 // rrdpush_seconds_to_replicate + , + 0 // rrdpush_replication_step + , + system_info, + 1); + if (likely(host)) { + if (is_ephemeral) + rrdhost_option_set(host, RRDHOST_OPTION_EPHEMERAL_HOST); + + if (is_ephemeral) + host->child_disconnected_time = now_realtime_sec(); + host->rrdlabels = sql_load_host_labels((uuid_t *)argv[IDX_HOST_ID]); - host->last_connected = (time_t) (argv[IDX_LAST_CONNECTED] ? 
str2uint64_t(argv[IDX_LAST_CONNECTED], NULL) : 0); + host->last_connected = last_connected; } (*number_of_chidren)++; @@ -125,7 +157,7 @@ static int create_host_callback(void *data, int argc, char **argv, char **column char node_str[UUID_STR_LEN] = "<none>"; if (likely(host->node_id)) uuid_unparse_lower(*host->node_id, node_str); - internal_error(true, "Adding archived host \"%s\" with GUID \"%s\" node id = \"%s\"", rrdhost_hostname(host), host->machine_guid, node_str); + internal_error(true, "Adding archived host \"%s\" with GUID \"%s\" node id = \"%s\" ephemeral=%d", rrdhost_hostname(host), host->machine_guid, node_str, is_ephemeral); #endif return 0; } @@ -215,6 +247,43 @@ fail: buffer_free(sql); } +// OPCODE: ACLK_DATABASE_NODE_UNREGISTER +static void sql_unregister_node(char *machine_guid) +{ + int rc; + uuid_t host_uuid; + + if (unlikely(!machine_guid)) + return; + + rc = uuid_parse(machine_guid, host_uuid); + freez(machine_guid); + if (rc) + return; + + sqlite3_stmt *res = NULL; + + rc = sqlite3_prepare_v2(db_meta, "UPDATE node_instance SET node_id = NULL WHERE host_id = @host_id", -1, &res, 0); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement remote node id for a host"); + return; + } + + rc = sqlite3_bind_blob(res, 1, &host_uuid, sizeof(host_uuid), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to bind host_id parameter to remove node id"); + goto failed; + } + rc = sqlite3_step_monitored(res); + if (unlikely(rc != SQLITE_DONE)) + error_report("Failed to execute command to remove node id"); + +failed: + if (unlikely(sqlite3_finalize(res) != SQLITE_OK)) + error_report("Failed to finalize statement to remove node id"); +} + + static int sql_check_aclk_table(void *data __maybe_unused, int argc __maybe_unused, char **argv __maybe_unused, char **column __maybe_unused) { struct aclk_database_cmd cmd; @@ -375,7 +444,10 @@ static void aclk_synchronization(void *arg __maybe_unused) struct aclk_sync_cfg_t *ahc 
= host->aclk_config; if (unlikely(!ahc)) sql_create_aclk_table(host, &host->host_uuid, host->node_id); - aclk_host_state_update(host, live); + aclk_host_state_update(host, live, 1); + break; + case ACLK_DATABASE_NODE_UNREGISTER: + sql_unregister_node(cmd.param[0]); break; // ALERTS case ACLK_DATABASE_PUSH_ALERT_CONFIG: @@ -472,7 +544,10 @@ void sql_create_aclk_table(RRDHOST *host __maybe_unused, uuid_t *host_uuid __may #define SQL_FETCH_ALL_HOSTS \ "SELECT host_id, hostname, registry_hostname, update_every, os, " \ "timezone, tags, hops, memory_mode, abbrev_timezone, utc_offset, program_name, " \ - "program_version, entries, health_enabled, last_connected FROM host WHERE hops >0;" + "program_version, entries, health_enabled, last_connected, " \ + "(SELECT CASE WHEN hl.label_value = 'true' THEN 1 ELSE 0 END FROM " \ + "host_label hl WHERE hl.host_id = h.host_id AND hl.label_key = '_is_ephemeral') " \ + "FROM host h WHERE hops > 0" #define SQL_FETCH_ALL_INSTANCES \ "SELECT ni.host_id, ni.node_id FROM host h, node_instance ni " \ @@ -572,3 +647,18 @@ void schedule_node_info_update(RRDHOST *host __maybe_unused) aclk_database_enq_cmd(&cmd); #endif } + +#ifdef ENABLE_ACLK +void unregister_node(const char *machine_guid) +{ + if (unlikely(!machine_guid)) + return; + + struct aclk_database_cmd cmd; + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = ACLK_DATABASE_NODE_UNREGISTER; + cmd.param[0] = strdupz(machine_guid); + cmd.completion = NULL; + aclk_database_enq_cmd(&cmd); +} +#endif
\ No newline at end of file |