summary refs log tree commit diff stats
path: root/daemon
diff options
context:
space:
mode:
author Costa Tsaousis <costa@netdata.cloud> 2023-06-19 20:52:35 +0300
committer GitHub <noreply@github.com> 2023-06-19 20:52:35 +0300
commit 0b4f820e9d42d10f64c3305d9c084261bc9880cf (patch)
tree 641fcb81e9c84e08fbe08ca80776c6b593b218ba /daemon
parent 35884c7a8447fbeb699cae6a2a20dc0a2137c659 (diff)
/api/v2/nodes and streaming function (#15168)
* dummy streaming function * expose global functions upstream * separate function for pushing global functions * add missing conditions * allow streaming function to run async * started internal API for functions * cache host retention and expose it to /api/v2/nodes * internal API for function table fields; more progress on streaming status * abstracted and unified rrdhost status * port old coverity warning fix - although it is not needed * add ML information to rrdhost status * add ML capability to streaming to signal the transmission of ML information; added ML information to host status * protect host->receiver * count metrics and instances per host * exposed all inbound and outbound streaming * fix for ML status and dependency of DATA_WITH_ML to INTERPOLATED, not IEEE754 * update ML dummy * added all fields * added streaming group by and cleaned up accepted values by cloud * removed type * Revert "removed type" This reverts commit faae4177e603d4f85b7433f33f92ef3ccd23976e. * added context to db summary * new /api/v2/nodes schema * added ML type * change default function charts * log to trace new capa * add more debug * removed debugging code * retry on receive interrupted read; respect sender reconnect delay in all cases * set disconnected host flag and manipulate localhost child count atomically, inside set/clear receiver * fix infinite loop * send_to_plugin() now has a spinlock to ensure that only 1 thread is writing to the plugin/child at the same time * global cloud_status() call * cloud should be a section, since it will contain error information * put cloud capabilities into cloud * aclk status in /api/v2 agents sections * keep aclk_connection_counter * updates on /api/v2/nodes * final /api/v2/nodes and addition of /api/v2/nodes_instances * parametrize all /api/v2/xxx output to control which info is output per endpoint * always accept nodes selector * st needs to be per instance, not per node * fix merging of contexts; fix cups plugin priorities
* add after and before parameters to /api/v2/contexts/nodes/nodes_instances/q * give each libuv worker a unique id * aclk http_api_v2 version 4
Diffstat (limited to 'daemon')
-rw-r--r-- daemon/commands.c 2
-rw-r--r-- daemon/common.c 92
-rw-r--r-- daemon/common.h 21
-rw-r--r-- daemon/event_loop.c 7
-rw-r--r-- daemon/global_statistics.c 8
-rw-r--r-- daemon/main.c 19
6 files changed, 132 insertions, 17 deletions
diff --git a/daemon/commands.c b/daemon/commands.c
index fcb75b71c9..0a4c655899 100644
--- a/daemon/commands.c
+++ b/daemon/commands.c
@@ -213,7 +213,7 @@ static cmd_status_t cmd_reload_claiming_state_execute(char *args, char **message
info("COMMAND: Reloading Agent Claiming configuration.");
load_claiming_state();
registry_update_cloud_base_url();
- rrdpush_claimed_id(localhost);
+ rrdpush_send_claimed_id(localhost);
error_log_limit_reset();
return CMD_STATUS_SUCCESS;
}
diff --git a/daemon/common.c b/daemon/common.c
index 6eae07cffc..d189713051 100644
--- a/daemon/common.c
+++ b/daemon/common.c
@@ -16,8 +16,14 @@ char *netdata_configured_host_prefix = NULL;
char *netdata_configured_timezone = NULL;
char *netdata_configured_abbrev_timezone = NULL;
int32_t netdata_configured_utc_offset = 0;
-int netdata_ready;
-int netdata_cloud_setting;
+
+bool netdata_ready = false;
+
+#if defined( DISABLE_CLOUD ) || !defined( ENABLE_ACLK )
+bool netdata_cloud_enabled = false;
+#else
+bool netdata_cloud_enabled = true;
+#endif
long get_netdata_cpus(void) {
static long processors = 0;
@@ -54,3 +60,85 @@ long get_netdata_cpus(void) {
return processors;
}
+
+const char *cloud_status_to_string(CLOUD_STATUS status) {
+ switch(status) {
+ default:
+ case CLOUD_STATUS_DISABLED:
+ return "disabled";
+
+ case CLOUD_STATUS_BANNED:
+ return "banned";
+
+ case CLOUD_STATUS_OFFLINE:
+ return "offline";
+
+ case CLOUD_STATUS_ONLINE:
+ return "online";
+ }
+}
+
+CLOUD_STATUS cloud_status(void) {
+#ifdef ENABLE_ACLK
+ if(aclk_disable_runtime)
+ return CLOUD_STATUS_BANNED;
+
+ if(aclk_connected)
+ return CLOUD_STATUS_ONLINE;
+
+ if(netdata_cloud_enabled)
+ return CLOUD_STATUS_OFFLINE;
+
+ return CLOUD_STATUS_DISABLED;
+#else
+ return CLOUD_STATUS_DISABLED;
+#endif
+}
+
+time_t cloud_last_change(void) {
+#ifdef ENABLE_ACLK
+ time_t ret = MAX(last_conn_time_mqtt, last_disconnect_time);
+ if(!ret) ret = netdata_start_time;
+ return ret;
+#else
+ return netdata_start_time;
+#endif
+}
+
+time_t cloud_next_connection_attempt(void) {
+#ifdef ENABLE_ACLK
+ return next_connection_attempt;
+#else
+ return 0;
+#endif
+}
+
+size_t cloud_connection_id(void) {
+#ifdef ENABLE_ACLK
+ return aclk_connection_counter;
+#else
+ return 0;
+#endif
+}
+
+const char *cloud_offline_reason() {
+#ifdef ENABLE_ACLK
+ if(!netdata_cloud_enabled)
+ return "disabled";
+
+ if(aclk_disable_runtime)
+ return "banned";
+
+ return aclk_status_to_string();
+#else
+ return "disabled";
+#endif
+}
+
+const char *cloud_base_url() {
+#ifdef ENABLE_ACLK
+ return aclk_cloud_base_url;
+#else
+ return NULL;
+#endif
+}
diff --git a/daemon/common.h b/daemon/common.h
index aeaf01637a..bd7efd3482 100644
--- a/daemon/common.h
+++ b/daemon/common.h
@@ -109,9 +109,26 @@ extern int32_t netdata_configured_utc_offset;
extern int netdata_zero_metrics_enabled;
extern int netdata_anonymous_statistics_enabled;
-extern int netdata_ready;
-extern int netdata_cloud_setting;
+extern bool netdata_ready;
+extern bool netdata_cloud_enabled;
+
+extern time_t netdata_start_time;
long get_netdata_cpus(void);
+typedef enum __attribute__((packed)) {
+ CLOUD_STATUS_DISABLED = 0,
+ CLOUD_STATUS_BANNED,
+ CLOUD_STATUS_OFFLINE,
+ CLOUD_STATUS_ONLINE,
+} CLOUD_STATUS;
+
+const char *cloud_status_to_string(CLOUD_STATUS status);
+CLOUD_STATUS cloud_status(void);
+time_t cloud_last_change(void);
+time_t cloud_next_connection_attempt(void);
+size_t cloud_connection_id(void);
+const char *cloud_offline_reason(void);
+const char *cloud_base_url(void);
+
#endif /* NETDATA_COMMON_H */
diff --git a/daemon/event_loop.c b/daemon/event_loop.c
index 5fd02377eb..fb38791546 100644
--- a/daemon/event_loop.c
+++ b/daemon/event_loop.c
@@ -56,5 +56,10 @@ void register_libuv_worker_jobs() {
// netdatacli
worker_register_job_name(UV_EVENT_SCHEDULE_CMD, "schedule command");
- uv_thread_set_name_np(pthread_self(), "LIBUV_WORKER");
+ static int workers = 0;
+ int worker_id = __atomic_add_fetch(&workers, 1, __ATOMIC_RELAXED);
+
+ char buf[NETDATA_THREAD_TAG_MAX + 1];
+ snprintfz(buf, NETDATA_THREAD_TAG_MAX, "UV_WORKER[%d]", worker_id);
+ uv_thread_set_name_np(pthread_self(), buf);
}
diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c
index ee68bebd15..435e727ec9 100644
--- a/daemon/global_statistics.c
+++ b/daemon/global_statistics.c
@@ -231,10 +231,10 @@ static void global_statistics_charts(void) {
static collected_number compression_ratio = -1,
average_response_time = -1;
- static time_t netdata_start_time = 0;
- if (!netdata_start_time)
- netdata_start_time = now_boottime_sec();
- time_t netdata_uptime = now_boottime_sec() - netdata_start_time;
+ static time_t netdata_boottime_time = 0;
+ if (!netdata_boottime_time)
+ netdata_boottime_time = now_boottime_sec();
+ time_t netdata_uptime = now_boottime_sec() - netdata_boottime_time;
struct global_statistics gs;
struct rusage me;
diff --git a/daemon/main.c b/daemon/main.c
index 15f6b1dd61..91b68a1a16 100644
--- a/daemon/main.c
+++ b/daemon/main.c
@@ -14,7 +14,7 @@ int netdata_anonymous_statistics_enabled;
int libuv_worker_threads = MIN_LIBUV_WORKER_THREADS;
bool ieee754_doubles = false;
-
+time_t netdata_start_time = 0;
struct netdata_static_thread *static_threads;
struct config netdata_config = {
@@ -446,8 +446,11 @@ void netdata_cleanup_and_exit(int ret) {
for (size_t tier = 0; tier < storage_tiers; tier++)
running += rrdeng_collectors_running(multidb_ctx[tier]);
- if(running)
- sleep_usec(100 * USEC_PER_MS);
+ if(running) {
+ error_limit_static_thread_var(erl, 1, 100 * USEC_PER_MS);
+ error_limit(&erl, "waiting for %zu collectors to finish", running);
+ // sleep_usec(100 * USEC_PER_MS);
+ }
}
delta_shutdown_time("wait for dbengine main cache to finish flushing");
@@ -1313,9 +1316,9 @@ void post_conf_load(char **user)
// --------------------------------------------------------------------
// Check if the cloud is enabled
#if defined( DISABLE_CLOUD ) || !defined( ENABLE_ACLK )
- netdata_cloud_setting = 0;
+ netdata_cloud_enabled = false;
#else
- netdata_cloud_setting = appconfig_get_boolean(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", 1);
+ netdata_cloud_enabled = appconfig_get_boolean(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", 1);
#endif
// This must be set before any point in the code that accesses it. Do not move it from this function.
appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", DEFAULT_CLOUD_BASE_URL);
@@ -1342,6 +1345,8 @@ void replication_initialize(void);
int main(int argc, char **argv) {
// initialize the system clocks
clocks_init();
+ netdata_start_time = now_realtime_sec();
+
usec_t started_ut = now_monotonic_usec();
usec_t last_ut = started_ut;
const char *prev_msg = NULL;
@@ -1357,7 +1362,7 @@ int main(int argc, char **argv) {
static_threads = static_threads_get();
- netdata_ready=0;
+ netdata_ready = false;
// set the name for logging
program_name = "netdata";
@@ -2117,7 +2122,7 @@ int main(int argc, char **argv) {
usec_t ready_ut = now_monotonic_usec();
info("NETDATA STARTUP: completed in %llu ms. Enjoy real-time performance monitoring!", (ready_ut - started_ut) / USEC_PER_MS);
- netdata_ready = 1;
+ netdata_ready = true;
send_statistics("START", "-", "-");
if (crash_detected)