summaryrefslogtreecommitdiffstats
path: root/daemon
diff options
context:
space:
mode:
authorCosta Tsaousis <costa@netdata.cloud>2022-10-05 14:13:46 +0300
committerGitHub <noreply@github.com>2022-10-05 14:13:46 +0300
commit8fc3b351a2e7fc96eced8f924de2e9cec9842128 (patch)
treebde41c66573ccaf8876c280e00742cc6096b587c /daemon
parent6850878e697d66dc90b9af1e750b22238c63c292 (diff)
Allow netdata plugins to expose functions for querying more information about specific charts (#13720)
* function renames and code cleanup in popen.c; no actual code changes * netdata popen() now opens both child process stdin and stdout and returns FILE * for both * pass both input and output to parser structures * updated rrdset to call custom functions * RRDSET FUNCTION leading calls for both sync and async operation * put RRDSET functions to a separate file * added format and timeout at function definition * support for synchronous (internal plugins) and asynchronous (external plugins and children) functions * /api/v1/function endpoint * functions are now attached to the host and there is a dictionary view per chart * functions implemented at plugins.d * remove the defer until keyword hook from plugins.d when it is done * stream sender implementation of functions * sanitization of all functions so that only certain characters are allowed * stricter sanitization * common max size * 1st working plugins.d example * always init inflight dictionary * properly destroy dictionaries to avoid parallel insertion of items * add more debugging on disconnection reasons * add more debugging on disconnection reasons again * streaming receiver respects newlines * don't use the same fp for both streaming receive and send * don't free dbengine memory with internal checks * make sender proceed in the buffer * added timing info and garbage collection at plugins.d * added info about routing nodes * added info about routing nodes with delay * added more info about delays * added more info about delays again * signal sending thread to wake up * streaming version labeling and commented code to support capabilities * added functions to /api/v1/data, /api/v1/charts, /api/v1/chart, /api/v1/info * redirect top output to stdout * address coverity findings * fix resource leaks of popen * log attempts to connect to individual destinations * better messages * properly parse destinations * try to find a function from the most matching to the least matching * log added streaming destinations * 
rotate destinations bypassing a node in the middle that does not accept our connection * break the loops properly * use typedef to define callbacks * capabilities negotiation during streaming * functions exposed upstream based on capabilities; compression disabled per node persisting reconnects; always try to connect with all capabilities * restore functionality to lookup functions * better logging of capabilities * remove old versions from capabilities when a newer version is there * fix formatting * optimization for plugins.d rrdlabels to avoid creating and destructing dictionaries all the time * delayed health initialization for rrddim and rrdset * cleanup health initialization * fix for popen() not returning the right value * add health worker jobs for initializing rrdset and rrddim * added content type support for functions; apps.plugin permanent function to display all the processes * fixes for functions parameters parsing in apps.plugin * fix for process matching in apps.plugin * first working function for apps.plugin * Dashboard ACL is disabled for functions; Function errors are all in JSON format * apps.plugin function processes returns json table * use json_escape_string() to escape message * fix formatting * apps.plugin exposes all its metrics to function processes * fix json formatting when filtering out some rows * reopen the internal pipe of rrdpush in case of errors * misplaced statement * do not use buffer->len * support for GLOBAL functions (functions that are not linked to a chart) * added /api/v1/functions endpoint; removed format from the FUNCTIONS api; * swagger documentation about the new API endpoints * added plugins.d documentation about functions * never re-close a file * remove unnecessary ifdef * fixed issues identified by codacy * fix for null label value * make edit-config copy-and-paste friendly * Revert "make edit-config copy-and-paste friendly" This reverts commit 54500c0e0a97f65a0c66c4d34e966f6a9056698e. 
* reworked sender handshake to fix coverity findings * timeout is zero, for both send_timeout() and recv_timeout() * properly detect that parent closed the socket * support caching of function responses; limit function response to 10MB; added protection from malformed function responses * disabled excessive logging * added units to apps.plugin function processes and normalized all values to be human readable * shorter field names * fixed issues reported * fixed apps.plugin error response; tested that pluginsd can properly handle faulty responses * use double linked list macros for double linked list management * faster apps.plugin function printing by minimizing file operations * added memory percentage * fix compatibility issues with older compilers and FreeBSD * rrdpush sender code cleanup; rrdhost structure cleanup from sender flags and variables; * fix leftover variable in ifdef * apps.plugin: do not call detach from the thread; exit immediately when input is broken * exclude AR charts from health * flush cleaner; prefer sender output * clarity * do not fill the cbuffer if not connected * fix * don't enable host->sender if streaming is not enabled; send host label updates to parent; * functions are only available through ACLK * Prepared statement reports only in dev mode * fix AR chart detection * fix for streaming not enabling itself * more cleanup of sender and receiver structures * moved read-only flags and configuration options to rrdhost->options * fixed merge with master * fix for incomplete rename * prevent service thread from working on charts that are being collected Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com>
Diffstat (limited to 'daemon')
-rw-r--r--daemon/analytics.c22
-rw-r--r--daemon/main.c13
-rw-r--r--daemon/service.c39
-rw-r--r--daemon/signals.c6
4 files changed, 49 insertions, 31 deletions
diff --git a/daemon/analytics.c b/daemon/analytics.c
index bf8668d065..3d0e514d66 100644
--- a/daemon/analytics.c
+++ b/daemon/analytics.c
@@ -337,11 +337,12 @@ void analytics_alarms_notifications(void)
BUFFER *b = buffer_create(1000);
int cnt = 0;
- FILE *fp = mypopen(script, &command_pid);
- if (fp) {
+ FILE *fp_child_input;
+ FILE *fp_child_output = netdata_popen(script, &command_pid, &fp_child_input);
+ if (fp_child_output) {
char line[200 + 1];
- while (fgets(line, 200, fp) != NULL) {
+ while (fgets(line, 200, fp_child_output) != NULL) {
char *end = line;
while (*end && *end != '\n')
end++;
@@ -354,7 +355,7 @@ void analytics_alarms_notifications(void)
cnt++;
}
- mypclose(fp, command_pid);
+ netdata_pclose(fp_child_input, fp_child_output, command_pid);
}
freez(script);
@@ -384,8 +385,8 @@ void analytics_https(void)
BUFFER *b = buffer_create(30);
#ifdef ENABLE_HTTPS
analytics_exporting_connectors_ssl(b);
- buffer_strcat(b, netdata_client_ctx && localhost->ssl.flags == NETDATA_SSL_HANDSHAKE_COMPLETE && __atomic_load_n(&localhost->rrdpush_sender_connected, __ATOMIC_SEQ_CST) ? "streaming|" : "|");
- buffer_strcat(b, netdata_srv_ctx ? "web" : "");
+ buffer_strcat(b, netdata_ssl_client_ctx && rrdhost_flag_check(localhost, RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED) && localhost->sender->ssl.flags == NETDATA_SSL_HANDSHAKE_COMPLETE ? "streaming|" : "|");
+ buffer_strcat(b, netdata_ssl_srv_ctx ? "web" : "");
#else
buffer_strcat(b, "||");
#endif
@@ -1016,11 +1017,12 @@ void send_statistics(const char *action, const char *action_result, const char *
info("%s '%s' '%s' '%s'", as_script, action, action_result, action_data);
- FILE *fp = mypopen(command_to_run, &command_pid);
- if (fp) {
+ FILE *fp_child_input;
+ FILE *fp_child_output = netdata_popen(command_to_run, &command_pid, &fp_child_input);
+ if (fp_child_output) {
char buffer[4 + 1];
- char *s = fgets(buffer, 4, fp);
- int exit_code = mypclose(fp, command_pid);
+ char *s = fgets(buffer, 4, fp_child_output);
+ int exit_code = netdata_pclose(fp_child_input, fp_child_output, command_pid);
if (exit_code)
error("Execution of anonymous statistics script returned %d.", exit_code);
if (s && strncmp(buffer, "200", 3))
diff --git a/daemon/main.c b/daemon/main.c
index 6d3ca585dd..0ac0942292 100644
--- a/daemon/main.c
+++ b/daemon/main.c
@@ -379,10 +379,10 @@ int help(int exitcode) {
static void security_init(){
char filename[FILENAME_MAX + 1];
snprintfz(filename, FILENAME_MAX, "%s/ssl/key.pem",netdata_configured_user_config_dir);
- security_key = config_get(CONFIG_SECTION_WEB, "ssl key", filename);
+ ssl_security_key = config_get(CONFIG_SECTION_WEB, "ssl key", filename);
snprintfz(filename, FILENAME_MAX, "%s/ssl/cert.pem",netdata_configured_user_config_dir);
- security_cert = config_get(CONFIG_SECTION_WEB, "ssl certificate", filename);
+ ssl_security_cert = config_get(CONFIG_SECTION_WEB, "ssl certificate", filename);
tls_version = config_get(CONFIG_SECTION_WEB, "tls version", "1.3");
tls_ciphers = config_get(CONFIG_SECTION_WEB, "tls ciphers", "none");
@@ -795,12 +795,13 @@ int get_system_info(struct rrdhost_system_info *system_info) {
info("Executing %s", script);
- FILE *fp = mypopen(script, &command_pid);
- if(fp) {
+ FILE *fp_child_input;
+ FILE *fp_child_output = netdata_popen(script, &command_pid, &fp_child_input);
+ if(fp_child_output) {
char line[200 + 1];
// Removed the double strlens, if the Coverity tainted string warning reappears I'll revert.
// One time init code, but I'm curious about the warning...
- while (fgets(line, 200, fp) != NULL) {
+ while (fgets(line, 200, fp_child_output) != NULL) {
char *value=line;
while (*value && *value != '=') value++;
if (*value=='=') {
@@ -821,7 +822,7 @@ int get_system_info(struct rrdhost_system_info *system_info) {
}
}
}
- mypclose(fp, command_pid);
+ netdata_pclose(fp_child_input, fp_child_output, command_pid);
}
freez(script);
return 0;
diff --git a/daemon/service.c b/daemon/service.c
index aff22fb2dc..71b377dbba 100644
--- a/daemon/service.c
+++ b/daemon/service.c
@@ -5,6 +5,9 @@
/* Run service jobs every X seconds */
#define SERVICE_HEARTBEAT 10
+#define TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT (3600 / 2)
+#define ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT 60
+
#define WORKER_JOB_CHILD_CHART_OBSOLETION_CHECK 1
#define WORKER_JOB_CLEANUP_OBSOLETE_CHARTS 2
#define WORKER_JOB_ARCHIVE_CHART 3
@@ -69,43 +72,53 @@ static void svc_rrddim_obsolete_to_archive(RRDDIM *rd) {
rrddim_free(st, rd);
}
-static void svc_rrdset_archive_obsolete_dimensions(RRDSET *st, bool all_dimensions) {
+static bool svc_rrdset_archive_obsolete_dimensions(RRDSET *st, bool all_dimensions) {
worker_is_busy(WORKER_JOB_ARCHIVE_CHART_DIMENSIONS);
RRDDIM *rd;
time_t now = now_realtime_sec();
- dfe_start_reentrant(st->rrddim_root_index, rd) {
+ bool done_all_dimensions = true;
+
+ dfe_start_write(st->rrddim_root_index, rd) {
if(unlikely(
all_dimensions ||
(rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE) && (rd->last_collected_time.tv_sec + rrdset_free_obsolete_time < now))
)) {
- info("Removing obsolete dimension '%s' (%s) of '%s' (%s).", rrddim_name(rd), rrddim_id(rd), rrdset_name(st), rrdset_id(st));
- svc_rrddim_obsolete_to_archive(rd);
-
+ if(dictionary_acquired_item_references(rd_dfe.item) == 1) {
+ info("Removing obsolete dimension '%s' (%s) of '%s' (%s).", rrddim_name(rd), rrddim_id(rd), rrdset_name(st), rrdset_id(st));
+ svc_rrddim_obsolete_to_archive(rd);
+ }
+ else
+ done_all_dimensions = false;
}
+ else
+ done_all_dimensions = false;
}
dfe_done(rd);
+
+ return done_all_dimensions;
}
static void svc_rrdset_obsolete_to_archive(RRDSET *st) {
worker_is_busy(WORKER_JOB_ARCHIVE_CHART);
+ if(!svc_rrdset_archive_obsolete_dimensions(st, true))
+ return;
+
rrdset_flag_set(st, RRDSET_FLAG_ARCHIVED);
rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE);
rrdcalc_unlink_all_rrdset_alerts(st);
- svc_rrdset_archive_obsolete_dimensions(st, true);
-
rrdsetvar_release_and_delete_all(st);
// has to be run after all dimensions are archived - or use-after-free will occur
rrdvar_delete_all(st->rrdvars);
if(st->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) {
- if(rrdhost_flag_check(st->rrdhost, RRDHOST_FLAG_DELETE_OBSOLETE_CHARTS)) {
+ if(rrdhost_option_check(st->rrdhost, RRDHOST_OPTION_DELETE_OBSOLETE_CHARTS)) {
worker_is_busy(WORKER_JOB_DELETE_CHART);
rrdset_delete_files(st);
}
@@ -148,7 +161,10 @@ static void svc_rrdset_check_obsoletion(RRDHOST *host) {
rrdset_foreach_read(st, host) {
last_entry_t = rrdset_last_entry_t(st);
- if(last_entry_t && last_entry_t < host->senders_connect_time)
+ if(last_entry_t && last_entry_t < host->senders_connect_time && host->senders_connect_time
+ + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT + ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT * st->update_every
+ < now_realtime_sec())
+
rrdset_is_obsolete(st);
}
@@ -175,12 +191,11 @@ static void svc_rrd_cleanup_obsolete_charts_from_all_hosts() {
host->senders_last_chart_command
&& host->senders_last_chart_command + host->health_delay_up_to < now_realtime_sec()
)
- || (host->senders_connect_time + 300 < now_realtime_sec())
+ || (host->senders_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT < now_realtime_sec())
)
) {
svc_rrdset_check_obsoletion(host);
host->trigger_chart_obsoletion_check = 0;
-
}
}
@@ -200,7 +215,7 @@ restart_after_removal:
if(rrdhost_should_be_removed(host, protected_host, now)) {
info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", rrdhost_hostname(host), host->machine_guid);
- if (rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_ORPHAN_HOST)
+ if (rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST)
#ifdef ENABLE_DBENGINE
/* don't delete multi-host DB host files */
&& !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->storage_instance[0]))
diff --git a/daemon/signals.c b/daemon/signals.c
index b991d46bf1..c857a9b578 100644
--- a/daemon/signals.c
+++ b/daemon/signals.c
@@ -82,7 +82,7 @@ void signals_init(void) {
// This prevents zombie processes when running in a container.
if (getpid() == 1) {
info("SIGNAL: Enabling reaper");
- myp_init();
+ netdata_popen_tracking_init();
reaper_enabled = 1;
} else {
info("SIGNAL: Not enabling reaper");
@@ -139,7 +139,7 @@ void signals_reset(void) {
}
if (reaper_enabled == 1)
- myp_free();
+ netdata_popen_tracking_cleanup();
}
// reap_child reaps the child identified by pid.
@@ -198,7 +198,7 @@ static void reap_children() {
} else if (i.si_pid == 0) {
// No child exited.
return;
- } else if (myp_reap(i.si_pid) == 0) {
+ } else if (netdata_popen_tracking_pid_shoud_be_reaped(i.si_pid) == 0) {
// myp managed, sleep for a short time to avoid busy wait while
// this is handled by myp.
usleep(10000);