summary | refs | log | tree | commit | diff | stats
path: root/daemon
diff options
context:
space:
mode:
author: Costa Tsaousis <costa@netdata.cloud> 2023-06-07 21:10:27 +0300
committer: GitHub <noreply@github.com> 2023-06-07 21:10:27 +0300
commit: 66c85460199dbf65aad09cdfcdbae25c6bde265b (patch)
tree: a77e1f19d21f429fbc73ff8c71660cfb97c934ed /daemon
parent: 892255b23728fde076402b7300f13c80de32e5fc (diff)
Re-write of SSL support in Netdata; restoration of SIGCHLD; detection of stale plugins; streaming improvements (#15113)
* add information about streaming connections to /api/v2/nodes; reset defer time when sender or receivers connect or disconnect
* make each streaming destination respect its SSL settings
* to not send SSL traffic over non-SSL connection
* keep track of outgoing streaming connection attempts
* retry SSL reads when SSL_read() returns SSL_ERROR_WANT_READ
* Revert "retry SSL reads when SSL_read() returns SSL_ERROR_WANT_READ" This reverts commit 14c858677c6f2d3b08c94f298e2f45ecdb74c801.
* cleanup SSL connections properly
* initialize SSL in rpt before takeover
* sender should free SSL when talking to a non-SSL destination
* do not shutdown SSL when receiver exits
* restore operation of SIGCHLD when the reaper is not enabled
* create an fgets function that checks for data and times out
* work on error handling of plugins exiting
* remove newlines from logs
* global call to waitid(), caching the result for netdata_pclose() to process
* receiver tid
* parser timeouts in 2 minutes instead of 10
* fix crash when UUID is NULL in SQLite
* abstract sqlite3 parsing for uuid and text
* write proper ssl errors on read and write
* fix for SSL_ERROR_WANT_RETRY_VERIFY
* SSL WANT per function
* unified SSL error logging
* fix compilation warning
* additional logging about parser cleanup
* streaming parser should call the pluginsd parser cleanup
* SSL error handling work
* SSL initialization unification
* check for pending data when receiving SSL response with timeout
* macro to check if an SSL connection has been established
* remove SSL_pending()
* check for SSL macros
* use SSL_peek() to find if there is a response
* SSL renames
* more SSL renames & cleanup
* rrdpush ssl connection function
* abstract all SSL functions into security.c
* keep track of SSL connections and always attempt to use SSL read/write when on SSL connection
* signal openssl to skip certificate validation when configured to do so
* better SSL error handling and logging
* SSL code cleanup
* SSL retry on SSL_connect and SSL_accept
* SSL provide default return value for old compilers
* SSL read/write functions emulate system read/write functions
* fix receive/send timeout and switch from SSL_peek() to SSL_pending()
* remove SSL_pending()
* removed sender auto-retry and debug info for initial receiver response
* ssl skip certificate verification config for web server
* ssl errors log ip and port of the peer
* keep ssl with web_client for its whole lifetime
* thread safe socket peers to text
* use error_limit() for common ssl errors
* cleanup
* more cleanup
* coverity fixes
* ssl error logs include both local and remote ip/port info
* remove obsolete code
Diffstat (limited to 'daemon')
-rw-r--r-- daemon/analytics.c | 8
-rw-r--r-- daemon/main.c | 4
-rw-r--r-- daemon/signals.c | 98
3 files changed, 41 insertions, 69 deletions
diff --git a/daemon/analytics.c b/daemon/analytics.c
index b3c802b86c..2689886bd7 100644
--- a/daemon/analytics.c
+++ b/daemon/analytics.c
@@ -375,8 +375,12 @@ void analytics_https(void)
BUFFER *b = buffer_create(30, NULL);
#ifdef ENABLE_HTTPS
analytics_exporting_connectors_ssl(b);
- buffer_strcat(b, netdata_ssl_client_ctx && rrdhost_flag_check(localhost, RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED) && localhost->sender->ssl.flags == NETDATA_SSL_HANDSHAKE_COMPLETE ? "streaming|" : "|");
- buffer_strcat(b, netdata_ssl_srv_ctx ? "web" : "");
+
+ buffer_strcat(b, netdata_ssl_streaming_sender_ctx &&
+ rrdhost_flag_check(localhost, RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED) &&
+ SSL_connection(&localhost->sender->ssl) ? "streaming|" : "|");
+
+ buffer_strcat(b, netdata_ssl_web_server_ctx ? "web" : "");
#else
buffer_strcat(b, "||");
#endif
diff --git a/daemon/main.c b/daemon/main.c
index 0805b21773..cff6530f3f 100644
--- a/daemon/main.c
+++ b/daemon/main.c
@@ -482,7 +482,7 @@ void netdata_cleanup_and_exit(int ret) {
#ifdef ENABLE_HTTPS
delta_shutdown_time("free openssl structures");
- security_clean_openssl();
+ netdata_ssl_cleanup();
#endif
delta_shutdown_time("remove incomplete shutdown file");
@@ -834,7 +834,7 @@ static void security_init(){
tls_version = config_get(CONFIG_SECTION_WEB, "tls version", "1.3");
tls_ciphers = config_get(CONFIG_SECTION_WEB, "tls ciphers", "none");
- security_openssl_library();
+ netdata_ssl_initialize_openssl();
}
#endif
diff --git a/daemon/signals.c b/daemon/signals.c
index c857a9b578..3699010ce6 100644
--- a/daemon/signals.c
+++ b/daemon/signals.c
@@ -2,8 +2,6 @@
#include "common.h"
-static int reaper_enabled = 0;
-
typedef enum signal_action {
NETDATA_SIGNAL_END_OF_LIST,
NETDATA_SIGNAL_IGNORE,
@@ -78,16 +76,6 @@ void signals_init(void) {
struct sigaction sa;
sa.sa_flags = 0;
- // Enable process tracking / reaper if running as init (pid == 1).
- // This prevents zombie processes when running in a container.
- if (getpid() == 1) {
- info("SIGNAL: Enabling reaper");
- netdata_popen_tracking_init();
- reaper_enabled = 1;
- } else {
- info("SIGNAL: Not enabling reaper");
- }
-
// ignore all signals while we run in a signal handler
sigfillset(&sa.sa_mask);
@@ -97,10 +85,6 @@ void signals_init(void) {
case NETDATA_SIGNAL_IGNORE:
sa.sa_handler = SIG_IGN;
break;
- case NETDATA_SIGNAL_CHILD:
- if (reaper_enabled == 0)
- continue;
- // FALLTHROUGH
default:
sa.sa_handler = signal_handler;
break;
@@ -115,9 +99,6 @@ void signals_restore_SIGCHLD(void)
{
struct sigaction sa;
- if (reaper_enabled == 0)
- return;
-
sa.sa_flags = 0;
sigfillset(&sa.sa_mask);
sa.sa_handler = signal_handler;
@@ -137,9 +118,6 @@ void signals_reset(void) {
if(sigaction(signals_waiting[i].signo, &sa, NULL) == -1)
error("SIGNAL: Failed to reset signal handler for: %s", signals_waiting[i].name);
}
-
- if (reaper_enabled == 1)
- netdata_popen_tracking_cleanup();
}
// reap_child reaps the child identified by pid.
@@ -147,39 +125,42 @@ static void reap_child(pid_t pid) {
siginfo_t i;
errno = 0;
- debug(D_CHILDS, "SIGNAL: Reaping pid: %d...", pid);
- if (waitid(P_PID, (id_t)pid, &i, WEXITED|WNOHANG) == -1) {
+ debug(D_CHILDS, "SIGNAL: reap_child(%d)...", pid);
+ if (netdata_waitid(P_PID, (id_t)pid, &i, WEXITED|WNOHANG) == -1) {
if (errno != ECHILD)
- error("SIGNAL: Failed to wait for: %d", pid);
+ error("SIGNAL: waitid(%d): failed to wait for child", pid);
else
- debug(D_CHILDS, "SIGNAL: Already reaped: %d", pid);
+ info("SIGNAL: waitid(%d): failed - it seems the child is already reaped", pid);
return;
- } else if (i.si_pid == 0) {
+ }
+ else if (i.si_pid == 0) {
// Process didn't exit, this shouldn't happen.
+ error("SIGNAL: waitid(%d): reports pid 0 - child has not exited", pid);
return;
}
switch (i.si_code) {
- case CLD_EXITED:
- debug(D_CHILDS, "SIGNAL: Child %d exited: %d", pid, i.si_status);
- break;
- case CLD_KILLED:
- debug(D_CHILDS, "SIGNAL: Child %d killed by signal: %d", pid, i.si_status);
- break;
- case CLD_DUMPED:
- debug(D_CHILDS, "SIGNAL: Child %d dumped core by signal: %d", pid, i.si_status);
- break;
- case CLD_STOPPED:
- debug(D_CHILDS, "SIGNAL: Child %d stopped by signal: %d", pid, i.si_status);
- break;
- case CLD_TRAPPED:
- debug(D_CHILDS, "SIGNAL: Child %d trapped by signal: %d", pid, i.si_status);
- break;
- case CLD_CONTINUED:
- debug(D_CHILDS, "SIGNAL: Child %d continued by signal: %d", pid, i.si_status);
- break;
- default:
- debug(D_CHILDS, "SIGNAL: Child %d gave us a SIGCHLD with code %d and status %d.", pid, i.si_code, i.si_status);
+ case CLD_EXITED:
+ info("SIGNAL: reap_child(%d) exited with code: %d", pid, i.si_status);
+ break;
+ case CLD_KILLED:
+ info("SIGNAL: reap_child(%d) killed by signal: %d", pid, i.si_status);
+ break;
+ case CLD_DUMPED:
+ info("SIGNAL: reap_child(%d) dumped core by signal: %d", pid, i.si_status);
+ break;
+ case CLD_STOPPED:
+ info("SIGNAL: reap_child(%d) stopped by signal: %d", pid, i.si_status);
+ break;
+ case CLD_TRAPPED:
+ info("SIGNAL: reap_child(%d) trapped by signal: %d", pid, i.si_status);
+ break;
+ case CLD_CONTINUED:
+ info("SIGNAL: reap_child(%d) continued by signal: %d", pid, i.si_status);
+ break;
+ default:
+ info("SIGNAL: reap_child(%d) gave us a SIGCHLD with code %d and status %d.", pid, i.si_code, i.si_status);
+ break;
}
}
@@ -187,25 +168,13 @@ static void reap_child(pid_t pid) {
static void reap_children() {
siginfo_t i;
- while (1 == 1) {
- // Identify which process caused the signal so we can determine
- // if we need to reap a re-parented process.
+ while(1) {
i.si_pid = 0;
- if (waitid(P_ALL, (id_t)0, &i, WEXITED|WNOHANG|WNOWAIT) == -1) {
- if (errno != ECHILD) // This shouldn't happen with WNOHANG but does.
- error("SIGNAL: Failed to wait");
- return;
- } else if (i.si_pid == 0) {
- // No child exited.
+ if (netdata_waitid(P_ALL, (id_t)0, &i, WEXITED|WNOHANG|WNOWAIT) == -1 || i.si_pid == 0)
+ // nothing to do
return;
- } else if (netdata_popen_tracking_pid_shoud_be_reaped(i.si_pid) == 0) {
- // myp managed, sleep for a short time to avoid busy wait while
- // this is handled by myp.
- usleep(10000);
- } else {
- // Unknown process, likely a re-parented child, reap it.
- reap_child(i.si_pid);
- }
+
+ reap_child(i.si_pid);
}
}
@@ -267,7 +236,6 @@ void signals_handle(void) {
break;
case NETDATA_SIGNAL_CHILD:
- debug(D_CHILDS, "SIGNAL: Received %s. Reaping...", name);
reap_children();
break;