diff options
author | Costa Tsaousis <costa@netdata.cloud> | 2023-06-07 21:10:27 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-07 21:10:27 +0300 |
commit | 66c85460199dbf65aad09cdfcdbae25c6bde265b (patch) | |
tree | a77e1f19d21f429fbc73ff8c71660cfb97c934ed /libnetdata | |
parent | 892255b23728fde076402b7300f13c80de32e5fc (diff) |
Re-write of SSL support in Netdata; restoration of SIGCHLD; detection of stale plugins; streaming improvements (#15113)
* add information about streaming connections to /api/v2/nodes; reset defer time when sender or receivers connect or disconnect
* make each streaming destination respect its SSL settings
* do not send SSL traffic over non-SSL connections
* keep track of outgoing streaming connection attempts
* retry SSL reads when SSL_read() returns SSL_ERROR_WANT_READ
* Revert "retry SSL reads when SSL_read() returns SSL_ERROR_WANT_READ"
This reverts commit 14c858677c6f2d3b08c94f298e2f45ecdb74c801.
* cleanup SSL connections properly
* initialize SSL in rpt before takeover
* sender should free SSL when talking to a non-SSL destination
* do not shutdown SSL when receiver exits
* restore operation of SIGCHLD when the reaper is not enabled
* create an fgets function that checks for data and times out
* work on error handling of plugins exiting
* remove newlines from logs
* global call to waitid(), caching the result for netdata_pclose() to process
* receiver tid
* parser times out in 2 minutes instead of 10
* fix crash when UUID is NULL in SQLite
* abstract sqlite3 parsing for uuid and text
* write proper ssl errors on read and write
* fix for SSL_ERROR_WANT_RETRY_VERIFY
* SSL WANT per function
* unified SSL error logging
* fix compilation warning
* additional logging about parser cleanup
* streaming parser should call the pluginsd parser cleanup
* SSL error handling work
* SSL initialization unification
* check for pending data when receiving SSL response with timeout
* macro to check if an SSL connection has been established
* remove SSL_pending()
* check for SSL macros
* use SSL_peek() to find if there is a response
* SSL renames
* more SSL renames & cleanup
* rrdpush ssl connection function
* abstract all SSL functions into security.c
* keep track of SSL connections and always attempt to use SSL read/write when on SSL connection
* signal openssl to skip certificate validation when configured to do so
* better SSL error handling and logging
* SSL code cleanup
* SSL retry on SSL_connect and SSL_accept
* SSL provide default return value for old compilers
* SSL read/write functions emulate system read/write functions
* fix receive/send timeout and switch from SSL_peek() to SSL_pending()
* remove SSL_pending()
* removed sender auto-retry and debug info for initial receiver response
* ssl skip certificate verification config for web server
* ssl errors log ip and port of the peer
* keep ssl with web_client for its whole lifetime
* thread safe socket peers to text
* use error_limit() for common ssl errors
* cleanup
* more cleanup
* coverity fixes
* ssl error logs include both local and remote ip/port info
* remove obsolete code
Diffstat (limited to 'libnetdata')
-rw-r--r-- | libnetdata/parser/parser.c | 81 | ||||
-rw-r--r-- | libnetdata/parser/parser.h | 2 | ||||
-rw-r--r-- | libnetdata/popen/popen.c | 106 | ||||
-rw-r--r-- | libnetdata/popen/popen.h | 9 | ||||
-rw-r--r-- | libnetdata/socket/security.c | 562 | ||||
-rw-r--r-- | libnetdata/socket/security.h | 63 | ||||
-rw-r--r-- | libnetdata/socket/socket.c | 115 | ||||
-rw-r--r-- | libnetdata/socket/socket.h | 24 |
8 files changed, 684 insertions, 278 deletions
diff --git a/libnetdata/parser/parser.c b/libnetdata/parser/parser.c index c3eebcd163..80c9a2639a 100644 --- a/libnetdata/parser/parser.c +++ b/libnetdata/parser/parser.c @@ -1,4 +1,6 @@ // SPDX-License-Identifier: GPL-3.0-or-later +#include <poll.h> +#include <stdio.h> #include "parser.h" #include "collectors/plugins.d/pluginsd_parser.h" @@ -124,26 +126,77 @@ void parser_destroy(PARSER *parser) * */ -int parser_next(PARSER *parser, char *buffer, size_t buffer_size) -{ - char *tmp = fgets(buffer, (int)buffer_size, (FILE *)parser->fp_input); +typedef enum { + PARSER_FGETS_RESULT_OK, + PARSER_FGETS_RESULT_TIMEOUT, + PARSER_FGETS_RESULT_ERROR, + PARSER_FGETS_RESULT_EOF, +} PARSER_FGETS_RESULT; + +static inline PARSER_FGETS_RESULT parser_fgets(char *s, int size, FILE *stream) { + errno = 0; + + struct pollfd fds[1]; + int timeout_msecs = 2 * 60 * MSEC_PER_SEC; + + fds[0].fd = fileno(stream); + fds[0].events = POLLIN; + + int ret = poll(fds, 1, timeout_msecs); - if (unlikely(!tmp)) { - if (feof((FILE *)parser->fp_input)) - error("PARSER: read failed: end of file"); + if (ret > 0) { + /* There is data to read */ + if (fds[0].revents & POLLIN) { + char *tmp = fgets(s, size, stream); - else if (ferror((FILE *)parser->fp_input)) - error("PARSER: read failed: input error"); + if(unlikely(!tmp)) { + if (feof(stream)) { + error("PARSER: read failed: end of file."); + return PARSER_FGETS_RESULT_EOF; + } - else - error("PARSER: read failed: unknown error"); + else if (ferror(stream)) { + error("PARSER: read failed: input error."); + return PARSER_FGETS_RESULT_ERROR; + } - return 1; + error("PARSER: read failed: unknown error."); + return PARSER_FGETS_RESULT_ERROR; + } + + return PARSER_FGETS_RESULT_OK; + } + else if(fds[0].revents & POLLERR) { + error("PARSER: read failed: POLLERR."); + return PARSER_FGETS_RESULT_ERROR; + } + else if(fds[0].revents & POLLHUP) { + error("PARSER: read failed: POLLHUP."); + return PARSER_FGETS_RESULT_ERROR; + } + else if(fds[0].revents & POLLNVAL) 
{ + error("PARSER: read failed: POLLNVAL."); + return PARSER_FGETS_RESULT_ERROR; + } + + error("PARSER: poll() returned positive number, but POLLIN|POLLERR|POLLHUP|POLLNVAL are not set."); + return PARSER_FGETS_RESULT_ERROR; + } + else if (ret == 0) { + error("PARSER: timeout while waiting for data."); + return PARSER_FGETS_RESULT_TIMEOUT; } - return 0; + error("PARSER: poll() failed with code %d.", ret); + return PARSER_FGETS_RESULT_ERROR; } +int parser_next(PARSER *parser, char *buffer, size_t buffer_size) { + if(likely(parser_fgets(buffer, (int)buffer_size, (FILE *)parser->fp_input) == PARSER_FGETS_RESULT_OK)) + return 0; + + return 1; +} /* * Takes an initialized parser object that has an unprocessed entry (by calling parser_next) @@ -202,7 +255,6 @@ inline int parser_action(PARSER *parser, char *input) else rc = PARSER_RC_ERROR; -#ifdef NETDATA_INTERNAL_CHECKS if(rc == PARSER_RC_ERROR) { BUFFER *wb = buffer_create(PLUGINSD_LINE_MAX, NULL); for(size_t i = 0; i < num_words ;i++) { @@ -214,12 +266,11 @@ inline int parser_action(PARSER *parser, char *input) buffer_fast_strcat(wb, "\"", 1); } - internal_error(true, "PLUGINSD: parser_action('%s') failed on line %zu: { %s } (quotes added to show parsing)", + error("PLUGINSD: parser_action('%s') failed on line %zu: { %s } (quotes added to show parsing)", command, parser->line, buffer_tostring(wb)); buffer_free(wb); } -#endif return (rc == PARSER_RC_ERROR || rc == PARSER_RC_STOP); } diff --git a/libnetdata/parser/parser.h b/libnetdata/parser/parser.h index 9e0d3480de..c21cbaf7e6 100644 --- a/libnetdata/parser/parser.h +++ b/libnetdata/parser/parser.h @@ -44,7 +44,7 @@ typedef struct parser { FILE *fp_input; // Input source e.g. 
stream FILE *fp_output; // Stream to send commands to plugin #ifdef ENABLE_HTTPS - struct netdata_ssl *ssl_output; + NETDATA_SSL *ssl_output; #endif void *user; // User defined structure to hold extra state between calls uint32_t flags; diff --git a/libnetdata/popen/popen.c b/libnetdata/popen/popen.c index 5ed74ae958..783c74a51f 100644 --- a/libnetdata/popen/popen.c +++ b/libnetdata/popen/popen.c @@ -5,11 +5,13 @@ // ---------------------------------------------------------------------------- // popen with tracking -static pthread_mutex_t netdata_popen_tracking_mutex; -static bool netdata_popen_tracking_enabled = false; +static pthread_mutex_t netdata_popen_tracking_mutex = NETDATA_MUTEX_INITIALIZER; struct netdata_popen { pid_t pid; + bool reaped; + siginfo_t infop; + int waitid_ret; struct netdata_popen *next; struct netdata_popen *prev; }; @@ -18,29 +20,20 @@ static struct netdata_popen *netdata_popen_root = NULL; // myp_add_lock takes the lock if we're tracking. static void netdata_popen_tracking_lock(void) { - if(!netdata_popen_tracking_enabled) - return; - netdata_mutex_lock(&netdata_popen_tracking_mutex); } // myp_add_unlock release the lock if we're tracking. static void netdata_popen_tracking_unlock(void) { - if(!netdata_popen_tracking_enabled) - return; - netdata_mutex_unlock(&netdata_popen_tracking_mutex); } // myp_add_locked adds pid if we're tracking. // myp_add_lock must have been called previously. static void netdata_popen_tracking_add_pid_unsafe(pid_t pid) { - if(!netdata_popen_tracking_enabled) - return; - struct netdata_popen *mp; - mp = mallocz(sizeof(struct netdata_popen)); + mp = callocz(1, sizeof(struct netdata_popen)); mp->pid = pid; DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(netdata_popen_root, mp, prev, next); @@ -48,12 +41,9 @@ static void netdata_popen_tracking_add_pid_unsafe(pid_t pid) { // myp_del deletes pid if we're tracking. 
static void netdata_popen_tracking_del_pid(pid_t pid) { - if(!netdata_popen_tracking_enabled) - return; - struct netdata_popen *mp; - netdata_mutex_lock(&netdata_popen_tracking_mutex); + netdata_popen_tracking_lock(); DOUBLE_LINKED_LIST_FOREACH_FORWARD(netdata_popen_root, mp, prev, next) { if(unlikely(mp->pid == pid)) @@ -65,34 +55,15 @@ static void netdata_popen_tracking_del_pid(pid_t pid) { freez(mp); } else - error("Cannot find pid %d.", pid); - - netdata_mutex_unlock(&netdata_popen_tracking_mutex); -} + error("POPEN: Cannot find pid %d.", pid); -// netdata_popen_tracking_init() should be called by apps which act as init -// (pid 1) so that processes created by mypopen and mypopene -// are tracked. This enables the reaper to ignore processes -// which will be handled internally, by calling myp_reap, to -// avoid issues with already reaped processes during wait calls. -// -// Callers should call myp_free() to clean up resources. -void netdata_popen_tracking_init(void) { - info("process tracking enabled."); - netdata_popen_tracking_enabled = true; - - if (netdata_mutex_init(&netdata_popen_tracking_mutex) != 0) - fatal("netdata_popen_tracking_init() mutex init failed."); + netdata_popen_tracking_unlock(); } // myp_free cleans up any resources allocated for process // tracking. void netdata_popen_tracking_cleanup(void) { - if(!netdata_popen_tracking_enabled) - return; - - netdata_mutex_lock(&netdata_popen_tracking_mutex); - netdata_popen_tracking_enabled = false; + netdata_popen_tracking_lock(); while(netdata_popen_root) { struct netdata_popen *mp = netdata_popen_root; @@ -100,26 +71,45 @@ void netdata_popen_tracking_cleanup(void) { freez(mp); } - netdata_mutex_unlock(&netdata_popen_tracking_mutex); + netdata_popen_tracking_unlock(); } -// myp_reap returns 1 if pid should be reaped, 0 otherwise. 
-int netdata_popen_tracking_pid_shoud_be_reaped(pid_t pid) { - if(!netdata_popen_tracking_enabled) - return 0; +int netdata_waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options) { + struct netdata_popen *mp = NULL; - netdata_mutex_lock(&netdata_popen_tracking_mutex); + if(idtype == P_PID && id != 0) { + // the caller is asking to waitid() for a specific child pid - int ret = 1; - struct netdata_popen *mp; - DOUBLE_LINKED_LIST_FOREACH_FORWARD(netdata_popen_root, mp, prev, next) { - if(unlikely(mp->pid == pid)) { - ret = 0; - break; + netdata_popen_tracking_lock(); + DOUBLE_LINKED_LIST_FOREACH_FORWARD(netdata_popen_root, mp, prev, next) { + if(unlikely(mp->pid == (pid_t)id)) + break; } + + if(!mp) + netdata_popen_tracking_unlock(); } - netdata_mutex_unlock(&netdata_popen_tracking_mutex); + int ret; + if(mp && mp->reaped) { + // we have already reaped this child + ret = mp->waitid_ret; + *infop = mp->infop; + } + else { + // we haven't reaped this child yet + ret = waitid(idtype, id, infop, options); + + if(mp && !mp->reaped) { + mp->reaped = true; + mp->infop = *infop; + mp->waitid_ret = ret; + } + } + + if(mp) + netdata_popen_tracking_unlock(); + return ret; } @@ -404,7 +394,7 @@ int netdata_pclose(FILE *fp_child_input, FILE *fp_child_output, pid_t pid) { errno = 0; - ret = waitid(P_PID, (id_t) pid, &info, WEXITED); + ret = netdata_waitid(P_PID, (id_t) pid, &info, WEXITED); netdata_popen_tracking_del_pid(pid); if (ret != -1) { @@ -415,8 +405,12 @@ int netdata_pclose(FILE *fp_child_input, FILE *fp_child_output, pid_t pid) { return(info.si_status); case CLD_KILLED: - if(info.si_status == 15) { - info("child pid %d killed by signal %d.", info.si_pid, info.si_status); + if(info.si_status == SIGTERM) { + info("child pid %d killed by SIGTERM", info.si_pid); + return(0); + } + else if(info.si_status == SIGPIPE) { + info("child pid %d killed by SIGPIPE.", info.si_pid); return(0); } else { @@ -450,7 +444,3 @@ int netdata_pclose(FILE *fp_child_input, FILE 
*fp_child_output, pid_t pid) { return 0; } - -int netdata_spawn_waitpid(pid_t pid) { - return netdata_pclose(NULL, NULL, pid); -} diff --git a/libnetdata/popen/popen.h b/libnetdata/popen/popen.h index c57a35a4e3..4f86158bcb 100644 --- a/libnetdata/popen/popen.h +++ b/libnetdata/popen/popen.h @@ -28,13 +28,6 @@ int netdata_popene_variadic_internal_dont_use_directly(volatile pid_t *pidptr, c int netdata_pclose(FILE *fp_child_input, FILE *fp_child_output, pid_t pid); int netdata_spawn(const char *command, volatile pid_t *pidptr); -int netdata_spawn_waitpid(pid_t pid); - -void netdata_popen_tracking_init(void); -void netdata_popen_tracking_cleanup(void); -int netdata_popen_tracking_pid_shoud_be_reaped(pid_t pid); - -void signals_unblock(void); -void signals_reset(void); +int netdata_waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options); #endif /* NETDATA_POPEN_H */ diff --git a/libnetdata/socket/security.c b/libnetdata/socket/security.c index 7c50921502..d1181ad5f3 100644 --- a/libnetdata/socket/security.c +++ b/libnetdata/socket/security.c @@ -3,13 +3,389 @@ #ifdef ENABLE_HTTPS SSL_CTX *netdata_ssl_exporting_ctx =NULL; -SSL_CTX *netdata_ssl_client_ctx =NULL; -SSL_CTX *netdata_ssl_srv_ctx =NULL; +SSL_CTX *netdata_ssl_streaming_sender_ctx =NULL; +SSL_CTX *netdata_ssl_web_server_ctx =NULL; const char *netdata_ssl_security_key =NULL; const char *netdata_ssl_security_cert =NULL; const char *tls_version=NULL; const char *tls_ciphers=NULL; -int netdata_ssl_validate_server = NETDATA_SSL_VALID_CERTIFICATE; +bool netdata_ssl_validate_certificate = true; +bool netdata_ssl_validate_certificate_sender = true; + +static SOCKET_PEERS netdata_ssl_peers(NETDATA_SSL *ssl) { + int sock_fd; + + if(unlikely(!ssl->conn)) + sock_fd = -1; + else + sock_fd = SSL_get_rfd(ssl->conn); + + return socket_peers(sock_fd); +} + +bool netdata_ssl_open(NETDATA_SSL *ssl, SSL_CTX *ctx, int fd) { + errno = 0; + ssl->ssl_errno = 0; + + if(ssl->conn) { + if(!ctx || SSL_get_SSL_CTX(ssl->conn) != 
ctx) { + SSL_free(ssl->conn); + ssl->conn = NULL; + } + else if (SSL_clear(ssl->conn) == 0) { + netdata_ssl_log_error_queue("SSL_clear", ssl); + SSL_free(ssl->conn); + ssl->conn = NULL; + } + } + + if(!ssl->conn) { + if(!ctx) { + internal_error(true, "SSL: not CTX given"); + ssl->state = NETDATA_SSL_STATE_FAILED; + return false; + } + + ssl->conn = SSL_new(ctx); + if (!ssl->conn) { + netdata_ssl_log_error_queue("SSL_new", ssl); + ssl->state = NETDATA_SSL_STATE_FAILED; + return false; + } + } + + if(SSL_set_fd(ssl->conn, fd) != 1) { + netdata_ssl_log_error_queue("SSL_set_fd", ssl); + ssl->state = NETDATA_SSL_STATE_FAILED; + return false; + } + + ssl->state = NETDATA_SSL_STATE_INIT; + + ERR_clear_error(); + + return true; +} + +void netdata_ssl_close(NETDATA_SSL *ssl) { + errno = 0; + ssl->ssl_errno = 0; + + if(ssl->conn) { + if(SSL_connection(ssl)) { + int ret = SSL_shutdown(ssl->conn); + if(ret == 0) + SSL_shutdown(ssl->conn); + } + + SSL_free(ssl->conn); + + ERR_clear_error(); + } + + *ssl = NETDATA_SSL_UNSET_CONNECTION; +} + +void netdata_ssl_log_error_queue(const char *call, NETDATA_SSL *ssl) { + error_limit_static_thread_var(erl, 1, 0); + unsigned long err; + while((err = ERR_get_error())) { + char *code; + + switch (err) { + case SSL_ERROR_NONE: + code = "SSL_ERROR_NONE"; + break; + + case SSL_ERROR_SSL: + code = "SSL_ERROR_SSL"; + ssl->state = NETDATA_SSL_STATE_FAILED; + break; + + case SSL_ERROR_WANT_READ: + code = "SSL_ERROR_WANT_READ"; + break; + + case SSL_ERROR_WANT_WRITE: + code = "SSL_ERROR_WANT_WRITE"; + break; + + case SSL_ERROR_WANT_X509_LOOKUP: + code = "SSL_ERROR_WANT_X509_LOOKUP"; + break; + + case SSL_ERROR_SYSCALL: + code = "SSL_ERROR_SYSCALL"; + ssl->state = NETDATA_SSL_STATE_FAILED; + break; + + case SSL_ERROR_ZERO_RETURN: + code = "SSL_ERROR_ZERO_RETURN"; + break; + + case SSL_ERROR_WANT_CONNECT: + code = "SSL_ERROR_WANT_CONNECT"; + break; + + case SSL_ERROR_WANT_ACCEPT: + code = "SSL_ERROR_WANT_ACCEPT"; + break; + +#ifdef 
SSL_ERROR_WANT_ASYNC + case SSL_ERROR_WANT_ASYNC: + code = "SSL_ERROR_WANT_ASYNC"; + break; +#endif + +#ifdef SSL_ERROR_WANT_ASYNC_JOB + case SSL_ERROR_WANT_ASYNC_JOB: + code = "SSL_ERROR_WANT_ASYNC_JOB"; + break; +#endif + +#ifdef SSL_ERROR_WANT_CLIENT_HELLO_CB + case SSL_ERROR_WANT_CLIENT_HELLO_CB: + code = "SSL_ERROR_WANT_CLIENT_HELLO_CB"; + break; +#endif + +#ifdef SSL_ERROR_WANT_RETRY_VERIFY + case SSL_ERROR_WANT_RETRY_VERIFY: + code = "SSL_ERROR_WANT_RETRY_VERIFY"; + break; +#endif + + default: + code = "SSL_ERROR_UNKNOWN"; + break; + } + + char str[1024 + 1]; + ERR_error_string_n(err, str, 1024); + str[1024] = '\0'; + SOCKET_PEERS peers = netdata_ssl_peers(ssl); + error_limit(&erl, "SSL: %s() on socket local [[%s]:%d] <-> remote [[%s]:%d], returned error %lu (%s): %s", + call, peers.local.ip, peers.local.port, peers.peer.ip, peers.peer.port, err, code, str); + } +} + +static inline bool is_handshake_complete(NETDATA_SSL *ssl, const char *op) { + error_limit_static_thread_var(erl, 1, 0); + + if(unlikely(!ssl->conn)) { + internal_error(true, "SSL: trying to %s on a NULL connection", op); + return false; + } + + switch(ssl->state) { + case NETDATA_SSL_STATE_NOT_SSL: { + SOCKET_PEERS peers = netdata_ssl_peers(ssl); + error_limit(&erl, "SSL: on socket local [[%s]:%d] <-> remote [[%s]:%d], attempt to %s on non-SSL connection", + peers.local.ip, peers.local.port, peers.peer.ip, peers.peer.port, op); + return false; + } + + case NETDATA_SSL_STATE_INIT: { + SOCKET_PEERS peers = netdata_ssl_peers(ssl); + error_limit(&erl, "SSL: on socket local [[%s]:%d] <-> remote [[%s]:%d], attempt to %s on an incomplete connection", + peers.local.ip, peers.local.port, peers.peer.ip, peers.peer.port, op); + return false; + } + + case NETDATA_SSL_STATE_FAILED: { + SOCKET_PEERS peers = netdata_ssl_peers(ssl); + error_limit(&erl, "SSL: on socket local [[%s]:%d] <-> remote [[%s]:%d], attempt to %s on a failed connection", + peers.local.ip, peers.local.port, peers.peer.ip, 
peers.peer.port, op); + return false; + } + + case NETDATA_SSL_STATE_COMPLETE: { + return true; + } + } + + return false; +} + +/* + * netdata_ssl_read() should return the same as read(): + * + * Positive value: The read() function succeeded and read some bytes. The exact number of bytes read is returned. + * + * Zero: For files and sockets, a return value of zero signifies end-of-file (EOF), meaning no more data is available + * for reading. For sockets, this usually means the other side has closed the connection. + * + * -1: An error occurred. The specific error can be found by examining the errno variable. + * EAGAIN or EWOULDBLOCK: The file descriptor is in non-blocking mode, and the read operation would block. + * (These are often the same value, but can be different on some systems.) + */ + +ssize_t netdata_ssl_read(NETDATA_SSL *ssl, void *buf, size_t num) { + errno = 0; + ssl->ssl_errno = 0; + + if(unlikely(!is_handshake_complete(ssl, "read"))) + return -1; + + int bytes = SSL_read(ssl->conn, buf, (int)num); + + if(unlikely(bytes <= 0)) { + int err = SSL_get_error(ssl->conn, bytes); + netdata_ssl_log_error_queue("SSL_read", ssl); + if (err == SSL_ERROR_WANT_READ || err == SSL_ERROR_WANT_WRITE) { + ssl->ssl_errno = err; + errno = EWOULDBLOCK; + } + + bytes = -1; // according to read() or recv() + } + + return bytes; +} + +/* + * netdata_ssl_write() should return the same as write(): + * + * Positive value: The write() function succeeded and wrote some bytes. The exact number of bytes written is returned. + * + * Zero: It's technically possible for write() to return zero, indicating that zero bytes were written. However, for a + * socket, this generally does not happen unless the size of the data to be written is zero. + * + * -1: An error occurred. The specific error can be found by examining the errno variable. + * EAGAIN or EWOULDBLOCK: The file descriptor is in non-blocking mode, and the write operation would block. 
+ * (These are often the same value, but can be different on some systems.) + */ + +ssize_t netdata_ssl_write(NETDATA_SSL *ssl, const void *buf, size_t num) { + errno = 0; + ssl->ssl_errno = 0; + + if(unlikely(!is_handshake_complete(ssl, "write"))) + return -1; + + int bytes = SSL_write(ssl->conn, (uint8_t *)buf, (int)num); + + if(unlikely(bytes <= 0)) { + int err = SSL_get_error(ssl->conn, bytes); + netdata_ssl_log_error_queue("SSL_write", ssl); + if (err == SSL_ERROR_WANT_READ || err == SSL_ERROR_WANT_WRITE) { + ssl->ssl_errno = err; + errno = EWOULDBLOCK; + } + + bytes = -1; // according to write() or send() + } + + return bytes; +} + +static inline bool is_handshake_initialized(NETDATA_SSL *ssl, const char *op) { + error_limit_static_thread_var(erl, 1, 0); + + if(unlikely(!ssl->conn)) { + internal_error(true, "SSL: trying to %s on a NULL connection", op); + return false; + } + + switch(ssl->state) { + case NETDATA_SSL_STATE_NOT_SSL: { + SOCKET_PEERS peers = netdata_ssl_peers(ssl); + error_limit(&erl, "SSL: on socket local [[%s]:%d] <-> remote [[%s]:%d], attempt to %s on non-SSL connection", + peers.local.ip, peers.local.port, peers.peer.ip, peers.peer.port, op); + return false; + } + + case NETDATA_SSL_STATE_INIT: { + return true; + } + + case NETDATA_SSL_STATE_FAILED: { + SOCKET_PEERS peers = netdata_ssl_peers(ssl); + error_limit(&erl, "SSL: on socket local [[%s]:%d] <-> remote [[%s]:%d], attempt to %s on a failed connection", + peers.local.ip, peers.local.port, peers.peer.ip, peers.peer.port, op); + return false; + } + + case NETDATA_SSL_STATE_COMPLETE: { + SOCKET_PEERS peers = netdata_ssl_peers(ssl); + error_limit(&erl, "SSL: on socket local [[%s]:%d] <-> remote [[%s]:%d], attempt to %s on an complete connection", + peers.local.ip, peers.local.port, peers.peer.ip, peers.peer.port, op); + return false; + } + } + + return false; +} + +#define WANT_READ_WRITE_TIMEOUT_MS 10 + +static inline bool want_read_write_should_retry(NETDATA_SSL *ssl, int err) { + int 
ssl_errno = SSL_get_error(ssl->conn, err); + if(ssl_errno == SSL_ERROR_WANT_READ || ssl_errno == SSL_ERROR_WANT_WRITE) { + struct pollfd pfds[1] = { [0] = { + .fd = SSL_get_rfd(ssl->conn), + .events = (short)(((ssl_errno == SSL_ERROR_WANT_READ ) ? POLLIN : 0) | + ((ssl_errno == SSL_ERROR_WANT_WRITE) ? POLLOUT : 0)), + }}; + + if(poll(pfds, 1, WANT_READ_WRITE_TIMEOUT_MS) <= 0) + return false; // timeout (0) or error (<0) + + return true; // we have activity, so we should retry + } + + return false; // an unknown error +} + +bool netdata_ssl_connect(NETDATA_SSL *ssl) { + errno = 0; + ssl->ssl_errno = 0; + + if(unlikely(!is_handshake_initialized(ssl, "connect"))) + return false; + + SSL_set_connect_state(ssl->conn); + + int err; + while ((err = SSL_connect(ssl->conn)) != 1) { + if(!want_read_write_should_retry(ssl, err)) + break; + } + + if (err != 1) { + netdata_ssl_log_error_queue("SSL_connect", ssl); + ssl->state = NETDATA_SSL_STATE_FAILED; + return false; + } + + ssl->state = NETDATA_SSL_STATE_COMPLETE; + return true; +} + +bool netdata_ssl_accept(NETDATA_SSL *ssl) { + errno = 0; + ssl->ssl_errno = 0; + + if(unlikely(!is_handshake_initialized(ssl, "accept"))) + return false; + + SSL_set_accept_state(ssl->conn); + + int err; + while ((err = SSL_accept(ssl->conn)) != 1) { + if(!want_read_write_should_retry(ssl, err)) + break; + } + + if (err != 1) { + netdata_ssl_log_error_queue("SSL_accept", ssl); + ssl->state = NETDATA_SSL_STATE_FAILED; + return false; + } + + ssl->state = NETDATA_SSL_STATE_COMPLETE; + return true; +} /** * Info Callback @@ -20,7 +396,7 @@ int netdata_ssl_validate_server = NETDATA_SSL_VALID_CERTIFICATE; * @param where the variable with the flags set. 
* @param ret the return of the caller */ -static void security_info_callback(const SSL *ssl, int where, int ret __maybe_unused) { +static void netdata_ssl_info_callback(const SSL *ssl, int where, int ret __maybe_unused) { (void)ssl; if (where & SSL_CB_ALERT) { debug(D_WEB_CLIENT,"SSL INFO CALLBACK %s %s", SSL_alert_type_string(ret), SSL_alert_desc_string_long(ret)); @@ -32,8 +408,8 @@ static void security_info_callback(const SSL *ssl, int where, int ret __maybe_un * * Starts the openssl library for the Netdata. */ -void security_openssl_library() -{ +void netdata_ssl_initialize_openssl() { + #if OPENSSL_VERSION_NUMBER < OPENSSL_VERSION_110 # if (SSLEAY_VERSION_NUMBER >= OPENSSL_VERSION_097) OPENSSL_config(NULL); @@ -42,10 +418,13 @@ void security_openssl_library() SSL_load_error_strings(); SSL_library_init(); + #else + if (OPENSSL_init_ssl(OPENSSL_INIT_LOAD_CONFIG, NULL) != 1) { error("SSL library cannot be initialized."); } + #endif } @@ -59,7 +438,7 @@ void security_openssl_library() * * @return it returns the version number. */ -int tls_select_version(const char *lversion) { +static int netdata_ssl_select_tls_version(const char *lversion) { if (!strcmp(lversion, "1") || !strcmp(lversion, "1.0")) return TLS1_VERSION; else if (!strcmp(lversion, "1.1")) @@ -80,43 +459,13 @@ int tls_select_version(const char *lversion) { #endif /** - * OpenSSL common options - * - * Clients and SERVER have common options, this function is responsible to set them in the context. - * - * @param ctx the initialized SSL context. - * @param side 0 means server, and 1 client. 
- */ -void security_openssl_common_options(SSL_CTX *ctx, int side) { -#if OPENSSL_VERSION_NUMBER >= OPENSSL_VERSION_110 - if (!side) { - int version = tls_select_version(tls_version) ; -#endif -#if OPENSSL_VERSION_NUMBER < OPENSSL_VERSION_110 - SSL_CTX_set_options (ctx,SSL_OP_NO_SSLv2|SSL_OP_NO_SSLv3|SSL_OP_NO_COMPRESSION); -#else - SSL_CTX_set_min_proto_version(ctx, TLS1_VERSION); - SSL_CTX_set_max_proto_version(ctx, version); - - if(tls_ciphers && strcmp(tls_ciphers, "none") != 0) { - if (!SSL_CTX_set_cipher_list(ctx, tls_ciphers)) { - error("SSL error. cannot set the cipher list"); - } - } - } -#endif - - SSL_CTX_set_mode(ctx, SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER); -} - -/** * Initialize Openssl Client * * Starts the client context with TLS 1.2. * * @return It returns the context on success or NULL otherwise */ -SSL_CTX * security_initialize_openssl_client() { +SSL_CTX * netdata_ssl_create_client_ctx(unsigned long mode) { SSL_CTX *ctx; #if OPENSSL_VERSION_NUMBER < OPENSSL_VERSION_110 ctx = SSL_CTX_new(SSLv23_client_method()); @@ -138,6 +487,9 @@ SSL_CTX * security_initialize_openssl_client() { #endif } + if(mode) + SSL_CTX_set_mode(ctx, mode); + return ctx; } @@ -148,7 +500,7 @@ SSL_CTX * security_initialize_openssl_client() { * * @return It returns the context on success or NULL otherwise */ -static SSL_CTX * security_initialize_openssl_server() { +static SSL_CTX * netdata_ssl_create_server_ctx(unsigned long mode) { SSL_CTX *ctx; char lerror[512]; static int netdata_id_context = 1; @@ -171,7 +523,19 @@ static SSL_CTX * security_initialize_openssl_server() { SSL_CTX_use_certificate_chain_file(ctx, netdata_ssl_security_cert); #endif - security_openssl_common_options(ctx, 0); + +#if OPENSSL_VERSION_NUMBER < OPENSSL_VERSION_110 + SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2|SSL_OP_NO_SSLv3|SSL_OP_NO_COMPRESSION); +#else + SSL_CTX_set_min_proto_version(ctx, TLS1_VERSION); + SSL_CTX_set_max_proto_version(ctx, netdata_ssl_select_tls_version(tls_version)); + + 
if(tls_ciphers && strcmp(tls_ciphers, "none") != 0) { + if (!SSL_CTX_set_cipher_list(ctx, tls_ciphers)) { + error("SSL error. cannot set the cipher list"); + } + } +#endif SSL_CTX_use_PrivateKey_file(ctx, netdata_ssl_security_key,SSL_FILETYPE_PEM); @@ -183,13 +547,15 @@ static SSL_CTX * security_initialize_openssl_server() { } SSL_CTX_set_session_id_context(ctx,(void*)&netdata_id_context,(unsigned int)sizeof(netdata_id_context)); - SSL_CTX_set_info_callback(ctx,security_info_callback |