diff options
author | Costa Tsaousis <costa@tsaousis.gr> | 2018-10-15 23:16:42 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-10-15 23:16:42 +0300 |
commit | 8fbf817ef83b3524b15f908251909d9d6feb5532 (patch) | |
tree | 4c2d417b7392c907bbdbe355b8db361bd3741a02 /streaming | |
parent | 1ad4f1bcfc691120102b57dbd426de0870abd76f (diff) |
modularized all source code (#4391)
* modularized all external plugins
* added README.md in plugins
* fixed title
* fixed typo
* relative link to external plugins
* external plugins configuration README
* added plugins link
* remove plugins link
* plugin names are links
* added links to external plugins
* removed unnecessary spacing
* list to table
* added language
* fixed typo
* list to table on internal plugins
* added more documentation to internal plugins
* moved python, node, and bash code and configs into the external plugins
* added statsd README
* fix bug with corrupting config.h every 2nd compilation
* moved all config files together with their code
* more documentation
* diskspace info
* fixed broken links in apps.plugin
* added backends docs
* updated plugins readme
* move nc-backend.sh to backends
* created daemon directory
* moved all code outside src/
* fixed readme indentation
* renamed plugins.d.plugin to plugins.d
* updated readme
* removed linux- from linux plugins
* updated readme
* updated readme
* updated readme
* updated readme
* updated readme
* updated readme
* fixed README.md links
* fixed netdata tree links
* updated codacy, codeclimate and lgtm excluded paths
* update CMakeLists.txt
* updated automake options at top directory
* libnetdata split into directories
* updated READMEs
* updated READMEs
* updated ARL docs
* updated ARL docs
* moved /plugins to /collectors
* moved all external plugins outside plugins.d
* updated codacy, codeclimate, lgtm
* updated README
* updated url
* updated readme
* updated readme
* updated readme
* updated readme
* moved api and web into webserver
* web/api web/gui web/server
* modularized webserver
* removed web/gui/version.txt
Diffstat (limited to 'streaming')
-rw-r--r-- | streaming/Makefile.am | 12 | ||||
-rw-r--r-- | streaming/README.md | 0 | ||||
-rw-r--r-- | streaming/rrdpush.c | 1227 | ||||
-rw-r--r-- | streaming/rrdpush.h | 25 | ||||
-rw-r--r-- | streaming/stream.conf | 191 |
5 files changed, 1455 insertions, 0 deletions
diff --git a/streaming/Makefile.am b/streaming/Makefile.am new file mode 100644 index 0000000000..84048948b4 --- /dev/null +++ b/streaming/Makefile.am @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_libconfig_DATA = \ + stream.conf \ + $(NULL) + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/streaming/README.md b/streaming/README.md new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/streaming/README.md diff --git a/streaming/rrdpush.c b/streaming/rrdpush.c new file mode 100644 index 0000000000..5d28206049 --- /dev/null +++ b/streaming/rrdpush.c @@ -0,0 +1,1227 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "rrdpush.h" + +/* + * rrdpush + * + * 3 threads are involved for all stream operations + * + * 1. a random data collection thread, calling rrdset_done_push() + * this is called for each chart. + * + * the output of this work is kept in a BUFFER in RRDHOST + * the sender thread is signalled via a pipe (also in RRDHOST) + * + * 2. a sender thread running at the sending netdata + * this is spawned automatically on the first chart to be pushed + * + * It tries to push the metrics to the remote netdata, as fast + * as possible (i.e. immediately after they are collected). + * + * 3. a receiver thread, running at the receiving netdata + * this is spawned automatically when the sender connects to + * the receiver. + * + */ + +#define START_STREAMING_PROMPT "Hit me baby, push them over..." 
+ +typedef enum { + RRDPUSH_MULTIPLE_CONNECTIONS_ALLOW, + RRDPUSH_MULTIPLE_CONNECTIONS_DENY_NEW +} RRDPUSH_MULTIPLE_CONNECTIONS_STRATEGY; + +unsigned int default_rrdpush_enabled = 0; +char *default_rrdpush_destination = NULL; +char *default_rrdpush_api_key = NULL; +char *default_rrdpush_send_charts_matching = NULL; + +int rrdpush_init() { + default_rrdpush_enabled = (unsigned int)appconfig_get_boolean(&stream_config, CONFIG_SECTION_STREAM, "enabled", default_rrdpush_enabled); + default_rrdpush_destination = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "destination", ""); + default_rrdpush_api_key = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "api key", ""); + default_rrdpush_send_charts_matching = appconfig_get(&stream_config, CONFIG_SECTION_STREAM, "send charts matching", "*"); + rrdhost_free_orphan_time = config_get_number(CONFIG_SECTION_GLOBAL, "cleanup orphan hosts after seconds", rrdhost_free_orphan_time); + + if(default_rrdpush_enabled && (!default_rrdpush_destination || !*default_rrdpush_destination || !default_rrdpush_api_key || !*default_rrdpush_api_key)) { + error("STREAM [send]: cannot enable sending thread - information is missing."); + default_rrdpush_enabled = 0; + } + + return default_rrdpush_enabled; +} + +#define CONNECTED_TO_SIZE 100 + +// data collection happens from multiple threads +// each of these threads calls rrdset_done() +// which in turn calls rrdset_done_push() +// which uses this pipe to notify the streaming thread +// that there are more data ready to be sent +#define PIPE_READ 0 +#define PIPE_WRITE 1 + +// to have the remote netdata re-sync the charts +// to its current clock, we send for this many +// iterations a BEGIN line without microseconds +// this is for the first iterations of each chart +unsigned int remote_clock_resync_iterations = 60; + +#define rrdpush_buffer_lock(host) netdata_mutex_lock(&((host)->rrdpush_sender_buffer_mutex)) +#define rrdpush_buffer_unlock(host) 
netdata_mutex_unlock(&((host)->rrdpush_sender_buffer_mutex)) + +static inline int should_send_chart_matching(RRDSET *st) { + if(unlikely(!rrdset_flag_check(st, RRDSET_FLAG_ENABLED))) { + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_SEND); + rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_IGNORE); + } + else if(!rrdset_flag_check(st, RRDSET_FLAG_UPSTREAM_SEND|RRDSET_FLAG_UPSTREAM_IGNORE)) { + RRDHOST *host = st->rrdhost; + + if(simple_pattern_matches(host->rrdpush_send_charts_matching, st->id) || + simple_pattern_matches(host->rrdpush_send_charts_matching, st->name)) { + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_IGNORE); + rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_SEND); + } + else { + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_SEND); + rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_IGNORE); + } + } + + return(rrdset_flag_check(st, RRDSET_FLAG_UPSTREAM_SEND)); +} + +// checks if the current chart definition has been sent +static inline int need_to_send_chart_definition(RRDSET *st) { + rrdset_check_rdlock(st); + + if(unlikely(!(rrdset_flag_check(st, RRDSET_FLAG_UPSTREAM_EXPOSED)))) + return 1; + + RRDDIM *rd; + rrddim_foreach_read(rd, st) { + if(unlikely(!rd->exposed)) { + #ifdef NETDATA_INTERNAL_CHECKS + info("host '%s', chart '%s', dimension '%s' flag 'exposed' triggered chart refresh to upstream", st->rrdhost->hostname, st->id, rd->id); + #endif + return 1; + } + } + + return 0; +} + +// sends the current chart definition +static inline void rrdpush_send_chart_definition_nolock(RRDSET *st) { + RRDHOST *host = st->rrdhost; + + rrdset_flag_set(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + + // send the chart + buffer_sprintf( + host->rrdpush_sender_buffer + , "CHART \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" \"%s\" %ld %d \"%s %s %s %s\" \"%s\" \"%s\"\n" + , st->id + , st->name + , st->title + , st->units + , st->family + , st->context + , rrdset_type_name(st->chart_type) + , st->priority + , st->update_every + , rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)?"obsolete":"" + , 
rrdset_flag_check(st, RRDSET_FLAG_DETAIL)?"detail":"" + , rrdset_flag_check(st, RRDSET_FLAG_STORE_FIRST)?"store_first":"" + , rrdset_flag_check(st, RRDSET_FLAG_HIDDEN)?"hidden":"" + , (st->plugin_name)?st->plugin_name:"" + , (st->module_name)?st->module_name:"" + ); + + // send the dimensions + RRDDIM *rd; + rrddim_foreach_read(rd, st) { + buffer_sprintf( + host->rrdpush_sender_buffer + , "DIMENSION \"%s\" \"%s\" \"%s\" " COLLECTED_NUMBER_FORMAT " " COLLECTED_NUMBER_FORMAT " \"%s %s\"\n" + , rd->id + , rd->name + , rrd_algorithm_name(rd->algorithm) + , rd->multiplier + , rd->divisor + , rrddim_flag_check(rd, RRDDIM_FLAG_HIDDEN)?"hidden":"" + , rrddim_flag_check(rd, RRDDIM_FLAG_DONT_DETECT_RESETS_OR_OVERFLOWS)?"noreset":"" + ); + rd->exposed = 1; + } + + // send the chart local custom variables + RRDSETVAR *rs; + for(rs = st->variables; rs ;rs = rs->next) { + if(unlikely(rs->type == RRDVAR_TYPE_CALCULATED && rs->options & RRDVAR_OPTION_CUSTOM_CHART_VAR)) { + calculated_number *value = (calculated_number *) rs->value; + + buffer_sprintf( + host->rrdpush_sender_buffer + , "VARIABLE CHART %s = " CALCULATED_NUMBER_FORMAT "\n" + , rs->variable + , *value + ); + } + } + + st->upstream_resync_time = st->last_collected_time.tv_sec + (remote_clock_resync_iterations * st->update_every); +} + +// sends the current chart dimensions +static inline void rrdpush_send_chart_metrics_nolock(RRDSET *st) { + RRDHOST *host = st->rrdhost; + buffer_sprintf(host->rrdpush_sender_buffer, "BEGIN \"%s\" %llu\n", st->id, (st->last_collected_time.tv_sec > st->upstream_resync_time)?st->usec_since_last_update:0); + + RRDDIM *rd; + rrddim_foreach_read(rd, st) { + if(rd->updated && rd->exposed) + buffer_sprintf(host->rrdpush_sender_buffer + , "SET \"%s\" = " COLLECTED_NUMBER_FORMAT "\n" + , rd->id + , rd->collected_value + ); + } + + buffer_strcat(host->rrdpush_sender_buffer, "END\n"); +} + +static void rrdpush_sender_thread_spawn(RRDHOST *host); + +void rrdset_push_chart_definition_now(RRDSET *st) 
{ + RRDHOST *host = st->rrdhost; + + if(unlikely(!host->rrdpush_send_enabled || !should_send_chart_matching(st))) + return; + + rrdset_rdlock(st); + rrdpush_buffer_lock(host); + rrdpush_send_chart_definition_nolock(st); + rrdpush_buffer_unlock(host); + rrdset_unlock(st); +} + +void rrdset_done_push(RRDSET *st) { + if(unlikely(!should_send_chart_matching(st))) + return; + + RRDHOST *host = st->rrdhost; + + rrdpush_buffer_lock(host); + + if(unlikely(host->rrdpush_send_enabled && !host->rrdpush_sender_spawn)) + rrdpush_sender_thread_spawn(host); + + if(unlikely(!host->rrdpush_sender_buffer || !host->rrdpush_sender_connected)) { + if(unlikely(!host->rrdpush_sender_error_shown)) + error("STREAM %s [send]: not ready - discarding collected metrics.", host->hostname); + + host->rrdpush_sender_error_shown = 1; + + rrdpush_buffer_unlock(host); + return; + } + else if(unlikely(host->rrdpush_sender_error_shown)) { + info("STREAM %s [send]: sending metrics...", host->hostname); + host->rrdpush_sender_error_shown = 0; + } + + if(need_to_send_chart_definition(st)) + rrdpush_send_chart_definition_nolock(st); + + rrdpush_send_chart_metrics_nolock(st); + + // signal the sender there are more data + if(host->rrdpush_sender_pipe[PIPE_WRITE] != -1 && write(host->rrdpush_sender_pipe[PIPE_WRITE], " ", 1) == -1) + error("STREAM %s [send]: cannot write to internal pipe", host->hostname); + + rrdpush_buffer_unlock(host); +} + +// ---------------------------------------------------------------------------- +// rrdpush sender thread + +static inline void rrdpush_sender_add_host_variable_to_buffer_nolock(RRDHOST *host, RRDVAR *rv) { + calculated_number *value = (calculated_number *)rv->value; + + buffer_sprintf( + host->rrdpush_sender_buffer + , "VARIABLE HOST %s = " CALCULATED_NUMBER_FORMAT "\n" + , rv->name + , *value + ); + + debug(D_STREAM, "RRDVAR pushed HOST VARIABLE %s = " CALCULATED_NUMBER_FORMAT, rv->name, *value); +} + +void rrdpush_sender_send_this_host_variable_now(RRDHOST *host, 
RRDVAR *rv) { + if(host->rrdpush_send_enabled && host->rrdpush_sender_spawn && host->rrdpush_sender_connected) { + rrdpush_buffer_lock(host); + rrdpush_sender_add_host_variable_to_buffer_nolock(host, rv); + rrdpush_buffer_unlock(host); + } +} + +static int rrdpush_sender_thread_custom_host_variables_callback(void *rrdvar_ptr, void *host_ptr) { + RRDVAR *rv = (RRDVAR *)rrdvar_ptr; + RRDHOST *host = (RRDHOST *)host_ptr; + + if(unlikely(rv->options & RRDVAR_OPTION_CUSTOM_HOST_VAR && rv->type == RRDVAR_TYPE_CALCULATED)) { + rrdpush_sender_add_host_variable_to_buffer_nolock(host, rv); + + // return 1, so that the traversal will return the number of variables sent + return 1; + } + + // returning a negative number will break the traversal + return 0; +} + +static void rrdpush_sender_thread_send_custom_host_variables(RRDHOST *host) { + int ret = rrdvar_callback_for_all_host_variables(host, rrdpush_sender_thread_custom_host_variables_callback, host); + debug(D_STREAM, "RRDVAR sent %d VARIABLES", ret); +} + +// resets all the chart, so that their definitions +// will be resent to the central netdata +static void rrdpush_sender_thread_reset_all_charts(RRDHOST *host) { + rrdhost_rdlock(host); + + RRDSET *st; + rrdset_foreach_read(st, host) { + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + + st->upstream_resync_time = 0; + + rrdset_rdlock(st); + + RRDDIM *rd; + rrddim_foreach_read(rd, st) + rd->exposed = 0; + + rrdset_unlock(st); + } + + rrdhost_unlock(host); +} + +static inline void rrdpush_sender_thread_data_flush(RRDHOST *host) { + rrdpush_buffer_lock(host); + + if(buffer_strlen(host->rrdpush_sender_buffer)) + error("STREAM %s [send]: discarding %zu bytes of metrics already in the buffer.", host->hostname, buffer_strlen(host->rrdpush_sender_buffer)); + + buffer_flush(host->rrdpush_sender_buffer); + + rrdpush_sender_thread_reset_all_charts(host); + rrdpush_sender_thread_send_custom_host_variables(host); + + rrdpush_buffer_unlock(host); +} + +void 
rrdpush_sender_thread_stop(RRDHOST *host) { + rrdpush_buffer_lock(host); + rrdhost_wrlock(host); + + netdata_thread_t thr = 0; + + if(host->rrdpush_sender_spawn) { + info("STREAM %s [send]: signaling sending thread to stop...", host->hostname); + + // signal the thread that we want to join it + host->rrdpush_sender_join = 1; + + // copy the thread id, so that we will be waiting for the right one + // even if a new one has been spawn + thr = host->rrdpush_sender_thread; + + // signal it to cancel + netdata_thread_cancel(host->rrdpush_sender_thread); + } + + rrdhost_unlock(host); + rrdpush_buffer_unlock(host); + + if(thr != 0) { + info("STREAM %s [send]: waiting for the sending thread to stop...", host->hostname); + void *result; + netdata_thread_join(thr, &result); + info("STREAM %s [send]: sending thread has exited.", host->hostname); + } +} + +static inline void rrdpush_sender_thread_close_socket(RRDHOST *host) { + host->rrdpush_sender_connected = 0; + + if(host->rrdpush_sender_socket != -1) { + close(host->rrdpush_sender_socket); + host->rrdpush_sender_socket = -1; + } +} + +static int rrdpush_sender_thread_connect_to_master(RRDHOST *host, int default_port, int timeout, size_t *reconnects_counter, char *connected_to, size_t connected_to_size) { + struct timeval tv = { + .tv_sec = timeout, + .tv_usec = 0 + }; + + // make sure the socket is closed + rrdpush_sender_thread_close_socket(host); + + debug(D_STREAM, "STREAM: Attempting to connect..."); + info("STREAM %s [send to %s]: connecting...", host->hostname, host->rrdpush_send_destination); + + host->rrdpush_sender_socket = connect_to_one_of( + host->rrdpush_send_destination + , default_port + , &tv + , reconnects_counter + , connected_to + , connected_to_size + ); + + if(unlikely(host->rrdpush_sender_socket == -1)) { + error("STREAM %s [send to %s]: failed to connect", host->hostname, host->rrdpush_send_destination); + return 0; + } + + info("STREAM %s [send to %s]: initializing communication...", host->hostname, 
connected_to); + + #define HTTP_HEADER_SIZE 8192 + char http[HTTP_HEADER_SIZE + 1]; + snprintfz(http, HTTP_HEADER_SIZE, + "STREAM key=%s&hostname=%s&registry_hostname=%s&machine_guid=%s&update_every=%d&os=%s&timezone=%s&tags=%s HTTP/1.1\r\n" + "User-Agent: %s/%s\r\n" + "Accept: */*\r\n\r\n" + , host->rrdpush_send_api_key + , host->hostname + , host->registry_hostname + , host->machine_guid + , default_rrd_update_every + , host->os + , host->timezone + , (host->tags)?host->tags:"" + , host->program_name + , host->program_version + ); + + if(send_timeout(host->rrdpush_sender_socket, http, strlen(http), 0, timeout) == -1) { + error("STREAM %s [send to %s]: failed to send HTTP header to remote netdata.", host->hostname, connected_to); + rrdpush_sender_thread_close_socket(host); + return 0; + } + + info("STREAM %s [send to %s]: waiting response from remote netdata...", host->hostname, connected_to); + + if(recv_timeout(host->rrdpush_sender_socket, http, HTTP_HEADER_SIZE, 0, timeout) == -1) { + error("STREAM %s [send to %s]: remote netdata does not respond.", host->hostname, connected_to); + rrdpush_sender_thread_close_socket(host); + return 0; + } + + if(strncmp(http, START_STREAMING_PROMPT, strlen(START_STREAMING_PROMPT)) != 0) { + error("STREAM %s [send to %s]: server is not replying properly (is it a netdata?).", host->hostname, connected_to); + rrdpush_sender_thread_close_socket(host); + return 0; + } + + info("STREAM %s [send to %s]: established communication - ready to send metrics...", host->hostname, connected_to); + + if(sock_setnonblock(host->rrdpush_sender_socket) < 0) + error("STREAM %s [send to %s]: cannot set non-blocking mode for socket.", host->hostname, connected_to); + + if(sock_enlarge_out(host->rrdpush_sender_socket) < 0) + error("STREAM %s [send to %s]: cannot enlarge the socket buffer.", host->hostname, connected_to); + + debug(D_STREAM, "STREAM: Connected on fd %d...", host->rrdpush_sender_socket); + + return 1; +} + +static void 
rrdpush_sender_thread_cleanup_callback(void *ptr) { + RRDHOST *host = (RRDHOST *)ptr; + + rrdpush_buffer_lock(host); + rrdhost_wrlock(host); + + info("STREAM %s [send]: sending thread cleans up...", host->hostname); + + rrdpush_sender_thread_close_socket(host); + + // close the pipe + if(host->rrdpush_sender_pipe[PIPE_READ] != -1) { + close(host->rrdpush_sender_pipe[PIPE_READ]); + host->rrdpush_sender_pipe[PIPE_READ] = -1; + } + + if(host->rrdpush_sender_pipe[PIPE_WRITE] != -1) { + close(host->rrdpush_sender_pipe[PIPE_WRITE]); + host->rrdpush_sender_pipe[PIPE_WRITE] = -1; + } + + buffer_free(host->rrdpush_sender_buffer); + host->rrdpush_sender_buffer = NULL; + + if(!host->rrdpush_sender_join) { + info("STREAM %s [send]: sending thread detaches itself.", host->hostname); + netdata_thread_detach(netdata_thread_self()); + } + + host->rrdpush_sender_spawn = 0; + + info("STREAM %s [send]: sending thread now exits.", host->hostname); + + rrdhost_unlock(host); + rrdpush_buffer_unlock(host); +} + +void *rrdpush_sender_thread(void *ptr) { + RRDHOST *host = (RRDHOST *)ptr; + + if(!host->rrdpush_send_enabled || !host->rrdpush_send_destination || !*host->rrdpush_send_destination || !host->rrdpush_send_api_key || !*host->rrdpush_send_api_key) { + error("STREAM %s [send]: thread created (task id %d), but host has streaming disabled.", host->hostname, gettid()); + return NULL; + } + + info("STREAM %s [send]: thread created (task id %d)", host->hostname, gettid()); + + int timeout = (int)appconfig_get_number(&stream_config, CONFIG_SECTION_STREAM, "timeout seconds", 60); + int default_port = (int)appconfig_get_number(&stream_config, CONFIG_SECTION_STREAM, "default port", 19999); + size_t max_size = (size_t)appconfig_get_number(&stream_config, CONFIG_SECTION_STREAM, "buffer size bytes", 1024 * 1024); + unsigned int reconnect_delay = (unsigned int)appconfig_get_number(&stream_config, CONFIG_SECTION_STREAM, "reconnect delay seconds", 5); + remote_clock_resync_iterations = (unsigned 
int)appconfig_get_number(&stream_config, CONFIG_SECTION_STREAM, "initial clock resync iterations", remote_clock_resync_iterations); + char connected_to[CONNECTED_TO_SIZE + 1] = ""; + + // initialize rrdpush globals + host->rrdpush_sender_buffer = buffer_create(1); + host->rrdpush_sender_connected = 0; + if(pipe(host->rrdpush_sender_pipe) == -1) fatal("STREAM %s [send]: cannot create required pipe.", host->hostname); + + // initialize local variables + size_t begin = 0; + size_t reconnects_counter = 0; + size_t sent_bytes = 0; + size_t sent_bytes_on_this_connection = 0; + + + time_t last_sent_t = 0; + struct pollfd fds[2], *ifd, *ofd; + nfds_t fdmax; + + ifd = &fds[0]; + ofd = &fds[1]; + + size_t not_connected_loops = 0; + + netdata_thread_cleanup_push(rrdpush_sender_thread_cleanup_callback, host); + + for(; host->rrdpush_send_enabled && !netdata_exit ;) { + // check for outstanding cancellation requests + netdata_thread_testcancel(); + + // if we don't have socket open, lets wait a bit + if(unlikely(host->rrdpush_sender_socket == -1)) { + if(not_connected_loops == 0 && sent_bytes_on_this_connection > 0) { + // fast re-connection on first disconnect + sleep_usec(USEC_PER_MS * 500); // milliseconds + } + else { + // slow re-connection on repeating errors + sleep_usec(USEC_PER_SEC * reconnect_delay); // seconds + } + + if(rrdpush_sender_thread_connect_to_master(host, default_port, timeout, &reconnects_counter, connected_to, CONNECTED_TO_SIZE)) { + last_sent_t = now_monotonic_sec(); + + // reset the buffer, to properly send charts and metrics + rrdpush_sender_thread_data_flush(host); + + // make sure the next reconnection will be immediate + not_connected_loops = 0; + + // reset the bytes we have sent for this session + sent_bytes_on_this_connection = 0; + + // let the data collection threads know we are ready + host->rrdpush_sender_connected = 1; + } + else { + // increase the failed connections counter + not_connected_loops++; + + // reset the number of bytes sent + 
sent_bytes_on_this_connection = 0; + } + + // loop through + continue; + } + else if(unlikely(now_monotonic_sec() - last_sent_t > timeout)) { + error("STREAM %s [send to %s]: could not send metrics for %d seconds - closing connection - we have sent %zu bytes on this connection.", host->hostname, connected_to, timeout, sent_bytes_on_this_connection); + rrdpush_sender_thread_close_socket(host); + } + + ifd->fd = host->rrdpush_sender_pipe[PIPE_READ]; + ifd->events = POLLIN; + ifd->revents = 0; + + ofd->fd = host->rrdpush_sender_socket; + ofd->revents = 0; + if(ofd->fd != -1 && begin < buffer_strlen(host->rrdpush_sender_buffer)) { + debug(D_STREAM, "STREAM: Requesting data output on streaming socket %d...", ofd->fd); + ofd->events = POLLOUT; + fdmax = 2; + } + else { + debug(D_STREAM, "STREAM: Not requesting data output on streaming socket %d (nothing to send now)...", ofd->fd); + ofd->events = 0; + fdmax = 1; + } + + debug(D_STREAM, "STREAM: Waiting for poll() events (current buffer length %zu bytes)...", buffer_strlen(host->rrdpush_sender_buffer)); + if(unlikely(netdata_exit)) break; + int retval = poll(fds, fdmax, 1000); + if(unlikely(netdata_exit)) break; + + if(unlikely(retval == -1)) { + debug(D_STREAM, "STREAM: poll() failed (current buffer length %zu bytes)...", buffer_strlen(host->rrdpush_sender_buffer)); + + if(errno == EAGAIN || errno == EINTR) { + debug(D_STREAM, "STREAM: poll() failed with EAGAIN or EINTR..."); + } + else { + error("STREAM %s [send to %s]: failed to poll(). 
Closing socket.", host->hostname, connected_to); + rrdpush_sender_thread_close_socket(host); + } + + continue; + } + else if(likely(retval)) { + if (ifd->revents & POLLIN || ifd->revents & POLLPRI) { + debug(D_STREAM, "STREAM: Data added to send buffer (current buffer length %zu bytes)...", buffer_strlen(host->rrdpush_sender_buffer)); + + char buffer[1000 + 1]; + if (read(host->rrdpush_sender_pipe[PIPE_READ], buffer, 1000) == -1) + error("STREAM %s [send to %s]: cannot read from internal pipe.", host->hostname, connected_to); + } + + if (ofd->revents & POLLOUT) { + if (begin < buffer_strlen(host->rrdpush_sender_buffer)) { + debug(D_STREAM, "STREAM: Sending data (current buffer length %zu bytes, begin = %zu)...", buffer_strlen(host->rrdpush_sender_buffer), begin); + + // BEGIN RRDPUSH LOCKED SESSION + + // during this session, data collectors + // will not be able to append data to our buffer + // but the socket is in non-blocking mode + // so, we will not block at send() + + netdata_thread_disable_cancelability(); + + debug(D_STREAM, "STREAM: Getting exclusive lock on host..."); + rrdpush_buffer_lock(host); + + debug(D_STREAM, "STREAM: Sending data, starting from %zu, size %zu...", begin, buffer_strlen(host->rrdpush_sender_buffer)); + ssize_t ret = send(host->rrdpush_sender_socket, &host->rrdpush_sender_buffer->buffer[begin], buffer_strlen(host->rrdpush_sender_buffer) - begin, MSG_DONTWAIT); + if (unlikely(ret == -1)) { + if (errno != EAGAIN && errno != EINTR && errno != EWOULDBLOCK) { + debug(D_STREAM, "STREAM: Send failed - closing socket..."); + error("STREAM %s [send to %s]: failed to send metrics - closing connection - we have sent %zu bytes on this connection.", host->hostname, connected_to, sent_bytes_on_this_connection); + rrdpush_sender_thread_close_socket(host); + } + else { + debug(D_STREAM, "STREAM: Send failed - will retry..."); + } + } + else if (likely(ret > 0)) { + // DEBUG - dump the string to see it + //char c = 
host->rrdpush_sender_buffer->buffer[begin + ret]; + //host->rrdpush_sender_buffer->buffer[begin + ret] = '\0'; + //debug(D_STREAM, "STREAM: sent from %zu to %zd:\n%s\n", begin, ret, &host->rrdpush_sender_buffer->buffer[begin]); + //host->rrdpush_sender_buffer->buffer[begin + ret] = c; + + sent_bytes_on_this_connection += ret; + sent_bytes += ret; + begin += ret; + + if (begin == buffer_strlen(host->rrdpush_sender_buffer)) { + // we send it all + + debug(D_STREAM, "STREAM: Sent %zd bytes (the whole buffer)...", ret); + buffer_flush(host->rrdpush_sender_buffer); + begin = 0; + } + else { + debug(D_STREAM, "STREAM: Sent %zd bytes (part of the data buffer)...", ret); + } + + last_sent_t = now_monotonic_sec(); + } + else { + debug(D_STREAM, "STREAM: send() returned %zd - closing the socket...", ret); + error("STREAM %s [send to %s]: failed to send metrics (send() returned %zd) - closing connection - we have sent %zu bytes on this connection.", + host->hostname, connected_to, ret, sent_bytes_on_this_connection); + rrdpush_sender_thread_close_socket(host); + } + + debug(D_STREAM, "STREAM: Releasing exclusive lock on host..."); + rrdpush_buffer_unlock(host); + + netdata_thread_enable_cancelability(); + + // END RRDPUSH LOCKED SESSION + } + else { + debug(D_STREAM, "STREAM: we have sent the entire buffer, but we received POLLOUT..."); + } + } + + if(host->rrdpush_sender_socket != -1) { + char *error = NULL; + + if (unlikely(ofd->revents & POLLERR)) + error = "socket reports errors (POLLERR)"; + + else if (unlikely(ofd->revents & POLLHUP)) + error = "connection closed by remote end (POLLHUP)"; + + else if (unlikely(ofd->revents & POLLNVAL)) + error = "connection is invalid (POLLNVAL)"; + + if(unlikely(error)) { + debug(D_STREAM, "STREAM: %s - closing socket...", error); + error("STREAM %s [send to %s]: %s - reopening socket - we have sent %zu bytes on this connection.", host->hostname, connected_to, error, sent_bytes_on_this_connection); + 
rrdpush_sender_thread_close_socket(host); + } + } + } + else { + debug(D_STREAM, "STREAM: poll() timed out."); + } + + // protection from overflow + if(buffer_strlen(host->rrdpush_sender_buffer) > max_size) { + debug(D_STREAM, "STREAM: Buffer is too big (%zu bytes), bigger than the max (%zu) - flushing it...", buffer_strlen(host->rrdpush_sender_buffer), max_size); + errno = 0; + error("STREAM %s [send to %s]: too many data pending - buffer is %zu bytes long, %zu unsent - we have sent %zu bytes in total, %zu on this connection. Closing connection to flush the data.", host->hostname, connected_to, host->rrdpush_sender_buffer->len, host->rrdpush_sender_buffer->len - begin, sent_bytes, sent_bytes_on_this_connection); + rrdpush_sender_thread_close_socket(host); + } + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + + +// ---------------------------------------------------------------------------- +// rrdpush receiver thread + +static void log_stream_connection(const char *client_ip, const char *client_port, const char *api_key, const char *machine_guid, const char *host, const char *msg) { + log_access("STREAM: %d '[%s]:%s' '%s' host '%s' api key '%s' machine guid '%s'", gettid(), client_ip, client_port, msg, host, api_key, machine_guid); +} + +static RRDPUSH_MULTIPLE_CONNECTIONS_STRATEGY get_multiple_connections_strategy(struct config *c, const char *section, const char *name, RRDPUSH_MULTIPLE_CONNECTIONS_STRATEGY def) { + char *value; + switch(def) { + default: + case RRDPUSH_MULTIPLE_CONNECTIONS_ALLOW: + value = "allow"; + break; + + case RRDPUSH_MULTIPLE_CONNECTIONS_DENY_NEW: + value = "deny"; + break; + } + + value = appconfig_get(c, section, name, value); + + RRDPUSH_MULTIPLE_CONNECTIONS_STRATEGY ret = def; + + if(strcasecmp(value, "allow") == 0 || strcasecmp(value, "permit") == 0 || strcasecmp(value, "accept") == 0) + ret = RRDPUSH_MULTIPLE_CONNECTIONS_ALLOW; + + else if(strcasecmp(value, "deny") == 0 || strcasecmp(value, "reject") == 0 || 
strcasecmp(value, "block") == 0) + ret = RRDPUSH_MULTIPLE_CONNECTIONS_DENY_NEW; + + else + error("Invalid stream config value at section [%s], setting '%s', value '%s'", section, name, value); + + return ret; +} + +static int rrdpush_receive(int fd + , const char *key + , const char *hostname + , const char *registry_hostname + , const char *machine_guid + , const char *os + , const char *timezone + , const char *tags + , const char *program_name + , const char *program_version + , int update_every + , char *client_ip + , char *client_port +) { + RRDHOST *host; + int history = default_rrd_history_entries; + RRD_MEMORY_MODE mode = default_rrd_memory_mode; + int health_enabled = default_health_enabled; + int rrdpush_enabled = default_rrdpush_enabled; + char *rrdpush_destination = default_rrdpush_destination; + char *rrdpush_api_key = default_rrdpush_api_key; + char *rrdpush_send_charts_matching = default_rrdpush_send_charts_matching; + time_t alarms_delay = 60; + RRDPUSH_MULTIPLE_CONNECTIONS_STRATEGY rrdpush_multiple_connections_strategy = RRDPUSH_MULTIPLE_CONNECTIONS_ALLOW; + + update_every = (int)appconfig_get_number(&stream_config, machine_guid, "update |