From 983a26d1a2c110b35db252b4b79c3f03eb4eeb4b Mon Sep 17 00:00:00 2001
From: "Austin S. Hemmelgarn"
Date: Mon, 13 Apr 2020 10:32:33 -0400
Subject: Revert "Revert changes since v1.21 in pereparation for hotfix release."

This reverts commit e2874320fc027f7ab51ab3e115d5b1889b8fd747.
---
 CMakeLists.txt                                     |   2 +
 Makefile.am                                        |  32 +-
 README.md                                          |   3 +-
 aclk/agent_cloud_link.c                            | 120 +++---
 aclk/agent_cloud_link.h                            |   7 +-
 aclk/mqtt.c                                        |  40 +-
 backends/backends.h                                |   4 -
 backends/prometheus/backend_prometheus.c           |  40 +-
 build_external/README.md                           |   3 +-
 collectors/COLLECTORS.md                           |   3 +-
 collectors/apps.plugin/apps_groups.conf            |   8 +-
 collectors/apps.plugin/apps_plugin.c               |   5 -
 .../charts.d.plugin/libreswan/libreswan.chart.sh   |  15 +
 collectors/macos.plugin/README.md                  |   2 +-
 collectors/python.d.plugin/mysql/mysql.chart.py    |   2 +-
 configure.ac                                       |  14 -
 daemon/common.h                                    |   2 +
 daemon/config/README.md                            |   3 +-
 daemon/main.c                                      |   2 -
 docs/generator/requirements.txt                    |   2 +-
 docs/netdata-security.md                           |   2 +-
 docs/step-by-step/step-00.md                       |   3 +-
 docs/step-by-step/step-04.md                       |   3 +-
 docs/tutorials/collect-unbound-metrics.md          |   3 +-
 docs/what-is-netdata.md                            |   3 +-
 exporting/aws_kinesis/aws_kinesis.c                |  36 +-
 exporting/exporting_engine.c                       |  10 +-
 exporting/exporting_engine.h                       |  77 +++-
 exporting/graphite/graphite.c                      |   2 +-
 exporting/init_connectors.c                        |  16 +-
 exporting/json/json.c                              |   2 +-
 exporting/mongodb/mongodb.c                        |  60 ++-
 exporting/mongodb/mongodb.h                        |   3 +
 exporting/opentsdb/opentsdb.c                      |   4 +-
 exporting/process_data.c                           |  17 +-
 exporting/prometheus/prometheus.c                  | 290 +++++++++----
 exporting/prometheus/prometheus.h                  |   6 +-
 exporting/prometheus/remote_write/remote_write.c   |   2 +-
 exporting/read_config.c                            | 183 ++++----
 exporting/send_data.c                              |  50 ++-
 exporting/send_internal_metrics.c                  | 166 +++++++-
 exporting/tests/exporting_doubles.c                |  22 +-
 exporting/tests/exporting_fixtures.c               |  32 ++
 exporting/tests/netdata_doubles.c                  |  86 ++++
 exporting/tests/test_exporting_engine.c            | 471 ++++++++++++++++++++-
 exporting/tests/test_exporting_engine.h            |  11 +-
 health/REFERENCE.md                                |   3 +-
 health/notifications/alarm-notify.sh.in            |  84 +++-
 health/notifications/dynatrace/Makefile.inc        |  12 +
 health/notifications/dynatrace/README.md           |  36 ++
 health/notifications/health_alarm_notify.conf      |  44 +-
 libnetdata/config/appconfig.c                      |   3 +-
 libnetdata/config/appconfig.h                      |  25 +-
 packaging/installer/install-required-packages.sh   |   6 +-
 packaging/installer/methods/macos.md               |   2 +-
 packaging/maintainers/README.md                    |   5 +-
 56 files changed, 1647 insertions(+), 442 deletions(-)
 create mode 100644 health/notifications/dynatrace/Makefile.inc
 create mode 100644 health/notifications/dynatrace/README.md

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6c5254b6e6..ca0ab99946 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1112,6 +1112,8 @@ endif()
         -Wl,--wrap=recv
         -Wl,--wrap=send
         -Wl,--wrap=connect_to_one_of
+        -Wl,--wrap=create_main_rusage_chart
+        -Wl,--wrap=send_main_rusage
         ${PROMETHEUS_REMOTE_WRITE_LINK_OPTIONS}
         ${KINESIS_LINK_OPTIONS}
         ${MONGODB_LINK_OPTIONS}
diff --git a/Makefile.am b/Makefile.am
index e4e240cbda..be84fc09ac 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -573,6 +573,7 @@ NETDATA_FILES = \
     $(LIBNETDATA_FILES) \
     $(API_PLUGIN_FILES) \
     $(BACKENDS_PLUGIN_FILES) \
+    $(EXPORTING_ENGINE_FILES) \
     $(CHECKS_PLUGIN_FILES) \
     $(HEALTH_PLUGIN_FILES) \
     $(IDLEJITTER_PLUGIN_FILES) \
@@ -608,12 +609,6 @@ if LINUX
 
 endif
 
-if ENABLE_EXPORTING
-    NETDATA_FILES += \
-        $(EXPORTING_ENGINE_FILES) \
-        $(NULL)
-endif
-
 NETDATA_COMMON_LIBS = \
     $(OPTIONAL_MATH_LIBS) \
     $(OPTIONAL_ZLIB_LIBS) \
@@ -745,23 +740,13 @@ if ENABLE_PLUGIN_SLABINFO
     $(NULL)
 endif
 
-if ENABLE_EXPORTING
-if ENABLE_BACKEND_KINESIS
-    netdata_SOURCES += $(KINESIS_EXPORTING_FILES)
-    netdata_LDADD += $(OPTIONAL_KINESIS_LIBS)
-endif
-endif
-
 if ENABLE_BACKEND_KINESIS
-    netdata_SOURCES += $(KINESIS_BACKEND_FILES)
+    netdata_SOURCES += $(KINESIS_BACKEND_FILES) $(KINESIS_EXPORTING_FILES)
     netdata_LDADD += $(OPTIONAL_KINESIS_LIBS)
 endif
 
 if ENABLE_BACKEND_PROMETHEUS_REMOTE_WRITE
-if ENABLE_EXPORTING
-    netdata_SOURCES += $(PROMETHEUS_REMOTE_WRITE_EXPORTING_FILES)
-endif
-    netdata_SOURCES += $(PROMETHEUS_REMOTE_WRITE_BACKEND_FILES)
+    netdata_SOURCES += $(PROMETHEUS_REMOTE_WRITE_BACKEND_FILES) $(PROMETHEUS_REMOTE_WRITE_EXPORTING_FILES)
     netdata_LDADD += $(OPTIONAL_PROMETHEUS_REMOTE_WRITE_LIBS)
     BUILT_SOURCES = \
         exporting/prometheus/remote_write/remote_write.pb.cc \
@@ -775,15 +760,8 @@ exporting/prometheus/remote_write/remote_write.pb.h: exporting/prometheus/remote
 
 endif
 
-if ENABLE_EXPORTING
-if ENABLE_BACKEND_MONGODB
-    netdata_SOURCES += $(MONGODB_EXPORTING_FILES)
-    netdata_LDADD += $(OPTIONAL_MONGOC_LIBS)
-endif
-endif
-
 if ENABLE_BACKEND_MONGODB
-    netdata_SOURCES += $(MONGODB_BACKEND_FILES)
+    netdata_SOURCES += $(MONGODB_BACKEND_FILES) $(MONGODB_EXPORTING_FILES)
     netdata_LDADD += $(OPTIONAL_MONGOC_LIBS)
 endif
 
@@ -895,6 +873,8 @@ if ENABLE_UNITTESTS
     -Wl,--wrap=recv \
     -Wl,--wrap=send \
     -Wl,--wrap=connect_to_one_of \
+    -Wl,--wrap=create_main_rusage_chart \
+    -Wl,--wrap=send_main_rusage \
     $(TEST_LDFLAGS) \
     $(NULL)
 exporting_tests_exporting_engine_testdriver_LDADD = $(NETDATA_COMMON_LIBS) $(TEST_LIBS)
diff --git a/README.md b/README.md
index 2fd1ff70cc..ea93bcc6d7 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
@@ -33,7 +34,7 @@ granularity. Run this long-term storage autonomously, or integrate Netdata with
 Netdata is **fast** and **efficient**, designed to permanently run on all systems (**physical** and **virtual** servers,
 **containers**, **IoT** devices), without disrupting their core function.
 
-Netdata is **free, open-source software** and it currently runs on **Linux**, **FreeBSD**, and **MacOS**, along with
+Netdata is **free, open-source software** and it currently runs on **Linux**, **FreeBSD**, and **macOS**, along with
 other systems derived from them, such as **Kubernetes** and **Docker**.
 
 Netdata is not hosted by the CNCF but is the 3rd most starred open-source project in the [Cloud Native Computing
diff --git a/aclk/agent_cloud_link.c b/aclk/agent_cloud_link.c
index 1adaf6bcce..a41d17e7bd 100644
--- a/aclk/agent_cloud_link.c
+++ b/aclk/agent_cloud_link.c
@@ -23,6 +23,8 @@ static char *aclk_password = NULL;
 static char *global_base_topic = NULL;
 static int aclk_connecting = 0;
 int aclk_connected = 0; // Exposed in the web-api
+usec_t aclk_session_us = 0;  // Used by the mqtt layer
+time_t aclk_session_sec = 0; // Used by the mqtt layer
 
 static netdata_mutex_t aclk_mutex = NETDATA_MUTEX_INITIALIZER;
 static netdata_mutex_t query_mutex = NETDATA_MUTEX_INITIALIZER;
@@ -185,7 +187,7 @@ biofailed:
  * should be called with
  *
  * mode 0 to reset the delay
- * mode 1 to sleep for the calculated amount of time [0 .. ACLK_MAX_BACKOFF_DELAY * 1000] ms
+ * mode 1 to calculate sleep time [0 .. ACLK_MAX_BACKOFF_DELAY * 1000] ms
  *
  */
 unsigned long int aclk_reconnect_delay(int mode)
@@ -208,8 +210,6 @@ unsigned long int aclk_reconnect_delay(int mode)
         delay = (delay * 1000) + (random() % 1000);
     }
 
-    // sleep_usec(USEC_PER_MS * delay);
-
     return delay;
 }
@@ -306,7 +306,7 @@ int aclk_queue_query(char *topic, char *data, char *msg_id, char *query, int run
         if (tmp_query->run_after == run_after) {
             QUERY_UNLOCK;
             QUERY_THREAD_WAKEUP;
-            return 1;
+            return 0;
         }
 
         if (last_query)
@@ -750,8 +750,8 @@ int aclk_execute_query(struct aclk_query *this_query)
 
         buffer_flush(local_buffer);
         local_buffer->contenttype = CT_APPLICATION_JSON;
-        aclk_create_header(local_buffer, "http", this_query->msg_id);
-
+        aclk_create_header(local_buffer, "http", this_query->msg_id, 0, 0);
+
         buffer_strcat(local_buffer, ",\n\t\"payload\": ");
         char *encoded_response = aclk_encode_response(w->response.data);
         buffer_sprintf(
@@ -821,11 +821,6 @@ int aclk_process_query()
             aclk_send_message(this_query->topic, this_query->query, this_query->msg_id);
             break;
 
-        case ACLK_CMD_ALARMS:
-            debug(D_ACLK, "EXECUTING an alarms update command");
-            aclk_send_alarm_metadata();
-            break;
-
         case ACLK_CMD_CLOUD:
             debug(D_ACLK, "EXECUTING a cloud command");
             aclk_execute_query(this_query);
@@ -868,18 +863,22 @@ int aclk_process_queries()
 static void aclk_query_thread_cleanup(void *ptr)
 {
     struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr;
-    static_thread->enabled = NETDATA_MAIN_THREAD_EXITING;
 
     info("cleaning up...");
 
-    COLLECTOR_LOCK;
-
     _reset_collector_list();
     freez(collector_list);
 
-    COLLECTOR_UNLOCK;
+    // Clean memory for pending queries if any
+    struct aclk_query *this_query;
 
-    static_thread->enabled = NETDATA_MAIN_THREAD_EXITED;
+    do {
+        this_query = aclk_queue_pop();
+        aclk_query_free(this_query);
+    } while (this_query);
+
+    freez(static_thread->thread);
+    freez(static_thread);
 }
 
 /**
@@ -916,7 +915,7 @@ void *aclk_query_main_thread(void *ptr)
             if (unlikely(aclk_queue_query("on_connect", NULL, NULL, NULL, 0, 1, ACLK_CMD_ONCONNECT))) {
                 errno = 0;
                 error("ACLK failed to queue on_connect command");
-                aclk_metadata_submitted = 0;
+                aclk_metadata_submitted = ACLK_METADATA_REQUIRED;
             }
         }
@@ -939,7 +938,6 @@ void *aclk_query_main_thread(void *ptr)
 // Thread cleanup
 static void aclk_main_cleanup(void *ptr)
 {
-    char payload[512];
     struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr;
     static_thread->enabled = NETDATA_MAIN_THREAD_EXITING;
@@ -952,24 +950,11 @@ static void aclk_main_cleanup(void *ptr)
             // Wakeup thread to cleanup
             QUERY_THREAD_WAKEUP;
             // Send a graceful disconnect message
-            char *msg_id = create_uuid();
-
-            usec_t time_created_offset_usec = now_realtime_usec();
-            time_t time_created = time_created_offset_usec / USEC_PER_SEC;
-            time_created_offset_usec = time_created_offset_usec % USEC_PER_SEC;
-
-            snprintfz(
-                payload, 511,
-                "{ \"type\": \"disconnect\","
-                " \"msg-id\": \"%s\","
-                " \"timestamp\": %ld,"
-                " \"timestamp-offset-usec\": %llu,"
-                " \"version\": %d,"
-                " \"payload\": \"graceful\" }",
-                msg_id, time_created, time_created_offset_usec, ACLK_VERSION);
-
-            aclk_send_message(ACLK_METADATA_TOPIC, payload, msg_id);
-            freez(msg_id);
+            BUFFER *b = buffer_create(512);
+            aclk_create_header(b, "disconnect", NULL, 0, 0);
+            buffer_strcat(b, ",\n\t\"payload\": \"graceful\"}\n");
+            aclk_send_message(ACLK_METADATA_TOPIC, (char*)buffer_tostring(b), NULL);
+            buffer_free(b);
 
             event_loop_timeout = now_realtime_sec() + 5;
             write_q = 1;
@@ -990,7 +975,6 @@ static void aclk_main_cleanup(void *ptr)
         }
     }
 
-
     info("Disconnected");
 
     static_thread->enabled = NETDATA_MAIN_THREAD_EXITED;
 }
@@ -1295,7 +1279,6 @@ void *aclk_main(void *ptr)
 {
     struct netdata_static_thread *query_thread;
 
-    netdata_thread_cleanup_push(aclk_main_cleanup, ptr);
     if (!netdata_cloud_setting) {
         info("Killing ACLK thread -> cloud functionality has been disabled");
         return NULL;
@@ -1335,10 +1318,11 @@ void *aclk_main(void *ptr)
         sleep_usec(USEC_PER_SEC * 60);
     }
     create_publish_base_topic();
-    create_private_key();
 
     usec_t reconnect_expiry = 0; // In usecs
 
+    netdata_thread_disable_cancelability();
+
     while (!netdata_exit) {
         static int first_init = 0;
         size_t write_q, write_q_bytes, read_q;
@@ -1392,7 +1376,8 @@ void *aclk_main(void *ptr)
         }
     } // forever
 exited:
-    aclk_shutdown();
+    // Wakeup query thread to cleanup
+    QUERY_THREAD_WAKEUP;
 
     freez(aclk_username);
     freez(aclk_password);
@@ -1401,7 +1386,7 @@ exited:
     if (aclk_private_key != NULL)
         RSA_free(aclk_private_key);
 
-    netdata_thread_cleanup_pop(1);
+    aclk_main_cleanup(ptr);
     return NULL;
 }
@@ -1514,7 +1499,7 @@ void aclk_shutdown()
     info("Shutdown complete");
 }
 
-inline void aclk_create_header(BUFFER *dest, char *type, char *msg_id)
+inline void aclk_create_header(BUFFER *dest, char *type, char *msg_id, time_t ts_secs, usec_t ts_us)
 {
     uuid_t uuid;
     char uuid_str[36 + 1];
@@ -1525,9 +1510,11 @@ inline void aclk_create_header(BUFFER *dest, char *type, char *msg_id)
         msg_id = uuid_str;
     }
 
-    usec_t time_created_offset_usec = now_realtime_usec();
-    time_t time_created = time_created_offset_usec / USEC_PER_SEC;
-    time_created_offset_usec = time_created_offset_usec % USEC_PER_SEC;
+    if (ts_secs == 0) {
+        ts_us = now_realtime_usec();
+        ts_secs = ts_us / USEC_PER_SEC;
+        ts_us = ts_us % USEC_PER_SEC;
+    }
 
     buffer_sprintf(
         dest,
@@ -1535,11 +1522,12 @@ inline void aclk_create_header(BUFFER *dest, char *type, char *msg_id)
         "\t\"msg-id\": \"%s\",\n"
         "\t\"timestamp\": %ld,\n"
         "\t\"timestamp-offset-usec\": %llu,\n"
-        "\t\"version\": %d,\n"
-        "\t\"payload\": ",
-        type, msg_id, time_created, time_created_offset_usec, ACLK_VERSION);
+        "\t\"connect\": %ld,\n"
+        "\t\"connect-offset-usec\": %llu,\n"
+        "\t\"version\": %d",
+        type, msg_id, ts_secs, ts_us, aclk_session_sec, aclk_session_us, ACLK_VERSION);
 
-    debug(D_ACLK, "Sending v%d msgid [%s] type [%s] time [%ld]", ACLK_VERSION, msg_id, type, time_created);
+    debug(D_ACLK, "Sending v%d msgid [%s] type [%s] time [%ld]", ACLK_VERSION, msg_id, type, ts_secs);
 }
 
 /*
@@ -1599,7 +1587,15 @@ void aclk_send_alarm_metadata()
 
     debug(D_ACLK, "Metadata alarms start");
 
-    aclk_create_header(local_buffer, "connect_alarms", msg_id);
+    // on_connect messages are sent on a health reload, if the on_connect message is real then we
+    // use the session time as the fake timestamp to indicate that it starts the session. If it is
+    // a fake on_connect message then use the real timestamp to indicate it is within the existing
+    // session.
+    if (aclk_metadata_submitted == ACLK_METADATA_SENT)
+        aclk_create_header(local_buffer, "connect_alarms", msg_id, 0, 0);
+    else
+        aclk_create_header(local_buffer, "connect_alarms", msg_id, aclk_session_sec, aclk_session_us);
+    buffer_strcat(local_buffer, ",\n\t\"payload\": ");
 
     buffer_sprintf(local_buffer, "{\n\t \"configured-alarms\" : ");
     health_alarms2json(localhost, local_buffer, 1);
@@ -1635,7 +1631,16 @@ int aclk_send_info_metadata()
 
     buffer_flush(local_buffer);
     local_buffer->contenttype = CT_APPLICATION_JSON;
 
-    aclk_create_header(local_buffer, "connect", msg_id);
+    // on_connect messages are sent on a health reload, if the on_connect message is real then we
+    // use the session time as the fake timestamp to indicate that it starts the session. If it is
+    // a fake on_connect message then use the real timestamp to indicate it is within the existing
+    // session.
+    if (aclk_metadata_submitted == ACLK_METADATA_SENT)
+        aclk_create_header(local_buffer, "connect", msg_id, 0, 0);
+    else
+        aclk_create_header(local_buffer, "connect", msg_id, aclk_session_sec, aclk_session_us);
+    buffer_strcat(local_buffer, ",\n\t\"payload\": ");
+
     buffer_sprintf(local_buffer, "{\n\t \"info\" : ");
     web_client_api_request_v1_info_fill_buffer(localhost, local_buffer);
 
     debug(D_ACLK, "Metadata %s with info has %zu bytes", msg_id, local_buffer->len);
@@ -1728,7 +1733,9 @@ int aclk_send_single_chart(char *hostname, char *chart)
     buffer_flush(local_buffer);
     local_buffer->contenttype = CT_APPLICATION_JSON;
 
-    aclk_create_header(local_buffer, "chart", msg_id);
+    aclk_create_header(local_buffer, "chart", msg_id, 0, 0);
+    buffer_strcat(local_buffer, ",\n\t\"payload\": ");
+
     rrdset2json(st, local_buffer, NULL, NULL, 1);
     buffer_sprintf(local_buffer, "\t\n}");
@@ -1793,7 +1800,8 @@ int aclk_update_alarm(RRDHOST *host, ALARM_ENTRY *ae)
     char *msg_id = create_uuid();
 
     buffer_flush(local_buffer);
-    aclk_create_header(local_buffer, "status-change", msg_id);
+    aclk_create_header(local_buffer, "status-change", msg_id, 0, 0);
+    buffer_strcat(local_buffer, ",\n\t\"payload\": ");
 
     netdata_rwlock_rdlock(&host->health_log.alarm_log_rwlock);
     health_alarm_entry2json_nolock(local_buffer, ae, host);
@@ -1863,6 +1871,12 @@ int aclk_handle_cloud_request(char *payload)
         return 1;
     }
 
+    // Checked to be "http", not needed anymore
+    if (likely(cloud_to_agent.type_id)) {
+        freez(cloud_to_agent.type_id);
+        cloud_to_agent.type_id = NULL;
+    }
+
     if (unlikely(aclk_submit_request(&cloud_to_agent)))
        debug(D_ACLK, "ACLK failed to queue incoming message (%s)", payload);
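The `aclk_create_header()` change above lets callers stamp a message either with the current wall-clock time or with the recorded session-start time (timestamp zero meaning "use now"). A minimal standalone sketch of that pattern follows; `session_sec`/`session_us` mirror the patch's globals, and the JSON layout is abbreviated:

```c
#include <stdio.h>
#include <sys/time.h>
#include <time.h>

static time_t session_sec; /* recorded once, when the MQTT session is created */
static long   session_us;

/* ts_secs == 0 means "stamp with the current time" */
static void create_header(char *dst, size_t len, const char *type, time_t ts_secs, long ts_us)
{
    if (ts_secs == 0) {
        struct timeval now;
        gettimeofday(&now, NULL);
        ts_secs = now.tv_sec;
        ts_us = now.tv_usec;
    }
    snprintf(dst, len,
             "{ \"type\": \"%s\", \"timestamp\": %ld, \"timestamp-offset-usec\": %ld,"
             " \"connect\": %ld, \"connect-offset-usec\": %ld }",
             type, (long)ts_secs, ts_us, (long)session_sec, session_us);
}
```

Passing an explicit timestamp is what allows the "fake" on_connect messages described in the comment to carry the session time instead of the send time.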
diff --git a/aclk/agent_cloud_link.h b/aclk/agent_cloud_link.h
index faf4932f84..f147669e5d 100644
--- a/aclk/agent_cloud_link.h
+++ b/aclk/agent_cloud_link.h
@@ -44,7 +44,6 @@ typedef enum aclk_cmd {
     ACLK_CMD_CHART,
     ACLK_CMD_CHARTDEL,
     ACLK_CMD_ALARM,
-    ACLK_CMD_ALARMS,
     ACLK_CMD_MAX
 } ACLK_CMD;
@@ -74,16 +73,12 @@ void *aclk_main(void *ptr);
 
 extern int aclk_send_message(char *sub_topic, char *message, char *msg_id);
 
-//int aclk_init();
-//char *get_base_topic();
-
 extern char *is_agent_claimed(void);
 extern void aclk_lws_wss_mqtt_layer_disconect_notif();
 char *create_uuid();
 
 // callbacks for agent cloud link
 int aclk_subscribe(char *topic, int qos);
-void aclk_shutdown();
 int cloud_to_agent_parse(JSON_ENTRY *e);
 void aclk_disconnect();
 void aclk_connect();
@@ -98,7 +93,7 @@ struct aclk_query *
 aclk_query_find(char *token, char *data, char *msg_id, char *query, ACLK_CMD cmd, struct aclk_query **last_query);
 int aclk_update_chart(RRDHOST *host, char *chart_name, ACLK_CMD aclk_cmd);
 int aclk_update_alarm(RRDHOST *host, ALARM_ENTRY *ae);
-void aclk_create_header(BUFFER *dest, char *type, char *msg_id);
+void aclk_create_header(BUFFER *dest, char *type, char *msg_id, time_t ts_secs, usec_t ts_us);
 int aclk_handle_cloud_request(char *payload);
 int aclk_submit_request(struct aclk_request *);
 void aclk_add_collector(const char *hostname, const char *plugin_name, const char *module_name);
diff --git a/aclk/mqtt.c b/aclk/mqtt.c
index dad32b578b..b070f7fb09 100644
--- a/aclk/mqtt.c
+++ b/aclk/mqtt.c
@@ -5,6 +5,9 @@
 #include "mqtt.h"
 #include "aclk_lws_wss_client.h"
 
+extern usec_t aclk_session_us;
+extern time_t aclk_session_sec;
+
 inline const char *_link_strerror(int rc)
 {
     return mosquitto_strerror(rc);
@@ -49,7 +52,12 @@ void disconnect_callback(struct mosquitto *mosq, void *obj, int rc)
     UNUSED(obj);
     UNUSED(rc);
 
-    info("Connection to cloud failed");
+    if (netdata_exit)
+        info("Connection to cloud terminated due to agent shutdown");
+    else {
+        errno = 0;
+        error("Connection to cloud failed");
+    }
 
     aclk_disconnect();
 
     aclk_lws_wss_mqtt_layer_disconect_notif();
@@ -131,6 +139,11 @@ static int _mqtt_create_connection(char *username, char *password)
         return MOSQ_ERR_UNKNOWN;
     }
 
+    // Record the session start time to allow a nominal LWT timestamp
+    usec_t now = now_realtime_usec();
+    aclk_session_sec = now / USEC_PER_SEC;
+    aclk_session_us = now % USEC_PER_SEC;
+
     _link_set_lwt("outbound/meta", 2);
 
     mosquitto_connect_callback_set(mosq, connect_callback);
@@ -259,7 +272,6 @@ int _link_set_lwt(char *sub_topic, int qos)
 {
     int rc;
     char topic[ACLK_MAX_TOPIC + 1];
-    char payload[512];
     char *final_topic;
 
     final_topic = get_topic(sub_topic, topic, ACLK_MAX_TOPIC);
@@ -269,25 +281,13 @@ int _link_set_lwt(char *sub_topic, int qos)
         return 1;
     }
 
-    usec_t time_created_offset_usec = now_realtime_usec();
-    time_t time_created = time_created_offset_usec / USEC_PER_SEC;
-    time_created_offset_usec = time_created_offset_usec % USEC_PER_SEC;
-
-    char *msg_id = create_uuid();
-
-    snprintfz(
-        payload, 511,
-        "{ \"type\": \"disconnect\","
-        " \"msg-id\": \"%s\","
-        " \"timestamp\": %ld,"
-        " \"timestamp-offset-usec\": %llu,"
-        " \"version\": %d,"
-        " \"payload\": \"unexpected\" }",
-        msg_id, time_created, time_created_offset_usec, ACLK_VERSION);
-
-    freez(msg_id);
+    usec_t lwt_time = aclk_session_sec * USEC_PER_SEC + aclk_session_us + 1;
+    BUFFER *b = buffer_create(512);
+    aclk_create_header(b, "disconnect", NULL, lwt_time / USEC_PER_SEC, lwt_time % USEC_PER_SEC);
+    buffer_strcat(b, ", \"payload\": \"unexpected\" }");
+    rc = mosquitto_will_set(mosq, topic, buffer_strlen(b), buffer_tostring(b), qos, 0);
+    buffer_free(b);
 
-    rc = mosquitto_will_set(mosq, topic, strlen(payload), (const void *) payload, qos, 0);
     return rc;
 }
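The `_link_set_lwt()` hunk uses MQTT's Last Will and Testament: the "unexpected disconnect" payload must be handed to the broker at connect time, because the broker publishes it on the client's behalf if the session drops without a clean DISCONNECT. A minimal sketch of that registration using libmosquitto (error handling trimmed; build with `-lmosquitto`):

```c
#include <mosquitto.h>
#include <string.h>

int set_unexpected_disconnect_will(struct mosquitto *mosq, const char *topic)
{
    /* Built up front: the client is gone by the time this gets published,
     * which is why the patch stamps it with session time + 1 microsecond
     * rather than a real send time. */
    const char *payload = "{ \"type\": \"disconnect\", \"payload\": \"unexpected\" }";
    return mosquitto_will_set(mosq, topic, (int)strlen(payload), payload,
                              2 /* qos */, false /* retain */);
}
```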
diff --git a/backends/backends.h b/backends/backends.h
index 212823a078..efa88a7f22 100644
--- a/backends/backends.h
+++ b/backends/backends.h
@@ -27,10 +27,6 @@ typedef enum backend_types {
     BACKEND_TYPE_NUM // Number of backend types
 } BACKEND_TYPE;
 
-#ifdef ENABLE_EXPORTING
-#include "exporting/exporting_engine.h"
-#endif
-
 typedef int (**backend_response_checker_t)(BUFFER *);
 typedef int (**backend_request_formatter_t)(BUFFER *, const char *, RRDHOST *, const char *, RRDSET *, RRDDIM *, time_t, time_t, BACKEND_OPTIONS);
diff --git a/backends/prometheus/backend_prometheus.c b/backends/prometheus/backend_prometheus.c
index b3f955e15f..0a7b3a3391 100644
--- a/backends/prometheus/backend_prometheus.c
+++ b/backends/prometheus/backend_prometheus.c
@@ -44,7 +44,7 @@ static inline time_t prometheus_server_last_access(const char *server, RRDHOST *
     return 0;
 }
 
-static inline size_t prometheus_name_copy(char *d, const char *s, size_t usable) {
+static inline size_t backends_prometheus_name_copy(char *d, const char *s, size_t usable) {
     size_t n;
 
     for(n = 0; *s && n < usable ; d++, s++, n++) {
@@ -58,7 +58,7 @@ static inline size_t prometheus_name_copy(char *d, const char *s, size_t usable)
     return n;
 }
 
-static inline size_t prometheus_label_copy(char *d, const char *s, size_t usable) {
+static inline size_t backends_prometheus_label_copy(char *d, const char *s, size_t usable) {
     size_t n;
 
     // make sure we can escape one character without overflowing the buffer
@@ -78,7 +78,7 @@ static inline size_t prometheus_label_copy(char *d, const char *s, size_t usable
     return n;
 }
 
-static inline char *prometheus_units_copy(char *d, const char *s, size_t usable, int showoldunits) {
+static inline char *backends_prometheus_units_copy(char *d, const char *s, size_t usable, int showoldunits) {
     const char *sorig = s;
     char *ret = d;
     size_t n;
@@ -194,7 +194,7 @@ static int print_host_variables(RRDVAR *rv, void *data) {
             label_post = "}";
         }
 
-        prometheus_name_copy(opts->name, rv->name, sizeof(opts->name));
+        backends_prometheus_name_copy(opts->name, rv->name, sizeof(opts->name));
 
         if(opts->output_options & BACKENDS_PROMETHEUS_OUTPUT_TIMESTAMPS)
             buffer_sprintf(opts->wb
@@ -227,7 +227,7 @@ static void rrd_stats_api_v1_charts_allmetrics_prometheus(RRDHOST *host, BUFFER
     rrdhost_rdlock(host);
 
     char hostname[PROMETHEUS_ELEMENT_MAX + 1];
-    prometheus_label_copy(hostname, host->hostname, PROMETHEUS_ELEMENT_MAX);
+    backends_prometheus_label_copy(hostname, host->hostname, PROMETHEUS_ELEMENT_MAX);
 
     char labels[PROMETHEUS_LABELS_MAX + 1] = "";
     if(allhosts) {
@@ -299,9 +299,9 @@ static void rrd_stats_api_v1_charts_allmetrics_prometheus(RRDHOST *host, BUFFER
             char family[PROMETHEUS_ELEMENT_MAX + 1];
             char units[PROMETHEUS_ELEMENT_MAX + 1] = "";
 
-            prometheus_label_copy(chart, (output_options & BACKENDS_PROMETHEUS_OUTPUT_NAMES && st->name)?st->name:st->id, PROMETHEUS_ELEMENT_MAX);
-            prometheus_label_copy(family, st->family, PROMETHEUS_ELEMENT_MAX);
-            prometheus_name_copy(context, st->context, PROMETHEUS_ELEMENT_MAX);
+            backends_prometheus_label_copy(chart, (output_options & BACKENDS_PROMETHEUS_OUTPUT_NAMES && st->name)?st->name:st->id, PROMETHEUS_ELEMENT_MAX);
+            backends_prometheus_label_copy(family, st->family, PROMETHEUS_ELEMENT_MAX);
+            backends_prometheus_name_copy(context, st->context, PROMETHEUS_ELEMENT_MAX);
 
             if(likely(backends_can_send_rrdset(backend_options, st))) {
                 rrdset_rdlock(st);
@@ -317,7 +317,7 @@ static void rrd_stats_api_v1_charts_allmetrics_prometheus(RRDHOST *host, BUFFER
                 }
                 else {
                     if(BACKEND_OPTIONS_DATA_SOURCE(backend_options) == BACKEND_SOURCE_DATA_AVERAGE && !(output_options & BACKENDS_PROMETHEUS_OUTPUT_HIDEUNITS))
-                        prometheus_units_copy(units, st->units, PROMETHEUS_ELEMENT_MAX, output_options & BACKENDS_PROMETHEUS_OUTPUT_OLDUNITS);
+                        backends_prometheus_units_copy(units, st->units, PROMETHEUS_ELEMENT_MAX, output_options & BACKENDS_PROMETHEUS_OUTPUT_OLDUNITS);
                 }
 
                 if(unlikely(output_options & BACKENDS_PROMETHEUS_OUTPUT_HELP))
@@ -354,7 +354,7 @@ static void rrd_stats_api_v1_charts_allmetrics_prometheus(RRDHOST *host, BUFFER
                         // all the dimensions of the chart, has the same algorithm, multiplier and divisor
                         // we add all dimensions as labels
 
-                        prometheus_label_copy(dimension, (output_options & BACKENDS_PROMETHEUS_OUTPUT_NAMES && rd->name) ? rd->name : rd->id, PROMETHEUS_ELEMENT_MAX);
+                        backends_prometheus_label_copy(dimension, (output_options & BACKENDS_PROMETHEUS_OUTPUT_NAMES && rd->name) ? rd->name : rd->id, PROMETHEUS_ELEMENT_MAX);
 
                         if(unlikely(output_options & BACKENDS_PROMETHEUS_OUTPUT_HELP))
                             buffer_sprintf(wb
@@ -411,7 +411,7 @@ static void rrd_stats_api_v1_charts_allmetrics_prometheus(RRDHOST *host, BUFFER
                         // the dimensions of the chart, do not have the same algorithm, multiplier or divisor
                         // we create a metric per dimension
 
-                        prometheus_name_copy(dimension, (output_options & BACKENDS_PROMETHEUS_OUTPUT_NAMES && rd->name) ? rd->name : rd->id, PROMETHEUS_ELEMENT_MAX);
+                        backends_prometheus_name_copy(dimension, (output_options & BACKENDS_PROMETHEUS_OUTPUT_NAMES && rd->name) ? rd->name : rd->id, PROMETHEUS_ELEMENT_MAX);
 
                         if(unlikely(output_options & BACKENDS_PROMETHEUS_OUTPUT_HELP))
                             buffer_sprintf(wb
@@ -480,7 +480,7 @@ static void rrd_stats_api_v1_charts_allmetrics_prometheus(RRDHOST *host, BUFFER
                     else if(BACKEND_OPTIONS_DATA_SOURCE(backend_options) == BACKEND_SOURCE_DATA_SUM)
                         suffix = "_sum";
 
-                    prometheus_label_copy(dimension, (output_options & BACKENDS_PROMETHEUS_OUTPUT_NAMES && rd->name) ? rd->name : rd->id, PROMETHEUS_ELEMENT_MAX);
+                    backends_prometheus_label_copy(dimension, (output_options & BACKENDS_PROMETHEUS_OUTPUT_NAMES && rd->name) ? rd->name : rd->id, PROMETHEUS_ELEMENT_MAX);
 
                     if (unlikely(output_options & BACKENDS_PROMETHEUS_OUTPUT_HELP))
                         buffer_sprintf(wb, "# COMMENT %s_%s%s%s: dimension \"%s\", value is %s, gauge, dt %llu to %llu inclusive\n"
@@ -593,7 +593,7 @@ void backends_rrd_stats_remote_write_allmetrics_prometheus(
     , size_t *count_dims_skipped
 ) {
     char hostname[PROMETHEUS_ELEMENT_MAX + 1];
-    prometheus_label_copy(hostname, __hostname, PROMETHEUS_ELEMENT_MAX);
+    backends_prometheus_label_copy(hostname, __hostname, PROMETHEUS_ELEMENT_MAX);
 
     backends_add_host_info("netdata_info", hostname, host->program_name, host->program_version, now_realtime_usec() / USEC_PER_MS);
@@ -620,9 +620,9 @@ void backends_rrd_stats_remote_write_allmetrics_prometheus(
         char family[PROMETHEUS_ELEMENT_MAX + 1];
         char units[PROMETHEUS_ELEMENT_MAX + 1] = "";
 
-        prometheus_label_copy(chart, (backend_options & BACKEND_OPTION_SEND_NAMES && st->name)?st->name:st->id, PROMETHEUS_ELEMENT_MAX);
-        prometheus_label_copy(family, st->family, PROMETHEUS_ELEMENT_MAX);
-        prometheus_name_copy(context, st->context, PROMETHEUS_ELEMENT_MAX);
+        backends_prometheus_label_copy(chart, (backend_options & BACKEND_OPTION_SEND_NAMES && st->name)?st->name:st->id, PROMETHEUS_ELEMENT_MAX);
+        backends_prometheus_label_copy(family, st->family, PROMETHEUS_ELEMENT_MAX);
+        backends_prometheus_name_copy(context, st->context, PROMETHEUS_ELEMENT_MAX);
 
         if(likely(backends_can_send_rrdset(backend_options, st))) {
             rrdset_rdlock(st);
@@ -640,7 +640,7 @@ void backends_rrd_stats_remote_write_allmetrics_prometheus(
             }
             else {
                 if(BACKEND_OPTIONS_DATA_SOURCE(backend_options) == BACKEND_SOURCE_DATA_AVERAGE)
-                    prometheus_units_copy(units, st->units, PROMETHEUS_ELEMENT_MAX, 0);
+                    backends_prometheus_units_copy(units, st->units, PROMETHEUS_ELEMENT_MAX, 0);
             }
 
             // for each dimension
@@ -664,7 +664,7 @@ void backends_rrd_stats_remote_write_allmetrics_prometheus(
                         // all the dimensions of the chart, has the same algorithm, multiplier and divisor
                         // we add all dimensions as labels
 
-                        prometheus_label_copy(dimension, (backend_options & BACKEND_OPTION_SEND_NAMES && rd->name) ? rd->name : rd->id, PROMETHEUS_ELEMENT_MAX);
+                        backends_prometheus_label_copy(dimension, (backend_options & BACKEND_OPTION_SEND_NAMES && rd->name) ? rd->name : rd->id, PROMETHEUS_ELEMENT_MAX);
                         snprintf(name, PROMETHEUS_LABELS_MAX, "%s_%s%s", prefix, context, suffix);
 
                         backends_add_metric(name, chart, family, dimension, hostname, rd->last_collected_value, timeval_msec(&rd->last_collected_time));
@@ -674,7 +674,7 @@ void backends_rrd_stats_remote_write_allmetrics_prometheus(
                         // the dimensions of the chart, do not have the same algorithm, multiplier or divisor
                         // we create a metric per dimension
 
-                        prometheus_name_copy(dimension, (backend_options & BACKEND_OPTION_SEND_NAMES && rd->name) ? rd->name : rd->id, PROMETHEUS_ELEMENT_MAX);
+                        backends_prometheus_name_copy(dimension, (backend_options & BACKEND_OPTION_SEND_NAMES && rd->name) ? rd->name : rd->id, PROMETHEUS_ELEMENT_MAX);
                         snprintf(name, PROMETHEUS_LABELS_MAX, "%s_%s_%s%s", prefix, context, dimension, suffix);
 
                         backends_add_metric(name, chart, family, NULL, hostname, rd->last_collected_value, timeval_msec(&rd->last_collected_time));
@@ -694,7 +694,7 @@ void backends_rrd_stats_remote_write_allmetrics_prometheus(
                 else if(BACKEND_OPTIONS_DATA_SOURCE(backend_options) == BACKEND_SOURCE_DATA_SUM)
                     suffix = "_sum";
 
-                prometheus_label_copy(dimension, (backend_options & BACKEND_OPTION_SEND_NAMES && rd->name) ? rd->name : rd->id, PROMETHEUS_ELEMENT_MAX);
+                backends_prometheus_label_copy(dimension, (backend_options & BACKEND_OPTION_SEND_NAMES && rd->name) ? rd->name : rd->id, PROMETHEUS_ELEMENT_MAX);
                 snprintf(name, PROMETHEUS_LABELS_MAX, "%s_%s%s%s", prefix, context, units, suffix);
 
                 backends_add_metric(name, chart, family, dimension, hostname, value, last_t * MSEC_PER_SEC);
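The renamed `backends_prometheus_name_copy()`/`_label_copy()` helpers exist because Prometheus metric names only allow `[a-zA-Z0-9_:]`, so Netdata's dotted chart IDs must be sanitized on the way out. A self-contained sketch of the core idea (the real helpers also handle label escaping and unit rewriting, which this omits):

```c
#include <ctype.h>
#include <stdio.h>

/* Map anything outside the Prometheus name alphabet to '_'. */
static size_t prom_name_copy(char *d, const char *s, size_t usable)
{
    size_t n;
    for (n = 0; *s && n < usable; d++, s++, n++) {
        char c = *s;
        *d = (isalnum((unsigned char)c) || c == '_' || c == ':') ? c : '_';
    }
    *d = '\0';
    return n;
}

int main(void)
{
    char out[64];
    prom_name_copy(out, "disk.io", sizeof(out) - 1);
    printf("%s\n", out); /* prints: disk_io */
    return 0;
}
```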
diff --git a/build_external/README.md b/build_external/README.md
index 3614af1135..d04851e28e 100644
--- a/build_external/README.md
+++ b/build_external/README.md
@@ -1,6 +1,7 @@
@@ -9,7 +10,7 @@ custom_edit_url: https://github.com/netdata/netdata/edit/master/build_external/R
 This wraps the build-system in Docker so that the host system and the target system are decoupled. This allows:
 
-*   Cross-compilation (e.g. linux development from MacOS)
+*   Cross-compilation (e.g. linux development from macOS)
 *   Cross-distro (e.g. using CentOS user-land while developing on Debian)
 *   Multi-host scenarios (e.g. master/slave configurations)
 *   Bleeding-edge sceneraios (e.g. using the ACLK (**currently for internal-use only**))
diff --git a/collectors/COLLECTORS.md b/collectors/COLLECTORS.md
index 190f45f8fd..e62d9f9d00 100644
--- a/collectors/COLLECTORS.md
+++ b/collectors/COLLECTORS.md
@@ -1,6 +1,7 @@
@@ -37,7 +38,7 @@ collector—we may be looking for contributions from users such as yourself!
 | [diskspace.plugin](diskspace.plugin/README.md) | Linux | Collects disk space usage metrics on Linux mount points. |
 | [freebsd.plugin](freebsd.plugin/README.md) | FreeBSD | Collects resource usage and performance data on FreeBSD systems. |
 | [idlejitter.plugin](idlejitter.plugin/README.md) | any | Measures CPU latency and jitter on all operating systems. |
-| [macos.plugin](macos.plugin/README.md) | macos | Collects resource usage and performance data on MacOS systems. |
+| [macos.plugin](macos.plugin/README.md) | macos | Collects resource usage and performance data on macOS systems. |
 | [proc.plugin](proc.plugin/README.md) | Linux | Collects resource usage and performance data on Linux systems. |
 | [slabinfo.plugin](slabinfo.plugin/README.md) | Linux | Collects kernel SLAB details on Linux systems. |
 | [statsd.plugin](statsd.plugin/README.md) | any | Implements a high performance `statsd` server for Netdata. |
diff --git a/collectors/apps.plugin/apps_groups.conf b/collectors/apps.plugin/apps_groups.conf
index c0f82acfca..2ed6192b59 100644
--- a/collectors/apps.plugin/apps_groups.conf
+++ b/collectors/apps.plugin/apps_groups.conf
@@ -96,7 +96,7 @@ fail2ban: fail2ban*
 # -----------------------------------------------------------------------------
 # web/ftp servers
 
-httpd: apache* httpd nginx* lighttpd
+httpd: apache* httpd nginx* lighttpd hiawatha
 proxy: squid* c-icap squidGuard varnish*
 php: php*
 ftpd: proftpd in.tftpd vsftpd
@@ -107,7 +107,7 @@ puma: *puma*
 # -----------------------------------------------------------------------------
 # database servers
 
-sql: mysqld* mariad* postgres* postmaster* oracle_* ora_*
+sql: mysqld* mariad* postgres* postmaster* oracle_* ora_* sqlservr
 nosql: mongod redis* memcached *couchdb*
 timedb: prometheus *carbon-cache.py* *carbon-aggregator.py* *graphite/manage.py* *net.opentsdb.tools.TSDMain* influxd*
 columndb: clickhouse-server*
@@ -223,7 +223,7 @@ torrents: *deluge* transmission* *SickBeard* *CouchPotato* *rtorrent*
 # -----------------------------------------------------------------------------
 # backup servers and clients
 
-backup: rsync lsyncd bacula*
+backup: rsync lsyncd bacula* borg
 
 # -----------------------------------------------------------------------------
 # cron
@@ -239,7 +239,7 @@ ups: upsmon upsd */nut/*
 # media players, servers, clients
 
 media: mplayer vlc xine mediatomb omxplayer* kodi* xbmc* mediacenter eventlircd
-media: mpd minidlnad mt-daapd avahi* Plex*
+media: mpd minidlnad mt-daapd avahi* Plex* squeeze*
 
 # -----------------------------------------------------------------------------
 # java applications
diff --git a/collectors/apps.plugin/apps_plugin.c b/collectors/apps.plugin/apps_plugin.c
index 5f40270132..31a241c169 100644
--- a/collectors/apps.plugin/apps_plugin.c
+++ b/collectors/apps.plugin/apps_plugin.c
@@ -4110,8 +4110,6 @@ int main(int argc, char **argv) {
 
     procfile_adaptive_initial_allocation = 1;
 
-    time_t started_t = now_monotonic_sec();
-
     get_system_HZ();
 #ifdef __FreeBSD__
     time_factor = 1000000ULL / RATES_DETAIL; // FreeBSD uses usecs
@@ -4212,8 +4210,5 @@ int main(int argc, char **argv) {
             show_guest_time_old = show_guest_time;
 
         debug_log("done Loop No %zu", global_iterations_counter);
-
-        // restart check (14400 seconds)
-        if(now_monotonic_sec() - started_t > 14400) exit(0);
     }
 }
diff --git a/collectors/charts.d.plugin/libreswan/libreswan.chart.sh b/collectors/charts.d.plugin/libreswan/libreswan.chart.sh
index 3d0d3e3f7d..1320983a53 100644
--- a/collectors/charts.d.plugin/libreswan/libreswan.chart.sh
+++ b/collectors/charts.d.plugin/libreswan/libreswan.chart.sh
@@ -37,6 +37,16 @@ declare -A libreswan_established_add_time=()
 # we need this to avoid converting tunnel names to chart IDs on every iteration
 declare -A libreswan_tunnel_charts=()
 
+is_able_sudo_ipsec() {
+	if ! sudo -n -l "${IPSEC_CMD}" whack --status > /dev/null 2>&1; then
+		return 1
+	fi
+	if ! sudo -n -l "${IPSEC_CMD}" whack --trafficstatus > /dev/null 2>&1; then
+		return 1
+	fi
+	return 0
+}
+
 # run the ipsec command
 libreswan_ipsec() {
 	if [ ${libreswan_sudo} -ne 0 ]; then
@@ -92,6 +102,11 @@ libreswan_check() {
 		return 1
 	fi
 
+	if [ ${libreswan_sudo} -ne 0 ] && ! is_able_sudo_ipsec; then
+		error "not enough permissions to execute ipsec with sudo. Disabling Libreswan plugin."
+		return 1
+	fi
+
 	# check that we can collect data
 	libreswan_get || return 1
diff --git a/collectors/macos.plugin/README.md b/collectors/macos.plugin/README.md
index 6655ab2317..7e61efcd04 100644
--- a/collectors/macos.plugin/README.md
+++ b/collectors/macos.plugin/README.md
@@ -7,7 +7,7 @@ custom_edit_url: https://github.com/netdata/netdata/edit/master/collectors/macos
 
 # macos.plugin
 
-Collects resource usage and performance data on MacOS systems
+Collects resource usage and performance data on macOS systems
 
 By default, Netdata will enable monitoring metrics for disks, memory, and network only when they are not zero. If they are constantly zero they are ignored. Metrics that will start having values, after Netdata is started, will be detected and charts will be automatically added to the dashboard (a refresh of the dashboard is needed for them to appear though). Use `yes` instead of `auto` in plugin configuration sections to enable these charts permanently. You can also set the `enable zero metrics` option to `yes` in the `[global]` section which enables charts with zero metrics for all internal Netdata plugins.
diff --git a/collectors/python.d.plugin/mysql/mysql.chart.py b/collectors/python.d.plugin/mysql/mysql.chart.py
index a2109f6ba9..1737e16b4d 100644
--- a/collectors/python.d.plugin/mysql/mysql.chart.py
+++ b/collectors/python.d.plugin/mysql/mysql.chart.py
@@ -347,7 +347,7 @@ CHARTS = {
         ]
     },
     'threads_creation_rate': {
-        'options': [None, 'Threads Creation Rate', 'threads/s', 'threads', 'mysql.threads', 'line'],
+        'options': [None, 'Threads Creation Rate', 'threads/s', 'threads', 'mysql.threads_creation_rate', 'line'],
         'lines': [
             ['Threads_created', 'created', 'incremental'],
         ]
diff --git a/configure.ac b/configure.ac
index 7347dac1d5..463a70cbbc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -433,20 +433,6 @@ fi
 AC_MSG_RESULT([${enable_https}])
 AM_CONDITIONAL([ENABLE_HTTPS], [test "${enable_https}" = "yes"])
 
-# -----------------------------------------------------------------------------
-# Exporting engine
-AC_MSG_CHECKING([if netdata exporting engine should be used])
-if test "${UV_LIBS}"; then
-    enable_exporting_engine="yes"
-    AC_DEFINE([ENABLE_EXPORTING], [1], [netdata exporting engine usability])
-    OPTIONAL_UV_CFLAGS="${UV_CFLAGS}"
-    OPTIONAL_UV_LIBS="${UV_LIBS}"
-else
-    enable_exporting_engine="no"
-fi
-AC_MSG_RESULT([${enable_exporting_engine}])
-AM_CONDITIONAL([ENABLE_EXPORTING], [test "${enable_exporting_engine}" = "yes"])
-
 # -----------------------------------------------------------------------------
 # JSON-C
 test "${enable_jsonc}" = "yes" -a -z "${JSONC_LIBS}" && \
diff --git a/daemon/common.h b/daemon/common.h
index fe799efe09..f86e61543f 100644
--- a/daemon/common.h
+++ b/daemon/common.h
@@ -50,6 +50,8 @@
 
 // backends for archiving the metrics
 #include "backends/backends.h"
+// the new exporting engine for archiving the metrics
+#include "exporting/exporting_engine.h"
 
 // the netdata API
 #include "web/api/web_api_v1.h"
diff --git a/daemon/config/README.md b/daemon/config/README.md
index 73fbf4fa5c..01913747d3 100644
--- a/daemon/config/README.md
+++ b/daemon/config/README.md
@@ -1,6 +1,7 @@
@@ -220,7 +221,7 @@ For example, the `system.io` chart has the following default settings:
 These `dim` settings produce two dimensions, `in` and `out`, both of which use the `incremental` algorithm. By
 multiplying the value of `out` by -1, Netdata creates the negative values seen in the following area chart:
 
-![The system.io chart on a MacOS
+![The system.io chart on a macOS
 laptop](https://user-images.githubusercontent.com/1153921/69286708-2cfb3900-0bb1-11ea-9fcd-dd8fbb2adf11.png)
 
 [![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fdaemon%2Fconfig%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)](<>)
diff --git a/daemon/main.c b/daemon/main.c
index 20ca7d883e..e0de2c7357 100644
--- a/daemon/main.c
+++ b/daemon/main.c
@@ -80,9 +80,7 @@ struct netdata_static_thread static_threads[] = {
 
     // common plugins for all systems
     {"BACKENDS",             NULL, NULL, 1, NULL, NULL, backends_main},
-#ifdef ENABLE_EXPORTING
     {"EXPORTING",            NULL, NULL, 1, NULL, NULL, exporting_main},
-#endif
     {"WEB_SERVER[static1]",  NULL, NULL, 0, NULL, NULL, socket_listen_main_static_threaded},
     {"STREAM",               NULL, NULL, 0, NULL, NULL, rrdpush_sender_thread},
 
diff --git a/docs/generator/requirements.txt b/docs/generator/requirements.txt
index ac01be7aef..b68297aa6a 100644
--- a/docs/generator/requirements.txt
+++ b/docs/generator/requirements.txt
@@ -1,2 +1,2 @@
 mkdocs>=1.0.1
-mkdocs-material
+mkdocs-material==4.6.3
diff --git a/docs/netdata-security.md b/docs/netdata-security.md
index f149f2d453..821aaf3646 100644
--- a/docs/netdata-security.md
+++ b/docs/netdata-security.md
@@ -81,7 +81,7 @@ You can bind Netdata to multiple IPs and ports. If you use hostnames, Netdata wi
 
 For cloud based installations, if your cloud provider does not provide such a private LAN (or if you use multiple providers), you can create a virtual management and administration LAN with tools like `tincd` or `gvpe`. These tools create a mesh VPN allowing all servers to communicate securely and privately. Your administration stations join this mesh VPN to get access to management and administration tasks on all your cloud servers.
 
-For `gvpe` we have developed a [simple provisioning tool](https://github.com/netdata/netdata-demo-site/tree/master/gvpe) you may find handy (it includes statically compiled `gvpe` binaries for Linux and FreeBSD, and also a script to compile `gvpe` on your Mac). We use this to create a management and administration LAN for all Netdata demo sites (spread all over the internet using multiple hosting providers).
+For `gvpe` we have developed a [simple provisioning tool](https://github.com/netdata/netdata-demo-site/tree/master/gvpe) you may find handy (it includes statically compiled `gvpe` binaries for Linux and FreeBSD, and also a script to compile `gvpe` on your macOS system). We use this to create a management and administration LAN for all Netdata demo sites (spread all over the internet using multiple hosting providers).
 
 ---
diff --git a/docs/step-by-step/step-00.md b/docs/step-by-step/step-00.md
index f01dcb6532..2ff5cced58 100644
--- a/docs/step-by-step/step-00.md
+++ b/docs/step-by-step/step-00.md
@@ -1,6 +1,7 @@
@@ -21,7 +22,7 @@ If you have monitoring experience, or would rather get straight into configuring
 straight into code and configurations with our [getting started guide](../getting-started.md).
 
 > This tutorial contains instructions for Netdata installed on a Linux system. Many of the instructions will work on
-> other supported operating systems, like FreeBSD and MacOS, but we can't make any guarantees.
+> other supported operating systems, like FreeBSD and macOS, but we can't make any guarantees.
 
 ## Where to go if you need help
diff --git a/docs/step-by-step/step-04.md b/docs/step-by-step/step-04.md
index d403e208d7..8b63be34cd 100644
--- a/docs/step-by-step/step-04.md
+++ b/docs/step-by-step/step-04.md
@@ -1,6 +1,7 @@
@@ -61,7 +62,7 @@ an example file to your Netdata config directory and then allow you to edit it b
 > change permanent](https://stackoverflow.com/questions/13046624/how-to-permanently-export-a-variable-in-linux).
 
 Let's give it a shot. Navigate to your Netdata config directory. To use `edit-config` on `netdata.conf`, you need to
-have permissions to edit the file. On Linux/MacOS systems, you can usually use `sudo` to elevate your permissions.
+have permissions to edit the file. On Linux/macOS systems, you can usually use `sudo` to elevate your permissions.
 
 ```bash
 cd /etc/netdata # Replace this path with your Netdata config directory, if different as found in the steps above
diff --git a/docs/tutorials/collect-unbound-metrics.md b/docs/tutorials/collect-unbound-metrics.md
index 4fc7242d68..410bda84ce 100644
--- a/docs/tutorials/collect-unbound-metrics.md
+++ b/docs/tutorials/collect-unbound-metrics.md
@@ -1,6 +1,7 @@
@@ -11,7 +12,7 @@ custom_edit_url: https://github.com/netdata/netdata/edit/master/docs/tutorials/c
 Labs. In v1.19 of Netdata, we release a completely refactored collector for collecting real-time metrics from Unbound
 servers and displaying them in Netdata dashboards.
 
-Unbound runs on FreeBSD, OpenBSD, NetBSD, MacOS, Linux, and Windows, and supports DNS-over-TLS, which ensures that DNS
+Unbound runs on FreeBSD, OpenBSD, NetBSD, macOS, Linux, and Windows, and supports DNS-over-TLS, which ensures that DNS
 queries and answers are all encrypted with TLS. In theory, that should reduce the risk of eavesdropping or
 man-in-the-middle attacks when communicating to DNS servers.
diff --git a/docs/what-is-netdata.md b/docs/what-is-netdata.md
index 0504efa1d1..2a30124ef6 100644
--- a/docs/what-is-netdata.md
+++ b/docs/what-is-netdata.md
@@ -1,6 +1,7 @@
@@ -19,7 +20,7 @@ Netdata provides **unparalleled insights**, **in real-time**, of everything happ
 _Netdata is **fast** and **efficient**, designed to permanently run on all systems (**physical** & **virtual** servers,
 **containers**, **IoT** devices), without disrupting their core function._
 
-Netdata is **free, open-source software** and it currently runs on **Linux**, **FreeBSD**, and **MacOS**.
+Netdata is **free, open-source software** and it currently runs on **Linux**, **FreeBSD**, and **macOS**.
 
 ---
diff --git a/exporting/aws_kinesis/aws_kinesis.c b/exporting/aws_kinesis/aws_kinesis.c
index 4b0d5f74a4..938569a9eb 100644
--- a/exporting/aws_kinesis/aws_kinesis.c
+++ b/exporting/aws_kinesis/aws_kinesis.c
@@ -75,9 +75,23 @@ void aws_kinesis_connector_worker(void *instance_p)
         uv_mutex_lock(&instance->mutex);
         uv_cond_wait(&instance->cond_var, &instance->mutex);
 
+        // reset the monitoring chart counters
+        stats->received_bytes =
+        stats->sent_bytes =
+        stats->sent_metrics =
+        stats->lost_metrics =
+        stats->receptions =
+        stats->transmission_successes =
+        stats->transmission_failures =
+        stats->data_lost_events =
+        stats->lost_bytes =
+        stats->reconnects = 0;
+
         BUFFER *buffer = (BUFFER *)instance->buffer;
         size_t buffer_len = buffer_strlen(buffer);
 
+        stats->buffered_bytes = buffer_len;
+
         size_t sent = 0;
 
         while (sent < buffer_len) {
@@ -115,7 +129,7 @@ void aws_kinesis_connector_worker(void *instance_p)
                 connector_specific_data, connector_specific_config->stream_name, partition_key, first_char, record_len);
 
             sent += record_len;
-            stats->chart_transmission_successes++;
+            stats->transmission_successes++;
 
             size_t sent_bytes = 0, lost_bytes = 0;
@@ -127,30 +141,34 @@ void aws_kinesis_connector_worker(void *instance_p)
                     "EXPORTING: failed to write data to database backend '%s'. Willing to write %zu bytes, wrote %zu bytes.",
                     instance->config.destination, sent_bytes, sent_bytes - lost_bytes);
 
-                stats->chart_transmission_failures++;
-                stats->chart_data_lost_events++;
-                stats->chart_lost_bytes += lost_bytes;
+                stats->transmission_failures++;
+                stats->data_lost_events++;
+                stats->lost_bytes += lost_bytes;
 
                 // estimate the number of lost metrics
-                stats->chart_lost_metrics += (collected_number)(
-                    stats->chart_buffered_metrics *
+                stats->lost_metrics += (collected_number)(
+                    stats->buffered_metrics *
                     (buffer_len && (lost_bytes > buffer_len) ? (double)lost_bytes / buffer_len : 1));
 
                 break;
             } else {
-                stats->chart_receptions++;
+                stats->receptions++;
             }
 
             if (unlikely(netdata_exit))
                 break;
         }
 
-        stats->chart_sent_bytes += sent;
+        stats->sent_bytes += sent;
         if (likely(sent == buffer_len))
-            stats->chart_sent_metrics = stats->chart_buffered_metrics;
+            stats->sent_metrics = stats->buffered_metrics;
 
         buffer_flush(buffer);
 
+        send_internal_metrics(instance);
+
+        stats->buffered_metrics = 0;
+
         uv_mutex_unlock(&instance->mutex);
 
 #ifdef UNIT_TESTING
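The Kinesis worker above shows the per-cycle accounting pattern used across connectors: wake on the engine's signal, reset the previous cycle's counters, record what is buffered, send, then publish the cycle's stats before dropping the lock. A compact sketch of that cadence; the engine uses libuv primitives (`uv_mutex_t`/`uv_cond_t`), but plain POSIX threads are used here so the example stands alone, and the struct members are illustrative:

```c
#include <pthread.h>
#include <stddef.h>

struct stats { size_t buffered_bytes, sent_bytes, transmission_failures; };

struct worker {
    pthread_mutex_t mutex;
    pthread_cond_t  cond_var;
    struct stats    stats;
    size_t          buffer_len;
    int             shutdown;
};

void *connector_worker(void *p)
{
    struct worker *w = p;
    pthread_mutex_lock(&w->mutex);
    while (!w->shutdown) {
        pthread_cond_wait(&w->cond_var, &w->mutex); /* wait for a formatted batch */

        w->stats.sent_bytes = w->stats.transmission_failures = 0; /* per-cycle reset */
        w->stats.buffered_bytes = w->buffer_len;

        /* ... transmit the buffer here, updating sent_bytes / failures ... */

        w->buffer_len = 0; /* buffer flushed; stats now describe this cycle only */
    }
    pthread_mutex_unlock(&w->mutex);
    return NULL;
}
```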
diff --git a/exporting/exporting_engine.c b/exporting/exporting_engine.c
index 0a38d66bbe..93347328cd 100644
--- a/exporting/exporting_engine.c
+++ b/exporting/exporting_engine.c
@@ -35,6 +35,11 @@ void *exporting_main(void *ptr)
         goto cleanup;
     }
 
+    RRDSET *st_main_rusage = NULL;
+    RRDDIM *rd_main_user = NULL;
+    RRDDIM *rd_main_system = NULL;
+    create_main_rusage_chart(&st_main_rusage, &rd_main_user, &rd_main_system);
+
     usec_t step_ut = localhost->rrd_update_every * USEC_PER_SEC;
     heartbeat_t hb;
     heartbeat_init(&hb);
@@ -55,10 +60,7 @@ void *exporting_main(void *ptr)
             break;
         }
 
-        if (send_internal_metrics(engine) != 0) {
-            error("EXPORTING: cannot send metrics for the operation of exporting engine");
-            break;
-        }
+        send_main_rusage(st_main_rusage, rd_main_user, rd_main_system);
 
 #ifdef UNIT_TESTING
         break;
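The `create_main_rusage_chart()`/`send_main_rusage()` pair introduced here reports the exporting thread's own CPU usage. What that boils down to is sampling with `getrusage()` and feeding user/system time (in microseconds) into two dimensions of one chart. A sketch with the Netdata chart calls stubbed out:

```c
/* RUSAGE_THREAD is Linux-specific and needs _GNU_SOURCE;
 * RUSAGE_SELF is the portable fallback. */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/resource.h>

static void send_main_rusage_sketch(void)
{
    struct rusage thread;
    getrusage(RUSAGE_THREAD, &thread);

    unsigned long long user_us   = thread.ru_utime.tv_sec * 1000000ULL + thread.ru_utime.tv_usec;
    unsigned long long system_us = thread.ru_stime.tv_sec * 1000000ULL + thread.ru_stime.tv_usec;

    /* in the agent these values go to rrddim_set_by_pointer() + rrdset_done() */
    printf("user=%llu us, system=%llu us\n", user_us, system_us);
}
```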
diff --git a/exporting/exporting_engine.h b/exporting/exporting_engine.h
index 1a3a3ecd09..94daf98e05 100644
--- a/exporting/exporting_engine.h
+++ b/exporting/exporting_engine.h
@@ -14,10 +14,10 @@ extern struct config exporting_config;
 
 #define EXPORTING_UPDATE_EVERY_OPTION_NAME "update every"
-#define EXPORTING_UPDATE_EVERY_DEFAULT 10
+#define EXPORTING_UPDATE_EVERY_DEFAULT 10
 
 typedef enum exporting_options {
-    EXPORTING_OPTION_NONE = 0,
+    EXPORTING_OPTION_NON = 0,
 
     EXPORTING_SOURCE_DATA_AS_COLLECTED = (1 << 0),
     EXPORTING_SOURCE_DATA_AVERAGE = (1 << 1),
@@ -42,10 +42,22 @@ typedef enum exporting_options {
     (instance->config.options & EXPORTING_OPTION_SEND_AUTOMATIC_LABELS && \
      label->label_source != LABEL_SOURCE_NETDATA_CONF))
 
+typedef enum exporting_connector_types {
+    EXPORTING_CONNECTOR_TYPE_UNKNOWN,                 // Invalid type
+    EXPORTING_CONNECTOR_TYPE_GRAPHITE,                // Send plain text to Graphite
+    EXPORTING_CONNECTOR_TYPE_OPENTSDB_USING_TELNET,   // Send data to OpenTSDB using telnet API
+    EXPORTING_CONNECTOR_TYPE_OPENTSDB_USING_HTTP,     // Send data to OpenTSDB using HTTP API
+    EXPORTING_CONNECTOR_TYPE_JSON,                    // Stores the data using JSON.
+    EXPORTING_CONNECTOR_TYPE_PROMETHEUS_REMOTE_WRITE, // The user selected to use Prometheus backend
+    EXPORTING_CONNECTOR_TYPE_KINESIS,                 // Send message to AWS Kinesis
+    EXPORTING_CONNECTOR_TYPE_MONGODB,                 // Send data to MongoDB collection
+    EXPORTING_CONNECTOR_TYPE_NUM                      // Number of backend types
+} EXPORTING_CONNECTOR_TYPE;
+
 struct engine;
 
 struct instance_config {
-    BACKEND_TYPE type;
+    EXPORTING_CONNECTOR_TYPE type;
 
     const char *name;
     const char *destination;
@@ -87,18 +99,42 @@ struct engine_config {
 };
 
 struct stats {
-    collected_number chart_buffered_metrics;
-    collected_number chart_lost_metrics;
-    collected_number chart_sent_metrics;
-    collected_number chart_buffered_bytes;
-    collected_number chart_received_bytes;
-    collected_number chart_sent_bytes;
-    collected_number chart_receptions;
-    collected_number chart_transmission_successes;
-    collected_number chart_transmission_failures;
-    collected_number chart_data_lost_events;
-    collected_number chart_lost_bytes;
-    collected_number chart_reconnects;
+    collected_number buffered_metrics;
+    collected_number lost_metrics;
+    collected_number sent_metrics;
+    collected_number buffered_bytes;
+    collected_number lost_bytes;
+    collected_number sent_bytes;
+    collected_number received_bytes;
+    collected_number transmission_successes;
+    collected_number data_lost_events;
+    collected_number reconnects;
+    collected_number transmission_failures;
+    collected_number receptions;
+
+    int initialized;
+
+    RRDSET *st_metrics;
+    RRDDIM *rd_buffered_metrics;
+    RRDDIM *rd_lost_metrics;
+    RRDDIM *rd_sent_metrics;
+
+    RRDSET *st_bytes;
+    RRDDIM *rd_buffered_bytes;
+    RRDDIM *rd_lost_bytes;
+    RRDDIM *rd_sent_bytes;
+    RRDDIM *rd_received_bytes;
+
+    RRDSET *st_ops;
+    RRDDIM *rd_transmission_successes;
+    RRDDIM *rd_data_lost_events;
+    RRDDIM *rd_reconnects;
+    RRDDIM *rd_transmission_failures;
+    RRDDIM *rd_receptions;
+
+    RRDSET *st_rusage;
+    RRDDIM *rd_user;
+    RRDDIM *rd_system;
 };
 
 struct instance {
@@ -150,10 +186,12 @@ struct engine {
     struct instance *instance_root;
 };
 
+extern struct instance *prometheus_exporter_instance;
+
 void *exporting_main(void *ptr);
 
 struct engine *read_exporting_config();
-BACKEND_TYPE exporting_select_type(const char *type);
+EXPORTING_CONNECTOR_TYPE exporting_select_type(const char *type);
 
 int init_connectors(struct engine *engine);
@@ -179,12 +217,17 @@ int end_chart_formatting(struct engine *engine, RRDSET *st);
 int end_host_formatting(struct engine *engine, RRDHOST *host);
 int end_batch_formatting(struct engine *engine);
 
 int flush_host_labels(struct instance *instance, RRDHOST *host);
+int simple_connector_update_buffered_bytes(struct instance *instance);
 
 int exporting_discard_response(BUFFER *buffer, struct instance *instance);
 void simple_connector_receive_response(int *sock, struct instance *instance);
 void simple_connector_send_buffer(int *sock, int *failures, struct instance *instance);
 void simple_connector_worker(void *instance_p);
 
-int send_internal_metrics(struct engine *engine);
+void create_main_rusage_chart(RRDSET **st_rusage, RRDDIM **rd_user, RRDDIM **rd_system);
+void send_main_rusage(RRDSET *st_rusage, RRDDIM *rd_user, RRDDIM *rd_system);
+void send_internal_metrics(struct instance *instance);
+
+#include "exporting/prometheus/prometheus.h"
 
 #endif /* NETDATA_EXPORTING_ENGINE_H */
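The new `EXPORTING_CONNECTOR_TYPE` enum decouples the exporting engine from the legacy `BACKEND_TYPE`, and each value drives a per-connector initializer, mirroring the `switch` in the `init_connectors()` diff that follows. A stripped-down illustration of the dispatch (all names here are stand-ins, not the agent's):

```c
typedef enum { CONN_UNKNOWN, CONN_GRAPHITE, CONN_JSON, CONN_NUM } conn_type_t;

struct conn_instance { conn_type_t type; };

static int init_graphite(struct conn_instance *i) { (void)i; return 0; }
static int init_json(struct conn_instance *i)     { (void)i; return 0; }

int init_one_connector(struct conn_instance *i)
{
    switch (i->type) {
        case CONN_GRAPHITE: return init_graphite(i);
        case CONN_JSON:     return init_json(i);
        default:            return 1; /* unknown type: fail fast */
    }
}
```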
diff --git a/exporting/graphite/graphite.c b/exporting/graphite/graphite.c
index f815bff89d..d3f928583c 100644
--- a/exporting/graphite/graphite.c
+++ b/exporting/graphite/graphite.c
@@ -27,7 +27,7 @@ int init_graphite_instance(struct instance *instance)
 
     instance->end_chart_formatting = NULL;
     instance->end_host_formatting = flush_host_labels;
-    instance->end_batch_formatting = NULL;
+    instance->end_batch_formatting = simple_connector_update_buffered_bytes;
 
     instance->send_header = NULL;
     instance->check_response = exporting_discard_response;
diff --git a/exporting/init_connectors.c b/exporting/init_connectors.c
index 798101fd9c..0db0ca1354 100644
--- a/exporting/init_connectors.c
+++ b/exporting/init_connectors.c
@@ -32,35 +32,35 @@ int init_connectors(struct engine *engine)
         instance->after = engine->now;
 
         switch (instance->config.type) {
-            case BACKEND_TYPE_GRAPHITE:
+            case EXPORTING_CONNECTOR_TYPE_GRAPHITE:
                 if (init_graphite_instance(instance) != 0)
                     return 1;
                 break;
-            case BACKEND_TYPE_JSON:
+            case EXPORTING_CONNECTOR_TYPE_JSON:
                 if (init_json_instance(instance) != 0)
                     return 1;
                 break;
-            case BACKEND_TYPE_OPENTSDB_USING_TELNET:
+            case EXPORTING_CONNECTOR_TYPE_OPENTSDB_USING_TELNET:
                 if (init_opentsdb_telnet_instance(instance) != 0)
                     return 1;
                 break;
-            case BACKEND_TYPE_OPENTSDB_USING_HTTP:
+            case EXPORTING_CONNECTOR_TYPE_OPENTSDB_USING_HTTP:
                 if (init_opentsdb_http_instance(instance) != 0)
                     return 1;
                 break;
-            case BACKEND_TYPE_PROMETHEUS_REMOTE_WRITE:
+            case EXPORTING_CONNECTOR_TYPE_PROMETHEUS_REMOTE_WRITE:
 #if ENABLE_PROMETHEUS_REMOTE_WRITE
                 if (init_prometheus_remote_write_instance(instance) != 0)
                     return 1;
 #endif
                 break;
-            case BACKEND_TYPE_KINESIS:
+            case EXPORTING_CONNECTOR_TYPE_KINESIS:
 #if HAVE_KINESIS
                 if (init_aws_kinesis_instance(instance) != 0)
                     return 1;
 #endif
                 break;
-            case BACKEND_TYPE_MONGODB:
+            case EXPORTING_CONNECTOR_TYPE_MONGODB:
 #if HAVE_MONGOC
                 if (init_mongodb_instance(instance) != 0)
                     return 1;
@@ -77,7 +77,7 @@ int init_connectors(struct engine *engine)
             error("EXPORTING: cannot create tread worker. uv_thread_create(): %s", uv_strerror(error));
             return 1;
         }
-        char threadname[NETDATA_THREAD_NAME_MAX+1];
+        char threadname[NETDATA_THREAD_NAME_MAX + 1];
         snprintfz(threadname, NETDATA_THREAD_NAME_MAX, "EXPORTING-%zu", instance->index);
         uv_thread_set_name_np(instance->thread, threadname);
     }
diff --git a/exporting/json/json.c b/exporting/json/json.c
index 9886b55567..b334804cff 100644
--- a/exporting/json/json.c
+++ b/exporting/json/json.c
@@ -27,7 +27,7 @@ int init_json_instance(struct instance *instance)
 
     instance->end_chart_formatting = NULL;
     instance->end_host_formatting = flush_host_labels;
-    instance->end_batch_formatting = NULL;
+    instance->end_batch_formatting = simple_connector_update_buffered_bytes;
 
     instance->send_header = NULL;
     instance->check_response = exporting_discard_response;
diff --git a/exporting/mongodb/mongodb.c b/exporting/mongodb/mongodb.c
index b10a8fa664..f20c4f1c80 100644
--- a/exporting/mongodb/mongodb.c
+++ b/exporting/mongodb/mongodb.c
@@ -183,8 +183,10 @@ int format_batch_mongodb(struct instance *instance)
         // ring buffer is full, reuse the oldest element
         connector_specific_data->first_buffer = connector_specific_data->first_buffer->next;
         free_bson(insert, connector_specific_data->last_buffer->documents_inserted);
+        connector_specific_data->total_documents_inserted -= connector_specific_data->last_buffer->documents_inserted;
+        stats->buffered_bytes -= connector_specific_data->last_buffer->buffered_bytes;
     }
-    insert = callocz((size_t)stats->chart_buffered_metrics, sizeof(bson_t *));
+    insert = callocz((size_t)stats->buffered_metrics, sizeof(bson_t *));
     connector_specific_data->last_buffer->insert = insert;
 
     BUFFER *buffer = (BUFFER *)instance->buffer;
@@ -193,7 +195,7 @@ int format_batch_mongodb(struct instance *instance)
 
     size_t documents_inserted = 0;
 
-    while (*end && documents_inserted <= (size_t)stats->chart_buffered_metrics) {
+    while (*end && documents_inserted <= (size_t)stats->buffered_metrics) {
         while (*end && *end != '\n')
             end++;
@@ -208,7 +210,8 @@ int format_batch_mongodb(struct instance *instance)
 
         insert[documents_inserted] = bson_new_from_json((const uint8_t *)start, -1, &bson_error);
 
         if (unlikely(!insert[documents_inserted])) {
-            error("EXPORTING: %s", bson_error.message);
+            error(
+                "EXPORTING: Failed creating a BSON document from a JSON string \"%s\" : %s", start, bson_error.message);
             free_bson(insert, documents_inserted);
             return 1;
         }
@@ -218,8 +221,16 @@ int format_batch_mongodb(struct instance *instance)
         documents_inserted++;
     }
 
+    stats->buffered_bytes += connector_specific_data->last_buffer->buffered_bytes = buffer_strlen(buffer);
+
     buffer_flush(buffer);
 
+    // The stats->bu