summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorvkalintiris <vasilis@netdata.cloud>2024-02-01 15:51:19 +0200
committerGitHub <noreply@github.com>2024-02-01 15:51:19 +0200
commita25ff1b401c2b9dfd0289a243c1f09a9bceba2e1 (patch)
tree531d60f7aac73de1499c3379d7319e8cf09b67dc /src
parent1e59c424f86922bb656d5834ab5b321cc9f0b5b0 (diff)
Move aclk/ under src/ (#16899)
* Move aclk/ under src/ * Update labeler
Diffstat (limited to 'src')
-rw-r--r--src/aclk/README.md133
m---------src/aclk/aclk-schemas0
-rw-r--r--src/aclk/aclk.c1361
-rw-r--r--src/aclk/aclk.h90
-rw-r--r--src/aclk/aclk_alarm_api.c44
-rw-r--r--src/aclk/aclk_alarm_api.h14
-rw-r--r--src/aclk/aclk_capas.c58
-rw-r--r--src/aclk/aclk_capas.h14
-rw-r--r--src/aclk/aclk_contexts_api.c41
-rw-r--r--src/aclk/aclk_contexts_api.h14
-rw-r--r--src/aclk/aclk_otp.c883
-rw-r--r--src/aclk/aclk_otp.h18
-rw-r--r--src/aclk/aclk_proxy.c172
-rw-r--r--src/aclk/aclk_proxy.h21
-rw-r--r--src/aclk/aclk_query.c370
-rw-r--r--src/aclk/aclk_query.h38
-rw-r--r--src/aclk/aclk_query_queue.c124
-rw-r--r--src/aclk/aclk_query_queue.h87
-rw-r--r--src/aclk/aclk_rrdhost_state.h11
-rw-r--r--src/aclk/aclk_rx_msgs.c571
-rw-r--r--src/aclk/aclk_rx_msgs.h17
-rw-r--r--src/aclk/aclk_stats.c482
-rw-r--r--src/aclk/aclk_stats.h77
-rw-r--r--src/aclk/aclk_tx_msgs.c276
-rw-r--r--src/aclk/aclk_tx_msgs.h20
-rw-r--r--src/aclk/aclk_util.c484
-rw-r--r--src/aclk/aclk_util.h121
-rw-r--r--src/aclk/helpers/mqtt_wss_pal.h19
-rw-r--r--src/aclk/helpers/ringbuffer_pal.h11
-rw-r--r--src/aclk/https_client.c857
-rw-r--r--src/aclk/https_client.h134
-rw-r--r--src/aclk/mqtt_websockets/.github/workflows/run-tests.yaml14
-rw-r--r--src/aclk/mqtt_websockets/.gitignore10
-rw-r--r--src/aclk/mqtt_websockets/README.md7
-rw-r--r--src/aclk/mqtt_websockets/c-rbuf/cringbuffer.c203
-rw-r--r--src/aclk/mqtt_websockets/c-rbuf/cringbuffer.h47
-rw-r--r--src/aclk/mqtt_websockets/c-rbuf/cringbuffer_internal.h37
-rw-r--r--src/aclk/mqtt_websockets/c-rbuf/ringbuffer_test.c485
-rw-r--r--src/aclk/mqtt_websockets/c_rhash/c_rhash.c264
-rw-r--r--src/aclk/mqtt_websockets/c_rhash/c_rhash.h61
-rw-r--r--src/aclk/mqtt_websockets/c_rhash/c_rhash_internal.h19
-rw-r--r--src/aclk/mqtt_websockets/c_rhash/tests.c273
-rw-r--r--src/aclk/mqtt_websockets/common_internal.h27
-rw-r--r--src/aclk/mqtt_websockets/common_public.c9
-rw-r--r--src/aclk/mqtt_websockets/common_public.h33
-rw-r--r--src/aclk/mqtt_websockets/endian_compat.h31
-rw-r--r--src/aclk/mqtt_websockets/mqtt_constants.h103
-rw-r--r--src/aclk/mqtt_websockets/mqtt_ng.c2237
-rw-r--r--src/aclk/mqtt_websockets/mqtt_ng.h99
-rw-r--r--src/aclk/mqtt_websockets/mqtt_wss_client.c1126
-rw-r--r--src/aclk/mqtt_websockets/mqtt_wss_client.h162
-rw-r--r--src/aclk/mqtt_websockets/mqtt_wss_log.c130
-rw-r--r--src/aclk/mqtt_websockets/mqtt_wss_log.h39
-rw-r--r--src/aclk/mqtt_websockets/test.c90
-rw-r--r--src/aclk/mqtt_websockets/ws_client.c744
-rw-r--r--src/aclk/mqtt_websockets/ws_client.h120
-rw-r--r--src/aclk/schema-wrappers/agent_cmds.cc38
-rw-r--r--src/aclk/schema-wrappers/agent_cmds.h27
-rw-r--r--src/aclk/schema-wrappers/alarm_config.cc140
-rw-r--r--src/aclk/schema-wrappers/alarm_config.h71
-rw-r--r--src/aclk/schema-wrappers/alarm_stream.cc221
-rw-r--r--src/aclk/schema-wrappers/alarm_stream.h128
-rw-r--r--src/aclk/schema-wrappers/capability.cc11
-rw-r--r--src/aclk/schema-wrappers/capability.h24
-rw-r--r--src/aclk/schema-wrappers/connection.cc72
-rw-r--r--src/aclk/schema-wrappers/connection.h47
-rw-r--r--src/aclk/schema-wrappers/context.cc125
-rw-r--r--src/aclk/schema-wrappers/context.h53
-rw-r--r--src/aclk/schema-wrappers/context_stream.cc42
-rw-r--r--src/aclk/schema-wrappers/context_stream.h36
-rw-r--r--src/aclk/schema-wrappers/node_connection.cc46
-rw-r--r--src/aclk/schema-wrappers/node_connection.h32
-rw-r--r--src/aclk/schema-wrappers/node_creation.cc39
-rw-r--r--src/aclk/schema-wrappers/node_creation.h31
-rw-r--r--src/aclk/schema-wrappers/node_info.cc136
-rw-r--r--src/aclk/schema-wrappers/node_info.h79
-rw-r--r--src/aclk/schema-wrappers/proto_2_json.cc88
-rw-r--r--src/aclk/schema-wrappers/proto_2_json.h18
-rw-r--r--src/aclk/schema-wrappers/schema_wrapper_utils.cc22
-rw-r--r--src/aclk/schema-wrappers/schema_wrapper_utils.h24
-rw-r--r--src/aclk/schema-wrappers/schema_wrappers.h19
-rw-r--r--src/claim/README.md10
-rw-r--r--src/database/sqlite/sqlite_health.c2
83 files changed, 14507 insertions, 6 deletions
diff --git a/src/aclk/README.md b/src/aclk/README.md
new file mode 100644
index 0000000000..8ff30bd34e
--- /dev/null
+++ b/src/aclk/README.md
@@ -0,0 +1,133 @@
+# Agent-Cloud link (ACLK)
+
+The Agent-Cloud link (ACLK) is the mechanism responsible for securely connecting a Netdata Agent to your web browser
+through Netdata Cloud. The ACLK establishes an outgoing secure WebSocket (WSS) connection to Netdata Cloud on port
+`443`. The ACLK is encrypted, safe, and _is only established if you connect your node_.
+
+The Cloud App lives at app.netdata.cloud which currently resolves to the following list of IPs:
+
+- 54.198.178.11
+- 44.207.131.212
+- 44.196.50.41
+
+> ### Caution
+>
+>This list of IPs can change without notice. We strongly advise you to whitelist the following domains: `app.netdata.cloud` and `mqtt.netdata.cloud`. If this is not an option in your case, always verify the current domain resolution (e.g., via the `host` command).
+
+For a guide to connecting a node using the ACLK, plus additional troubleshooting and reference information, read our [connect to Cloud
+documentation](https://github.com/netdata/netdata/blob/master/src/claim/README.md).
+
+## Data privacy
+
+[Data privacy](https://netdata.cloud/privacy/) is very important to us. We firmly believe that your data belongs to
+you. This is why **we don't store any metric data in Netdata Cloud**.
+
+All the data that you see in the web browser when using Netdata Cloud is actually streamed directly from the Netdata Agent to the Netdata Cloud dashboard. The data passes through our systems, but it isn't stored.
+
+However, to be able to offer the stunning visualizations and advanced functionality of Netdata Cloud, it does store a limited amount of _metadata_. Read more about our [security and privacy design](https://github.com/netdata/netdata/blob/master/docs/netdata-security.md).
+
+## Enable and configure the ACLK
+
+The ACLK is enabled by default, with its settings automatically configured and stored in the Agent's memory. No file is
+created at `/var/lib/netdata/cloud.d/cloud.conf` until you either connect a node or create it yourself. The default
+configuration uses two settings:
+
+```conf
+[global]
+ enabled = yes
+ cloud base url = https://app.netdata.cloud
+```
+
+If your Agent needs to use a proxy to access the internet, you must [set up a proxy for
+connecting to cloud](https://github.com/netdata/netdata/blob/master/src/claim/README.md#connect-through-a-proxy).
+
+You can configure the following keys in the `[cloud]` section of `netdata.conf`:
+```
+[cloud]
+ statistics = yes
+ query thread count = 2
+```
+
+- `statistics` enables/disables ACLK related statistics and their charts. You can disable this to save some space in the database and slightly reduce memory usage of Netdata Agent.
+- `query thread count` specifies the number of threads to process cloud queries. Increasing this setting is useful for nodes with many children (streaming), which can expect to handle more queries (and/or more complicated queries).
+
+## Disable the ACLK
+
+You have two options if you prefer to disable the ACLK and not use Netdata Cloud.
+
+### Disable at installation
+
+You can pass the `--disable-cloud` parameter to the Agent installation when using a kickstart script
+([kickstart.sh](https://github.com/netdata/netdata/blob/master/packaging/installer/methods/kickstart.md)) or a [manual installation from
+Git](https://github.com/netdata/netdata/blob/master/packaging/installer/methods/manual.md).
+
+When you pass this parameter, the installer does not download or compile any extra libraries. Once running, the Agent
+kills the thread responsible for the ACLK and connecting behavior, and behaves as though the ACLK, and thus Netdata Cloud,
+does not exist.
+
+### Disable at runtime
+
+You can change a runtime setting in your `cloud.conf` file to disable the ACLK. This setting only stops the Agent from
+attempting any connection via the ACLK, but does not prevent the installer from downloading and compiling the ACLK's
+dependencies.
+
+The file typically exists at `/var/lib/netdata/cloud.d/cloud.conf`, but can change if you set a prefix during
+installation. To disable the ACLK, open that file and change the `enabled` setting to `no`:
+
+```conf
+[global]
+ enabled = no
+```
+
+If the file at `/var/lib/netdata/cloud.d/cloud.conf` doesn't exist, you need to create it.
+
+Copy and paste the first two lines from below, which will change your prompt to `cat`.
+
+```bash
+cd /var/lib/netdata/cloud.d
+cat > cloud.conf << EOF
+```
+
+Copy and paste in lines 3-6, and after the final `EOF`, hit **Enter**. The final line must contain only `EOF`. Hit **Enter** again to return to your normal prompt with the newly-created file.
+
+To get your normal prompt back, the final line
+must contain only `EOF`.
+
+```bash
+[global]
+ enabled = no
+ cloud base url = https://app.netdata.cloud
+EOF
+```
+
+You also need to change the file's permissions. Use `grep "run as user" /etc/netdata/netdata.conf` to figure out which
+user your Agent runs as (typically `netdata`), and replace `netdata:netdata` as shown below if necessary:
+
+```bash
+sudo chmod 0770 cloud.conf
+sudo chown netdata:netdata cloud.conf
+```
+
+Restart your Agent to disable the ACLK.
+
+### Re-enable the ACLK
+
+If you first disable the ACLK and any Cloud functionality and then decide you would like to use Cloud, you must either
+[reinstall Netdata](https://github.com/netdata/netdata/blob/master/packaging/installer/REINSTALL.md) with Cloud enabled or change the runtime setting in your
+`cloud.conf` file.
+
+If you passed `--disable-cloud` to `netdata-installer.sh` during installation, you must
+[reinstall](https://github.com/netdata/netdata/blob/master/packaging/installer/REINSTALL.md) your Agent. Use the same method as before, but pass `--require-cloud` to
+the installer. When installation finishes you can [connect your node](https://github.com/netdata/netdata/blob/master/src/claim/README.md#how-to-connect-a-node).
+
+If you changed the runtime setting in your `/var/lib/netdata/cloud.d/cloud.conf` file, edit the file again and change
+`enabled` to `yes`:
+
+```conf
+[global]
+ enabled = yes
+```
+
+Restart your Agent and [connect your node](https://github.com/netdata/netdata/blob/master/src/claim/README.md#how-to-connect-a-node).
+
+
diff --git a/src/aclk/aclk-schemas b/src/aclk/aclk-schemas
new file mode 160000
+Subproject 83c661c0dcddb9526814ebbd0668fbc3e281f03
diff --git a/src/aclk/aclk.c b/src/aclk/aclk.c
new file mode 100644
index 0000000000..6d583a76a6
--- /dev/null
+++ b/src/aclk/aclk.c
@@ -0,0 +1,1361 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "aclk.h"
+
+#ifdef ENABLE_ACLK
+#include "aclk_stats.h"
+#include "mqtt_websockets/mqtt_wss_client.h"
+#include "aclk_otp.h"
+#include "aclk_tx_msgs.h"
+#include "aclk_query.h"
+#include "aclk_query_queue.h"
+#include "aclk_util.h"
+#include "aclk_rx_msgs.h"
+#include "https_client.h"
+#include "schema-wrappers/schema_wrappers.h"
+#include "aclk_capas.h"
+
+#include "aclk_proxy.h"
+
+#ifdef ACLK_LOG_CONVERSATION_DIR
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#endif
+
+#define ACLK_STABLE_TIMEOUT 3 // Minimum delay to mark AGENT as stable
+
+#endif /* ENABLE_ACLK */
+
+// File-scope ACLK state. These definitions sit outside the ENABLE_ACLK guard,
+// so the symbols are defined whether or not ACLK support is compiled in.
+// NOTE(review): apart from the one commented counter, the meaning of each
+// variable below is only suggested by its name - confirm against its users.
+int aclk_pubacks_per_conn = 0; // How many PubAcks we got since MQTT conn est.
+int aclk_rcvd_cloud_msgs = 0;
+int aclk_connection_counter = 0;
+int disconnect_req = 0;
+
+int aclk_connected = 0;
+int aclk_ctx_based = 0;
+int aclk_disable_runtime = 0;
+int aclk_stats_enabled; // no explicit initializer: starts at zero like any file-scope object
+int aclk_kill_link = 0;
+
+usec_t aclk_session_us = 0;
+time_t aclk_session_sec = 0;
+
+// NOTE(review): presumably timestamps of the latest connection events - confirm.
+time_t last_conn_time_mqtt = 0;
+time_t last_conn_time_appl = 0;
+time_t last_disconnect_time = 0;
+time_t next_connection_attempt = 0;
+float last_backoff_value = 0;
+
+time_t aclk_block_until = 0;
+
+#ifdef ENABLE_ACLK
+// MQTT/WSS client object; nothing in this part of the file uses it yet.
+mqtt_wss_client mqttwss_client;
+
+// Mutex wrapped by the two LOCK/UNLOCK helper macros that follow.
+// NOTE(review): presumably guards aclk_shared_state - confirm at the call sites.
+netdata_mutex_t aclk_shared_state_mutex = NETDATA_MUTEX_INITIALIZER;
+#define ACLK_SHARED_STATE_LOCK netdata_mutex_lock(&aclk_shared_state_mutex)
+#define ACLK_SHARED_STATE_UNLOCK netdata_mutex_unlock(&aclk_shared_state_mutex)
+
+// Initial values: mqtt_shutdown_msg_id = -1, mqtt_shutdown_msg_rcvd = 0.
+// NOTE(review): -1 looks like a "no shutdown message sent" sentinel - confirm.
+struct aclk_shared_state aclk_shared_state = {
+ .mqtt_shutdown_msg_id = -1,
+ .mqtt_shutdown_msg_rcvd = 0
+};
+
+#ifdef MQTT_WSS_DEBUG
+#include <openssl/ssl.h>
+#define DEFAULT_SSKEYLOGFILE_NAME "SSLKEYLOGFILE"
+const char *ssl_log_filename = NULL;
+FILE *ssl_log_file = NULL;
+
+/**
+ * Appends one line to the SSL key log (only built with MQTT_WSS_DEBUG).
+ *
+ * The log file named by ssl_log_filename is opened in append mode the first
+ * time it is needed and the stream is kept in ssl_log_file for later calls.
+ * Each line is followed by a newline and flushed immediately.
+ *
+ * @param ssl  unused
+ * @param line text to append
+ */
+static void aclk_ssl_keylog_cb(const SSL *ssl, const char *line)
+{
+    (void)ssl;
+
+    // Lazy open: only attempted while no stream is cached
+    if (ssl_log_file == NULL) {
+        ssl_log_file = fopen(ssl_log_filename, "a");
+        if (ssl_log_file == NULL) {
+            netdata_log_error("Couldn't open ssl_log file (%s) for append.", ssl_log_filename);
+            return;
+        }
+    }
+
+    fputs(line, ssl_log_file);
+    putc('\n', ssl_log_file);
+    fflush(ssl_log_file);
+}
+#endif
+
+#if OPENSSL_VERSION_NUMBER >= OPENSSL_VERSION_300
+// Decoder context that produced aclk_private_key; kept until the next (re)load.
+OSSL_DECODER_CTX *aclk_dctx = NULL;
+EVP_PKEY *aclk_private_key = NULL;
+#else
+static RSA *aclk_private_key = NULL;
+#endif
+
+/**
+ * (Re)loads the private key from "<varlib dir>/cloud.d/private.pem" into
+ * aclk_private_key.
+ *
+ * A key (and, with OpenSSL >= 3.0, a decoder context) left over from an
+ * earlier call is released first. On every exit path the temporary BIO and
+ * the file buffer are freed; on failure the decoder context created by this
+ * call is freed as well, so repeated failing calls do not leak.
+ *
+ * @return `0` when the key was loaded,
+ *         `1` on any failure (the reason is logged)
+ */
+static int load_private_key()
+{
+    int rc = 1;
+
+    // Drop the previous key material. The decoder context is released
+    // independently of the key so it can never be orphaned.
+#if OPENSSL_VERSION_NUMBER >= OPENSSL_VERSION_300
+    if (aclk_private_key != NULL)
+        EVP_PKEY_free(aclk_private_key);
+    if (aclk_dctx != NULL)
+        OSSL_DECODER_CTX_free(aclk_dctx);
+    aclk_dctx = NULL;
+#else
+    if (aclk_private_key != NULL)
+        RSA_free(aclk_private_key);
+#endif
+    aclk_private_key = NULL;
+
+    char filename[FILENAME_MAX + 1];
+    snprintfz(filename, FILENAME_MAX, "%s/cloud.d/private.pem", netdata_configured_varlib_dir);
+
+    long bytes_read;
+    char *private_key = read_by_filename(filename, &bytes_read);
+    if (!private_key) {
+        netdata_log_error("Claimed agent cannot establish ACLK - unable to load private key '%s' failed.", filename);
+        return 1;
+    }
+    netdata_log_debug(D_ACLK, "Claimed agent loaded private key len=%ld bytes", bytes_read);
+
+    // Length -1 makes OpenSSL take the length with strlen(), i.e. the buffer
+    // is treated as a NUL-terminated string.
+    BIO *key_bio = BIO_new_mem_buf(private_key, -1);
+    if (key_bio == NULL) {
+        netdata_log_error("Claimed agent cannot establish ACLK - failed to create BIO for key");
+        goto cleanup;
+    }
+
+#if OPENSSL_VERSION_NUMBER >= OPENSSL_VERSION_300
+    aclk_dctx = OSSL_DECODER_CTX_new_for_pkey(&aclk_private_key, "PEM", NULL,
+                                              "RSA",
+                                              OSSL_KEYMGMT_SELECT_PRIVATE_KEY,
+                                              NULL, NULL);
+
+    if (!aclk_dctx) {
+        netdata_log_error("Loading private key (from claiming) failed - no OpenSSL Decoders found");
+        goto cleanup;
+    }
+
+    // this is necessary to avoid RSA key with wrong size
+    if (!OSSL_DECODER_from_bio(aclk_dctx, key_bio)) {
+        netdata_log_error("Decoding private key (from claiming) failed - invalid format.");
+        goto cleanup;
+    }
+#else
+    aclk_private_key = PEM_read_bio_RSAPrivateKey(key_bio, NULL, NULL, NULL);
+#endif
+
+    if (aclk_private_key != NULL) {
+        rc = 0;
+    } else {
+        char err[512];
+        ERR_error_string_n(ERR_get_error(), err, sizeof(err));
+        netdata_log_error("Claimed agent cannot establish ACLK - cannot create private key: %s", err);
+    }
+
+cleanup:
+    // BIO_free(NULL) does nothing, so the "BIO creation failed" path is safe here too.
+    BIO_free(key_bio);
+#if OPENSSL_VERSION_NUMBER >= OPENSSL_VERSION_300
+    // A context without a usable key is of no use to anyone: release it now
+    // instead of letting the next call overwrite the pointer.
+    if (rc != 0 && aclk_dctx != NULL) {
+        OSSL_DECODER_CTX_free(aclk_dctx);
+        aclk_dctx = NULL;
+    }
+#endif
+    freez(private_key);
+    return rc;
+}
+
+/**
+ * Polls netdata_cloud_enabled once per second until it becomes non-zero.
+ *
+ * @return `0` once the flag is set,
+ *         `1` if SERVICE_ACLK stopped running while waiting
+ */
+static int wait_till_cloud_enabled()
+{
+    nd_log(NDLS_DAEMON, NDLP_INFO, "Waiting for Cloud to be enabled");
+
+    for (;;) {
+        if (netdata_cloud_enabled)
+            return 0;
+
+        sleep_usec(USEC_PER_SEC * 1);
+
+        // The service state is only looked at after a sleep, never before the first one
+        if (!service_running(SERVICE_ACLK))
+            return 1;
+    }
+}
+
+/**
+ * Blocks until get_agent_claimid() returns a non-NULL claim id, checking once
+ * per second. The id itself is not needed here and is freed right away.
+ *
+ * @return `0` once a claim id is available,
+ *         `1` if SERVICE_ACLK stopped running while waiting
+ */
+static int wait_till_agent_claimed(void)
+{
+    //TODO avoid the allocate/free round trip on every poll
+    char *claim_id;
+
+    for (claim_id = get_agent_claimid(); likely(!claim_id); claim_id = get_agent_claimid()) {
+        sleep_usec(USEC_PER_SEC * 1);
+        if (!service_running(SERVICE_ACLK))
+            return 1;
+    }
+
+    freez(claim_id);
+    return 0;
+}
+
+/**
+ * Checks everything is ready for connection
+ * agent claimed, cloud url set and private key available
+ *
+ * @param aclk_hostname points to location where string pointer to hostname will be set
+ * @param aclk_port port to int where port will be saved
+ *
+ * @return If non 0 returned irrecoverable error happened (or netdata_exit) and ACLK should be terminated
+ */
+static int wait_till_agent_claim_ready()
+{
+ url_t url;
+ while (service_running(SERVICE_ACLK)) {
+ if (wait_till_agent_claimed())
+ return 1;
+
+ // The NULL return means the value was never initialised, but this value has been initialized in post_conf_load.
+ // We trap the impossible NULL here to keep the linter happy without using a fatal() in the code.
+ char *cloud_base_url = appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", NULL);
+ if (cloud_base_url == NULL) {
+ netdata_log_error("Do not move the cloud base url out of post_conf_load!!");
+ return 1;
+ }
+
+ // We jus