From b2b3c182548fe81e6d1c9a599b2571dabfdabcaa Mon Sep 17 00:00:00 2001 From: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com> Date: Thu, 6 Feb 2020 17:58:51 +0200 Subject: ACLK agent 1 (#7894) * - Add initial mqtt support * [WIP] Agent cloud link - Setup main mqtt thread to connect to a broker using V5 of the MQTT protocol (TBD) - Send alarms to "netdata/alarm" - Add error checks to handle connection failures - Add params for Broker, port Maximum concurrent sent / received messages - Dummy function to check claiming status - Generic mqtt_send command to publish message to a base topic, sub topic It will end up in the form base_topic/sub_topic - Add host/port in the connection failure error message * Test libmosquitto libs * connect to broker locally (assume localhost:1883) * subscribe to channel netdata/command * Test try a reload command to trigger health reload * publish alerts to netdata/alarm * - Fix compile issues * - Use sleep_usec instead of usleep * - Delay reconnection on failure due to misconfiguration (high cpu usage) * - Remove the TLS connection config * - Fix NETDATA_MQTT_INITIALIZATION_SLEEP_WAIT to use seconds * - Gather ACLK related code under aclk folder - Add aclk_ functions for abstract layer - Moved low level libs integration in mqtt.c * - Add README.md file with initial comment * - Clean MQTT v5 * - Code cleanup * - Remove alarm log for now - Remove the heart beat * - Remove message properties for V5 * - Remove message properties for V5 (header) * Fixed the netdata target to use a local static version of libmosquitto. The installer does not yet have steps to pull and build the local library. cd project_root git clone ssh://git@github.com/netdata/mosquitto mosquitto/ (cd mosquitto/lib && make) # Ignore the cpp error This will leave mosquitto/lib/libmosquitto.a for the build process to use. 
* - Fix compile issues with older < 1.6 libmosquitto lib * - Enable alarm events to check it works - Re arrange includes - Rework topic to be agent/guid/. Actual id will be returned by the is_agent_claimed * - Add initial metadata info - Added helper function in web_api - Added a debug command (info) * Update the claiming state to retrieve the claimed id. * - Use define for constants like command and metadata topics - Function to wait for initialization of the ACLK link - New aclk_subscribe command with QOS parameter for the mqtt subscription - Use the is_agent_claimed function to get the real claim id and use it to build the topics that will be used for the cloud communication - Change in netdata-claim.sh.in to write the claim id without a trailing \n * - Use define for constants like command and metadata topics - Function to wait for initialization of the ACLK link - New aclk_subscribe command with QOS parameter for the mqtt subscription - Use the is_agent_claimed function to get the real claim id and use it to build the topics that will be used for the cloud communication - Change in netdata-claim.sh.in to write the claim id without a trailing \n * - Remove the alarm log for now - Add code (but disabled) to send charts * - Use dummy anon, anon as username and password for testing purposes * - Use client id anon as well * Testing without TLS * Switching TLS back on to fix docker environment. 
* - Added query processing An incoming URL now calls web_client_api_request_v1_data to handle a request and push the results back to the "data" topic - Move the above processing from the message callback to the query handle loop - Added helper "pause", "resume" commands to stop and resume query processing to stress test loading the queue with queries before executing them - Changed the endpoint topics to "meta", and "cmd" (previously metadata and command) * make info message follow protocol * move metadata msg generation into new func * move metadata msg generation into new func * - Add metadata to the responses - Add hook to queue chart changes on creation and dimensions - Changed the queue mechanism to include delay for X seconds - Add delayed submission of charts to the cloud so that all DIMs are defined to avoid resubmission * - Add additional data info for aclk_queue command * - Use web_client_api_request_v1 to handle the incoming request This will handle all requests coming from the cloud * - Cleanup and aclk_query structure - Add msg_id parameter - Enable the incoming JSON request - Enable the outgoing JSON response * - Added new thread to handle query processing - Add lock and cond wait to wake up thread when queries are submitted - Cleanup on the main init function * - Add wait time on agent init, to allow for chart, alarms and other definitions to be completed. 
- During the wait time, no queries will be queued * - Send metadata on query thread init - New generic create header function for the JSON response - Pack info and charts into one message - Modified chart to remove entries (test) - Modified charts mod to remove entries e.g. alarms and volatile info - Change input to aclk_update_chart (RRDHOST / instead of hostname) * - When a request fails, add to the payload - We may need to handle in a different key - Error check in json parsing * - Add dummy aclk_update_alarm command * - Move incoming request JSON parsing code away from mqtt.c - Added #ifdef ACLK_ENABLE so that we can have code merged but disabled by default - Added version in incoming and outgoing JSON dict * - Disable code if ACLK_ENABLE is not defined - Remove references to the mqtt (mosquitto) lib - Add dummy stubs in mqtt.c for completeness if ACLK_ENABLE is not defined * - Disable challenge sample code for now * - Remove libmosquitto from makefile * - Fix spaces in Makefile.am - Remove ifdef to avoid warning from LGTM * - Remove for now the code that builds an along log test message to send to the cloud * - Add check for ACLK_ENABLE definition and avoid calling the chart update functions * - Remove commented code * - Move source files to the correct place (ACLK_PLUGIN_FILES) * - Remove include file that's not needed * - Remove include file that's not needed - Add improved checks for load_claiming_state() * - Fix error message. 
Used error() that also logs errno and message * - Fix some codacy issues * - Fix more codacy issues, code cleanup * - Revert code to address codacy warnings * - Revert spaces added in a previous commit by mistake * clean up if/else nest * print error if fopen fails * minor - error already logs errno * - Fix version formatting * - Cleanup all ACLK related compiler warnings - Re-arrange include files - Removed unused defines * - More compilation warnings fixed - Bug with thread creation fixed * - Add condition to skip compilation of the ACLK code entirely. Add env variable ACLK="yes" to enable * - Add condition to skip the libmosquitto * - Change feature flag from ACLK_ENABLE to ENABLE_ACLK in accordance with the rest of ENABLE_xx flags - Typo in info message fix Co-authored-by: Andrew Moss <1043609+amoss@users.noreply.github.com> Co-authored-by: Timo <6674623+underhood@users.noreply.github.com> --- claim/claim.c | 46 +++++++++++++++++++++++++++++++--------------- claim/claim.h | 2 +- claim/netdata-claim.sh.in | 4 ++-- 3 files changed, 34 insertions(+), 18 deletions(-) (limited to 'claim') diff --git a/claim/claim.c b/claim/claim.c index 75f0a437d6..72b6b42914 100644 --- a/claim/claim.c +++ b/claim/claim.c @@ -21,13 +21,12 @@ static char *claiming_errors[] = { "internal server error" // 12 }; -#define AGENT_UNCLAIMED 0 -#define AGENT_CLAIMED 1 -static uint8_t claiming_status = AGENT_UNCLAIMED; -uint8_t is_agent_claimed(void) +static char *claimed_id = NULL; + +char *is_agent_claimed(void) { - return (AGENT_CLAIMED == claiming_status); + return claimed_id; } #define CLAIMING_COMMAND_LENGTH 16384 @@ -64,8 +63,7 @@ void claim_agent(char *claiming_arguments) exit_code = mypclose(fp, command_pid); info("Agent claiming command returned with code %d", exit_code); if (0 == exit_code) { - claiming_status = AGENT_CLAIMED; - info("Agent successfully claimed."); + load_claiming_state(); return; } if (exit_code < 0) { @@ -85,19 +83,37 @@ void claim_agent(char *claiming_arguments) 
void load_claiming_state(void) { - info("The claiming feature is under development and still subject to change before the next release"); - return; + if (claimed_id != NULL) { + freez(claimed_id); + claimed_id = NULL; + } char filename[FILENAME_MAX + 1]; struct stat statbuf; - snprintfz(filename, FILENAME_MAX, "%s/claim.d/is_claimed", netdata_configured_user_config_dir); + snprintfz(filename, FILENAME_MAX, "%s/claim.d/claimed_id", netdata_configured_user_config_dir); + // check if the file exists if (lstat(filename, &statbuf) != 0) { - info("File '%s' was not found. Setting state to AGENT_UNCLAIMED.", filename); - claiming_status = AGENT_UNCLAIMED; - } else { - info("File '%s' was found. Setting state to AGENT_CLAIMED.", filename); - claiming_status = AGENT_CLAIMED; + info("lstat on File '%s' failed reason=\"%s\". Setting state to AGENT_UNCLAIMED.", filename, strerror(errno)); + return; } + if (unlikely(statbuf.st_size == 0)) { + info("File '%s' has no contents. Setting state to AGENT_UNCLAIMED.", filename); + return; + } + + FILE *f = fopen(filename, "rt"); + if (unlikely(f == NULL)) { + error("File '%s' cannot be opened. Setting state to AGENT_UNCLAIMED.", filename); + return; + } + + claimed_id = callocz(1, statbuf.st_size + 1); + size_t bytes_read = fread(claimed_id, 1, statbuf.st_size, f); + claimed_id[bytes_read] = 0; + info("File '%s' was found. 
Setting state to AGENT_CLAIMED.", filename); + fclose(f); + + snprintfz(filename, FILENAME_MAX, "%s/claim.d/private.pem", netdata_configured_user_config_dir); } diff --git a/claim/claim.h b/claim/claim.h index 3b2b867434..8eda3560b6 100644 --- a/claim/claim.h +++ b/claim/claim.h @@ -8,7 +8,7 @@ extern char *claiming_pending_arguments; void claim_agent(char *claiming_arguments); -uint8_t is_agent_claimed(void); +char *is_agent_claimed(void); void load_claiming_state(void); #endif //NETDATA_CLAIM_H diff --git a/claim/netdata-claim.sh.in b/claim/netdata-claim.sh.in index e565e3de28..42eb99d718 100755 --- a/claim/netdata-claim.sh.in +++ b/claim/netdata-claim.sh.in @@ -211,6 +211,6 @@ if [ "${HTTP_STATUS_CODE}" -ne 200 ] ; then fi rm -f "${CLAIMING_DIR}/tmpout.txt" -touch "${CLAIMING_DIR}/is_claimed" +echo -n "${ID}" >"${CLAIMING_DIR}/claimed_id" rm -f "${CLAIMING_DIR}/token" -echo >&2 "Agent was successfully claimed." \ No newline at end of file +echo >&2 "Agent was successfully claimed." -- cgit v1.2.3