diff options
author | Andrew Moss <1043609+amoss@users.noreply.github.com> | 2020-05-20 16:28:45 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-05-20 16:28:45 +0200 |
commit | 53efa359d60683cad6dc73ecf84d0df7ee621303 (patch) | |
tree | b1537798907719d4b33ecfed7122be67f789991c /claim | |
parent | ae0d6007f1b2caf32202c95bd47a3e5ccfe80fa0 (diff) |
Regenerate topic base on connect (#9044)
Allow agents to be reclaimed while they are running. Fix a race hazard between claiming and the ACLK. Changes the private key, base topic, username and contents of the LWT.
Co-authored-by: <hilari@hilarimoragrega.com>
Diffstat (limited to 'claim')
-rw-r--r-- | claim/claim.c | 42 |
1 files changed, 33 insertions, 9 deletions
diff --git a/claim/claim.c b/claim/claim.c index af6ec41f76..59c824235d 100644 --- a/claim/claim.c +++ b/claim/claim.c @@ -26,12 +26,19 @@ static char *claiming_errors[] = { "Gateway Timeout", // 16 "Service Unavailable" // 17 }; - +static netdata_mutex_t claim_mutex = NETDATA_MUTEX_INITIALIZER; static char *claimed_id = NULL; -char *is_agent_claimed(void) +/* Retrieve the claim id for the agent. + * Caller owns the string. +*/ +char *is_agent_claimed() { - return claimed_id; + char *result; + netdata_mutex_lock(&claim_mutex); + result = (claimed_id == NULL) ? NULL : strdup(claimed_id); + netdata_mutex_unlock(&claim_mutex); + return result; } #define CLAIMING_COMMAND_LENGTH 16384 @@ -109,12 +116,34 @@ void claim_agent(char *claiming_arguments) #endif } +#ifdef ENABLE_ACLK +extern int aclk_connected, aclk_kill_link; +#endif + +/* Change the claimed state of the agent. + * + * This only happens when the user has explicitly requested it: + * - via the cli tool by reloading the claiming state + * - after spawning the claim because of a command-line argument + * If this happens with the ACLK active under an old claim then we MUST KILL THE LINK + */ void load_claiming_state(void) { + // -------------------------------------------------------------------- + // Check if the cloud is enabled +#if defined( DISABLE_CLOUD ) || !defined( ENABLE_ACLK ) + netdata_cloud_setting = 0; +#else + netdata_mutex_lock(&claim_mutex); if (claimed_id != NULL) { freez(claimed_id); claimed_id = NULL; } + if (aclk_connected) + { + info("Agent was already connected to Cloud - forcing reconnection under new credentials"); + aclk_kill_link = 1; + } // Propagate into aclk and registry. Be kind of atomic... appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", DEFAULT_CLOUD_BASE_URL); @@ -124,18 +153,13 @@ void load_claiming_state(void) long bytes_read; claimed_id = read_by_filename(filename, &bytes_read); + netdata_mutex_unlock(&claim_mutex); // Only the main thread can call this function, safe to release and then read if (!claimed_id) { info("Unable to load '%s', setting state to AGENT_UNCLAIMED", filename); return; } info("File '%s' was found. Setting state to AGENT_CLAIMED.", filename); - - // -------------------------------------------------------------------- - // Check if the cloud is enabled -#if defined( DISABLE_CLOUD ) || !defined( ENABLE_ACLK ) - netdata_cloud_setting = 0; -#else netdata_cloud_setting = appconfig_get_boolean(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", 1); #endif } |