summaryrefslogtreecommitdiffstats
path: root/aclk
diff options
context:
space:
mode:
author: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com> 2020-03-13 23:38:13 +0200
committer: GitHub <noreply@github.com> 2020-03-13 23:38:13 +0200
commit: 869a22d28731d6607887c2b8f85828d3806e872a (patch)
tree: fb9aa55e80ac353da3efda1af02d2cb6503f884a /aclk
parent: 2bbcb9308be69af2ed5c43c7e10991d083ba51b0 (diff)
ACLK: Improved the agent "pop-corning" phase (#8398)
* Ignore the cloud commands when the agent is initializing
* Tune the agent popcorning
* Reorder waiting msg, stable timeout back to 10 seconds
* Moved checks for popcorning to the calling functions for code clarity
Diffstat (limited to 'aclk')
-rw-r--r-- aclk/agent_cloud_link.c 39
1 files changed, 26 insertions, 13 deletions
diff --git a/aclk/agent_cloud_link.c b/aclk/agent_cloud_link.c
index 424a30403d..d8e3d20e4e 100644
--- a/aclk/agent_cloud_link.c
+++ b/aclk/agent_cloud_link.c
@@ -300,12 +300,6 @@ int aclk_queue_query(char *topic, char *data, char *msg_id, char *query, int run
if (unlikely(waiting_init))
return 0;
- // Ignore all commands if agent not stable and reset the last_init_sequence mark
- if (agent_state == AGENT_INITIALIZING) {
- last_init_sequence = now_realtime_sec();
- return 0;
- }
-
run_after = now_realtime_sec() + run_after;
QUERY_LOCK;
@@ -689,7 +683,10 @@ void aclk_add_collector(const char *hostname, const char *plugin_name, const cha
return;
}
- aclk_queue_query("connector", NULL, NULL, NULL, 0, 1, ACLK_CMD_ONCONNECT);
+ if (unlikely(agent_state == AGENT_INITIALIZING))
+ last_init_sequence = now_realtime_sec();
+ else
+ aclk_queue_query("connector", NULL, NULL, NULL, 0, 1, ACLK_CMD_ONCONNECT);
COLLECTOR_UNLOCK;
}
@@ -721,7 +718,10 @@ void aclk_del_collector(const char *hostname, const char *plugin_name, const cha
COLLECTOR_UNLOCK;
- aclk_queue_query("on_connect", NULL, NULL, NULL, 0, 1, ACLK_CMD_ONCONNECT);
+ if (unlikely(agent_state == AGENT_INITIALIZING))
+ last_init_sequence = now_realtime_sec();
+ else
+ aclk_queue_query("on_connect", NULL, NULL, NULL, 0, 1, ACLK_CMD_ONCONNECT);
_free_collector(tmp_collector);
}
@@ -898,15 +898,16 @@ void *aclk_query_main_thread(void *ptr)
time_t checkpoint;
checkpoint = now_realtime_sec() - last_init_sequence;
- info("Waiting for agent collectors to initialize");
- sleep_usec(USEC_PER_SEC * ACLK_STABLE_TIMEOUT);
if (checkpoint > ACLK_STABLE_TIMEOUT) {
agent_state = AGENT_STABLE;
info("AGENT stable, last collector initialization activity was %ld seconds ago", checkpoint);
#ifdef ACLK_DEBUG
_dump_connector_list();
#endif
+ break;
}
+ info("Waiting for agent collectors to initialize. Last activity was %ld seconds ago" , checkpoint);
+ sleep_usec(USEC_PER_SEC * 1);
}
while (!netdata_exit) {
@@ -1396,6 +1397,10 @@ void *aclk_main(void *ptr)
}
if (!create_private_key() && !_mqtt_lib_init())
break;
+
+ if (netdata_exit)
+ goto exited;
+
sleep_usec(USEC_PER_SEC * 60);
}
create_publish_base_topic();
@@ -1737,7 +1742,7 @@ void aclk_single_update_enable()
// Trigged by a health reload, sends the alarm metadata
void aclk_alarm_reload()
{
- if (unlikely(agent_state != AGENT_STABLE))
+ if (unlikely(agent_state == AGENT_INITIALIZING))
return;
aclk_queue_query("on_connect", NULL, NULL, NULL, 0, 1, ACLK_CMD_ONCONNECT);
@@ -1789,7 +1794,10 @@ int aclk_update_chart(RRDHOST *host, char *chart_name, ACLK_CMD aclk_cmd)
if (unlikely(aclk_disable_single_updates))
return 0;
- aclk_queue_query("_chart", host->hostname, NULL, chart_name, 0, 1, aclk_cmd);
+ if (unlikely(agent_state == AGENT_INITIALIZING))
+ last_init_sequence = now_realtime_sec();
+ else
+ aclk_queue_query("_chart", host->hostname, NULL, chart_name, 0, 1, aclk_cmd);
return 0;
#endif
}
@@ -1801,7 +1809,7 @@ int aclk_update_alarm(RRDHOST *host, ALARM_ENTRY *ae)
if (host != localhost)
return 0;
- if (agent_state != AGENT_STABLE)
+ if (unlikely(agent_state == AGENT_INITIALIZING))
return 0;
/*
@@ -1842,6 +1850,11 @@ int aclk_handle_cloud_request(char *payload)
.type_id = NULL, .msg_id = NULL, .callback_topic = NULL, .payload = NULL, .version = 0
};
+ if (unlikely(agent_state == AGENT_INITIALIZING)) {
+ debug(D_ACLK, "Ignoring cloud request; agent not in stable state");
+ return 0;
+ }
+
if (unlikely(!payload)) {
debug(D_ACLK, "ACLK incoming message is empty");
return 0;