summaryrefslogtreecommitdiffstats
path: root/src/plugins_d.c
diff options
context:
space:
mode:
author: Costa Tsaousis (ktsaou) <costa@tsaousis.gr> 2016-06-09 22:49:44 +0300
committer: Costa Tsaousis (ktsaou) <costa@tsaousis.gr> 2016-06-09 22:49:44 +0300
commit: bb8cdaf9dd25372bbbd09d916e9afe7f907ec1d6 (patch)
tree: c796ac14b633b3fed34cfa43ff0a44ad19558d2a /src/plugins_d.c
parent: 61360d367bf33f2590740a50863f739c819a7d81 (diff)
Properly handle the exit status of plugins to avoid infinite restart attempts; plugins that report failure are now restarted up to 10 times if they have collected values in the past; apache fixed to work with the latest apache; the example plugin is now disabled by default, to prevent starting bash on systems that don't use any other bash plugin.
Diffstat (limited to 'src/plugins_d.c')
-rw-r--r-- src/plugins_d.c 68
1 files changed, 52 insertions, 16 deletions
diff --git a/src/plugins_d.c b/src/plugins_d.c
index 0ccbd36e45..d7815433e4 100644
--- a/src/plugins_d.c
+++ b/src/plugins_d.c
@@ -125,6 +125,8 @@ void *pluginsd_worker_thread(void *arg)
uint32_t STOPPING_WAKE_ME_UP_PLEASE_HASH = simple_hash("STOPPING_WAKE_ME_UP_PLEASE");
#endif
+ size_t count = 0;
+
while(likely(1)) {
if(unlikely(netdata_exit)) break;
@@ -137,7 +139,6 @@ void *pluginsd_worker_thread(void *arg)
info("PLUGINSD: '%s' running on pid %d", cd->fullfilename, cd->pid);
RRDSET *st = NULL;
- unsigned long long count = 0;
char *s;
uint32_t hash;
@@ -182,8 +183,6 @@ void *pluginsd_worker_thread(void *arg)
if(unlikely(st->debug)) debug(D_PLUGINSD, "PLUGINSD: '%s' is setting dimension %s/%s to %s", cd->fullfilename, st->id, dimension, value?value:"<nothing>");
if(value) rrddim_set(st, dimension, strtoll(value, NULL, 0));
-
- count++;
}
else if(likely(hash == BEGIN_HASH && !strcmp(s, "BEGIN"))) {
char *id = words[1];
@@ -223,6 +222,8 @@ void *pluginsd_worker_thread(void *arg)
rrdset_done(st);
st = NULL;
+
+ count++;
}
else if(likely(hash == FLUSH_HASH && !strcmp(s, "FLUSH"))) {
debug(D_PLUGINSD, "PLUGINSD: '%s' is requesting a FLUSH", cd->fullfilename);
@@ -386,17 +387,17 @@ void *pluginsd_worker_thread(void *arg)
break;
}
}
+ if(likely(count)) {
+ cd->successful_collections += count;
+ cd->serial_failures = 0;
+ }
+ else
+ cd->serial_failures++;
- info("PLUGINSD: '%s' on pid %d stopped.", cd->fullfilename, cd->pid);
+ info("PLUGINSD: '%s' on pid %d stopped after %zu successful data collections (ENDs).", cd->fullfilename, cd->pid, count);
- // fgets() failed or loop broke
+ // get the return code
int code = mypclose(fp, cd->pid);
- if(code == 1 || code == 127) {
- // 1 = DISABLE
- // 127 = cannot even run it
- error("PLUGINSD: '%s' (pid %d) exited with code %d. Disabling it.", cd->fullfilename, cd->pid, code);
- cd->enabled = 0;
- }
if(netdata_exit) {
cd->pid = 0;
@@ -406,14 +407,49 @@ void *pluginsd_worker_thread(void *arg)
return NULL;
}
- if(unlikely(!count && cd->enabled)) {
- error("PLUGINSD: '%s' (pid %d) does not generate useful output. Waiting a bit before starting it again.", cd->fullfilename, cd->pid);
- sleep((unsigned int) (cd->update_every * 10));
+ if(code != 0) {
+ // the plugin reports failure
+
+ if(likely(!cd->successful_collections)) {
+ // nothing collected - disable it
+ error("PLUGINSD: '%s' exited with error code %d. Disabling it.", cd->fullfilename, code);
+ cd->enabled = 0;
+ }
+ else {
+ // we have collected something
+
+ if(likely(cd->serial_failures <= 10)) {
+ error("PLUGINSD: '%s' exited with error code %d, but has given useful output in the past (%zu times). Waiting a bit before starting it again.", cd->fullfilename, code, cd->successful_collections);
+ sleep((unsigned int) (cd->update_every * 10));
+ }
+ else {
+ error("PLUGINSD: '%s' exited with error code %d, but has given useful output in the past (%zu times). We tried %d times to restart it, but it failed to generate data. Disabling it.", cd->fullfilename, code, cd->successful_collections, cd->serial_failures);
+ cd->enabled = 0;
+ }
+ }
}
+ else {
+ // the plugin reports success
+ if(unlikely(!cd->successful_collections)) {
+ // we have collected nothing so far
+
+ if(likely(cd->serial_failures <= 10)) {
+ error("PLUGINSD: '%s' (pid %d) does not generate useful output but it reports success (exits with 0). Waiting a bit before starting it again.", cd->fullfilename, cd->pid);
+ sleep((unsigned int) (cd->update_every * 10));
+ }
+ else {
+ error("PLUGINSD: '%s' (pid %d) does not generate useful output, although it reports success (exits with 0), but we have tried %d times to collect something. Disabling it.", cd->fullfilename, cd->pid, cd->serial_failures);
+ cd->enabled = 0;
+ }
+ }
+ else
+ sleep((unsigned int) cd->update_every);
+ }
cd->pid = 0;
- if(likely(cd->enabled)) sleep((unsigned int) cd->update_every);
- else break;
+
+ if(unlikely(!cd->enabled))
+ break;
}
cd->obsolete = 1;