summary | refs | log | tree | commit | diff | stats
path: root/python.d
diff options
context:
space:
mode:
author: Costa Tsaousis (ktsaou) <costa@tsaousis.gr> 2016-11-15 23:47:46 +0200
committer: Costa Tsaousis (ktsaou) <costa@tsaousis.gr> 2016-11-15 23:47:46 +0200
commit: a6c5119ecb72039351183ccb9a287e98fa686722 (patch)
tree: b28f4b803c438c989385c34a09a7ceb7885015b0 /python.d
parent: 3d760371671a78e26f9ee45ae8917c64dd792b9e (diff)
python.d.plugin now never gives up data collection - it slows down data collection attempts instead; #999
Diffstat (limited to 'python.d')
-rw-r--r-- python.d/python_modules/base.py 23
1 files changed, 17 insertions, 6 deletions
diff --git a/python.d/python_modules/base.py b/python.d/python_modules/base.py
index 9d428492c7..aa89b223e3 100644
--- a/python.d/python_modules/base.py
+++ b/python.d/python_modules/base.py
@@ -144,16 +144,17 @@ class SimpleService(threading.Thread):
:return: None
"""
step = float(self.timetable['freq'])
+ penalty = 0
self.timetable['last'] = float(time.time() - step)
- self.debug("starting data collection - update frequency:", str(step), ", retries allowed:", str(self.retries))
+ self.debug("starting data collection - update frequency:", str(step), " retries allowed:", str(self.retries))
while True: # run forever, unless something is wrong
now = float(time.time())
- next = self.timetable['next'] = now - (now % step) + step
+ next = self.timetable['next'] = now - (now % step) + step + penalty
# it is important to do this in a loop
# sleep() is interruptable
while now < next:
- self.debug("sleeping for", str(next - now), "secs to reach frequency of", str(step), "secs, now:", str(now), ", next:", str(next))
+ self.debug("sleeping for", str(next - now), "secs to reach frequency of", str(step), "secs, now:", str(now), " next:", str(next), " penalty:", str(penalty))
time.sleep(next - now)
now = float(time.time())
@@ -167,14 +168,24 @@ class SimpleService(threading.Thread):
if status:
# it is good
self.retries_left = self.retries
+ penalty = 0
else:
# it failed
self.retries_left -= 1
if self.retries_left <= 0:
- self.alert("failed to collect data - no more retries allowed - aborting data collection")
- return
+ if penalty == 0:
+ penalty = float(self.retries * step) / 2
+ else:
+ penalty *= 1.5
+
+ if penalty > 600:
+ penalty = 600
+
+ self.retries_left = self.retries
+ self.alert("failed to collect data for " + str(self.retries) + " times - increasing penalty to " + str(penalty) + " sec and trying again")
+
else:
- self.error("failed to collect data. " + str(self.retries_left) + " retries left.")
+ self.error("failed to collect data - " + str(self.retries_left) + " retries left - penalty: " + str(penalty) + " sec")
# --- CHART ---