summary | refs | log | tree | commit | diff | stats
path: root/health
diff options
context:
space:
mode:
author: thiagoftsm <thiagoftsm@gmail.com> 2020-05-26 11:57:56 +0000
committer: GitHub <noreply@github.com> 2020-05-26 11:57:56 +0000
commit: ba47e197056601c7db3ce8db0db277d155e8118a (patch)
tree: 2fac52f2b06f0647820f722acdacf9ef6296fba3 /health
parent: e92d2ce7a155a33ee6b0acfd928f357251c02c69 (diff)
New alarms (exporting and Backend) (#9075)
New alarms for exporting and backend.
Diffstat (limited to 'health')
-rw-r--r-- health/Makefile.am 1
-rw-r--r-- health/health.d/backend.conf 11
-rw-r--r-- health/health.d/exporting.conf 34
3 files changed, 46 insertions, 0 deletions
diff --git a/health/Makefile.am b/health/Makefile.am
index c9d2451bd0..a69836879c 100644
--- a/health/Makefile.am
+++ b/health/Makefile.am
@@ -45,6 +45,7 @@ dist_healthconfig_DATA = \
health.d/dockerd.conf \
health.d/elasticsearch.conf \
health.d/entropy.conf \
+ health.d/exporting.conf \
health.d/fping.conf \
health.d/ioping.conf \
health.d/fronius.conf \
diff --git a/health/health.d/backend.conf b/health/health.d/backend.conf
index 7af100d8f4..e51b8aa5f7 100644
--- a/health/health.d/backend.conf
+++ b/health/health.d/backend.conf
@@ -1,3 +1,13 @@
+# Alert that backends subsystem will be disabled soon
+ alarm: backend_metrics_eol
+ on: netdata.backend_metrics
+ units: boolean
+ calc: $now - $last_collected_t
+ every: 1m
+ warn: $this > 0
+ delay: down 5m multiplier 1.5 max 1h
+ info: The backends subsystem is deprecated and will be removed soon. Migrate your configuration to exporting.conf.
+ to: sysadmin
# make sure we are sending data to backend
@@ -32,6 +42,7 @@
info: number of metrics lost due to repeating failures to contact the backend server
to: dba
+
# this chart has been removed from netdata
# alarm: backend_slow
# on: netdata.backend_latency
diff --git a/health/health.d/exporting.conf b/health/health.d/exporting.conf
new file mode 100644
index 0000000000..506cb0cf75
--- /dev/null
+++ b/health/health.d/exporting.conf
@@ -0,0 +1,34 @@
+
+template: exporting_last_buffering
+families: *
+ on: exporting_data_size
+ calc: $now - $last_collected_t
+ units: seconds ago
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of seconds since the last successful buffering of exporting data
+ to: dba
+
+template: exporting_metrics_sent
+families: *
+ on: exporting_data_size
+ units: %
+ calc: abs($sent) * 100 / abs($buffered)
+ every: 10s
+ warn: $this != 100
+ delay: down 5m multiplier 1.5 max 1h
+ info: percentage of metrics sent to the external database server
+ to: dba
+
+template: exporting_metrics_lost
+families: *
+ on: exporting_data_size
+ units: metrics
+ calc: abs($lost)
+ every: 10s
+ crit: ($this != 0) || ($status == $CRITICAL && abs($sent) == 0)
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of metrics lost due to repeating failures to contact the external database server
+ to: dba