summary | refs | log | tree | commit | diff | stats
path: root/health
diff options
context:
space:
mode:
author Ilya Mashchenko <ilya@netdata.cloud> 2022-03-02 12:50:37 +0200
committer GitHub <noreply@github.com> 2022-03-02 12:50:37 +0200
commit 57ebc01370d4b67a654db6b1a9b2ff2365956d57 (patch)
tree 7d9230321af0aef7ef026959617ad16da4d06eef /health
parent 23fab077290b61507f65885b52307291f624ae9b (diff)
feat(health): add charts.d/nut alarms (#12285)
Diffstat (limited to 'health')
-rw-r--r-- health/Makefile.am 1
-rw-r--r-- health/health.d/nut.conf 47
2 files changed, 48 insertions, 0 deletions
diff --git a/health/Makefile.am b/health/Makefile.am
index 519c9b5a36..370186d918 100644
--- a/health/Makefile.am
+++ b/health/Makefile.am
@@ -65,6 +65,7 @@ dist_healthconfig_DATA = \
health.d/mysql.conf \
health.d/net.conf \
health.d/netfilter.conf \
+ health.d/nut.conf \
health.d/pihole.conf \
health.d/portcheck.conf \
health.d/processes.conf \
diff --git a/health/health.d/nut.conf b/health/health.d/nut.conf
new file mode 100644
index 0000000000..6231dd97b2
--- /dev/null
+++ b/health/health.d/nut.conf
@@ -0,0 +1,47 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+ template: nut_10min_ups_load
+ on: nut.load
+ class: Utilization
+ type: Power Supply
+component: UPS
+ os: *
+ hosts: *
+ lookup: average -10m unaligned of load
+ units: %
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (70) : (80))
+ crit: $this > (($status == $CRITICAL) ? (85) : (95))
+ delay: down 10m multiplier 1.5 max 1h
+ info: average UPS load over the last 10 minutes
+ to: sitemgr
+
+ template: nut_ups_charge
+ on: nut.charge
+ class: Errors
+ type: Power Supply
+component: UPS
+ os: *
+ hosts: *
+ lookup: average -60s unaligned of battery_charge
+ units: %
+ every: 60s
+ warn: $this < 100
+ crit: $this < (($status == $CRITICAL) ? (60) : (50))
+ delay: down 10m multiplier 1.5 max 1h
+ info: average UPS charge over the last minute
+ to: sitemgr
+
+ template: nut_last_collected_secs
+ on: nut.load
+ class: Latency
+ type: Power Supply
+component: UPS device
+ calc: $now - $last_collected_t
+ every: 10s
+ units: seconds ago
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of seconds since the last successful data collection
+ to: sitemgr