diff options
author | Ilya Mashchenko <ilya@netdata.cloud> | 2023-09-27 13:23:01 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-27 13:23:01 +0300 |
commit | fd3de3819816c67f69bea105c1e6a801a568084f (patch) | |
tree | 296c0bca2d50ee4fe3bb54fee213b7201c032ba7 /health | |
parent | 2a18bed75556dcdaa7aa2a605b5babdc696e6a85 (diff) |
health: add upsd alerts (#16036)
Diffstat (limited to 'health')
-rw-r--r-- | health/Makefile.am | 1 |
-rw-r--r-- | health/health.d/upsd.conf | 50 |
2 files changed, 51 insertions, 0 deletions
```diff
diff --git a/health/Makefile.am b/health/Makefile.am
index 20e0008602..36e004779c 100644
--- a/health/Makefile.am
+++ b/health/Makefile.am
@@ -94,6 +94,7 @@ dist_healthconfig_DATA = \
     health.d/tcp_resets.conf \
     health.d/udp_errors.conf \
     health.d/unbound.conf \
+    health.d/upsd.conf \
     health.d/vcsa.conf \
     health.d/vernemq.conf \
     health.d/vsphere.conf \
diff --git a/health/health.d/upsd.conf b/health/health.d/upsd.conf
new file mode 100644
index 0000000000..703a648812
--- /dev/null
+++ b/health/health.d/upsd.conf
@@ -0,0 +1,50 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+ template: upsd_10min_ups_load
+       on: upsd.ups_load
+    class: Utilization
+     type: Power Supply
+component: UPS
+       os: *
+    hosts: *
+   lookup: average -10m unaligned of load
+    units: %
+    every: 1m
+     warn: $this > (($status >= $WARNING) ? (70) : (80))
+     crit: $this > (($status == $CRITICAL) ? (85) : (95))
+    delay: down 10m multiplier 1.5 max 1h
+  summary: UPS ${label:ups_name} load
+     info: UPS ${label:ups_name} average load over the last 10 minutes
+       to: sitemgr
+
+ template: upsd_ups_battery_charge
+       on: upsd.ups_battery_charge
+    class: Errors
+     type: Power Supply
+component: UPS
+       os: *
+    hosts: *
+   lookup: average -60s unaligned of charge
+    units: %
+    every: 60s
+     warn: $this < 75
+     crit: $this < 40
+    delay: down 10m multiplier 1.5 max 1h
+  summary: UPS ${label:ups_name} battery charge
+     info: UPS ${label:ups_name} average battery charge over the last minute
+       to: sitemgr
+
+ template: upsd_ups_last_collected_secs
+       on: upsd.ups_load
+    class: Latency
+     type: Power Supply
+component: UPS device
+     calc: $now - $last_collected_t
+    every: 10s
+    units: seconds ago
+     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
+     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+    delay: down 5m multiplier 1.5 max 1h
+  summary: UPS ${label:ups_name} last collected
+     info: UPS ${label:ups_name} number of seconds since the last successful data collection
+       to: sitemgr
```