summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Ilya Mashchenko <ilya@netdata.cloud> 2021-03-23 10:44:02 +0300
committer: GitHub <noreply@github.com> 2021-03-23 10:44:02 +0300
commit: 6fe27ffe0109d0c9bbeddaa893715cb389fb4234 (patch)
tree: d40b9748d1fa171bd49deba5cd7c9ef2a9b7b516
parent: 877021800d3810c154e9c4eec8a7b6cab8855267 (diff)
health: apply megacli alarms for all adapters/physical disks (#10834)
-rw-r--r-- health/health.d/megacli.conf 54
1 file changed, 31 insertions, 23 deletions
diff --git a/health/health.d/megacli.conf b/health/health.d/megacli.conf
index 73f106530a..434eee3b41 100644
--- a/health/health.d/megacli.conf
+++ b/health/health.d/megacli.conf
@@ -1,12 +1,40 @@
+
+## Adapters (controllers)
+
template: megacli_adapter_state
on: megacli.adapter_degraded
- units: is degraded
- lookup: sum -10s
+ lookup: max -10s foreach *
+ units: boolean
every: 10s
crit: $this > 0
- info: adapter state
+ delay: down 5m multiplier 2 max 10m
+ info: adapter state is degraded
+ to: sysadmin
+
+## Physical Disks
+
+template: megacli_pd_predictive_failures
+ on: megacli.pd_predictive_failure
+ lookup: sum -10s foreach *
+ units: predictive failures
+ every: 10s
+ warn: $this > 0
+ delay: up 1m down 5m multiplier 2 max 10m
+ info: physical drive predictive failures
+ to: sysadmin
+
+template: megacli_pd_media_errors
+ on: megacli.pd_media_error
+ lookup: sum -10s foreach *
+ units: media errors
+ every: 10s
+ warn: $this > 0
+ delay: up 1m down 5m multiplier 2 max 10m
+ info: physical drive media errors
to: sysadmin

+## Battery Backup Units (BBU)
+
template: megacli_bbu_relative_charge
on: megacli.bbu_relative_charge
units: percent
@@ -26,23 +54,3 @@ template: megacli_bbu_cycle_count
crit: $this >= 500
info: BBU cycle count
to: sysadmin
-
-template: megacli_pd_media_errors
- on: megacli.pd_media_error
- units: media errors
- lookup: sum -10s
- every: 10s
- warn: $this > 0
- delay: down 1m multiplier 2 max 10m
- info: physical drive media errors
- to: sysadmin
-
-template: megacli_pd_predictive_failures
- on: megacli.pd_predictive_failure
- units: predictive failures
- lookup: sum -10s
- every: 10s
- warn: $this > 0
- delay: down 1m multiplier 2 max 10m
- info: physical drive predictive failures
- to: sysadmin