summary refs log tree commit diff stats
path: root/src/health
diff options
context:
space:
mode:
Diffstat (limited to 'src/health')
-rw-r--r-- src/health/health.d/megacli.conf 2
-rw-r--r-- src/health/health.d/storcli.conf 61
2 files changed, 62 insertions, 1 deletion
diff --git a/src/health/health.d/megacli.conf b/src/health/health.d/megacli.conf
index 8d71d585bf..27721fa9ad 100644
--- a/src/health/health.d/megacli.conf
+++ b/src/health/health.d/megacli.conf
@@ -38,7 +38,7 @@ component: RAID
type: System
component: RAID
lookup: sum -10s
- units: media errors
+ units: failures
every: 10s
warn: $this > 0
delay: up 1m down 5m multiplier 2 max 10m
diff --git a/src/health/health.d/storcli.conf b/src/health/health.d/storcli.conf
new file mode 100644
index 0000000000..0beda76862
--- /dev/null
+++ b/src/health/health.d/storcli.conf
@@ -0,0 +1,61 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+# Controllers
+
+ template: storcli_controller_status
+ on: storcli.controller_status
+ class: Errors
+ type: System
+component: RAID
+ lookup: average -1m unaligned percentage of optimal
+ units: %
+ every: 10s
+ crit: $this < 100
+ delay: down 5m multiplier 2 max 10m
+ summary: RAID controller ${label:controller_number} health
+ info: RAID controller ${label:controller_number} health status is not optimal
+ to: sysadmin
+
+ template: storcli_controller_bbu_status
+ on: storcli.controller_bbu_status
+ class: Errors
+ type: System
+component: RAID
+ lookup: average -1m unaligned percentage of healthy,na
+ units: %
+ every: 10s
+ crit: $this < 100
+ delay: down 5m multiplier 2 max 10m
+ summary: RAID controller ${label:controller_number} BBU health
+ info: RAID controller ${label:controller_number} BBU is unhealthy
+ to: sysadmin
+
+# Physical Drives
+
+ template: storcli_phys_drive_errors
+ on: storcli.phys_drive_errors
+ class: Errors
+ type: System
+component: RAID
+ lookup: sum -10s
+ units: errors
+ every: 10s
+ warn: $this > 0
+ delay: up 1m down 5m multiplier 2 max 10m
+ summary: RAID PD c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} errors
+ info: RAID physical drive c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} errors
+ to: sysadmin
+
+ template: storcli_phys_drive_predictive_failures
+ on: storcli.phys_drive_predictive_failures
+ class: Errors
+ type: System
+component: RAID
+ lookup: sum -10s
+ units: failures
+ every: 10s
+ warn: $this > 0
+ delay: up 1m down 5m multiplier 2 max 10m
+ summary: RAID PD c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} predictive failures
+ info: RAID physical drive c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} predictive failures
+ to: sysadmin