summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCosta Tsaousis <costa@tsaousis.gr>2018-07-08 04:57:25 +0300
committerGitHub <noreply@github.com>2018-07-08 04:57:25 +0300
commit8e8d74603e24dc800629b0621d33b1868e1deaaf (patch)
tree900179c337ca87eb762de45e3b88b71d3d16dbde
parent30bbfc27442479744b49813261a573fd8c98b343 (diff)
parent81581660c72a0b4d0e9edcdcdbf920717200ab60 (diff)
Merge pull request #3930 from l2isbad/megacli
python megacli plugin
-rw-r--r--conf.d/Makefile.am2
-rw-r--r--conf.d/health.d/megacli.conf48
-rw-r--r--conf.d/python.d/megacli.conf68
-rw-r--r--python.d/Makefile.am1
-rw-r--r--python.d/README.md29
-rw-r--r--python.d/megacli.chart.py280
-rw-r--r--python.d/python_modules/bases/FrameworkServices/ExecutableService.py6
7 files changed, 431 insertions, 3 deletions
diff --git a/conf.d/Makefile.am b/conf.d/Makefile.am
index 7bd3622f65..0026e3f130 100644
--- a/conf.d/Makefile.am
+++ b/conf.d/Makefile.am
@@ -53,6 +53,7 @@ dist_pythonconfig_DATA = \
python.d/litespeed.conf \
python.d/logind.conf \
python.d/mdstat.conf \
+ python.d/megacli.conf \
python.d/memcached.conf \
python.d/mongodb.conf \
python.d/mysql.conf \
@@ -111,6 +112,7 @@ dist_healthconfig_DATA = \
health.d/isc_dhcpd.conf \
health.d/lighttpd.conf \
health.d/mdstat.conf \
+ health.d/megacli.conf \
health.d/memcached.conf \
health.d/memory.conf \
health.d/mongodb.conf \
diff --git a/conf.d/health.d/megacli.conf b/conf.d/health.d/megacli.conf
new file mode 100644
index 0000000000..1881a7be14
--- /dev/null
+++ b/conf.d/health.d/megacli.conf
@@ -0,0 +1,48 @@
+ alarm: adapter_state
+ on: megacli.adapter_degraded
+ units: is degraded
+ lookup: sum -10s
+ every: 10s
+ crit: $this > 0
+ info: adapter state
+ to: sysadmin
+
+ template: bbu_relative_charge
+ on: megacli.bbu_relative_charge
+ units: percent
+ lookup: average -10s
+ every: 10s
+ warn: $this <= (($status >= $WARNING) ? (85) : (80))
+ crit: $this <= (($status == $CRITICAL) ? (50) : (40))
+ info: BBU relative state of charge
+ to: sysadmin
+
+ template: bbu_cycle_count
+ on: megacli.bbu_cycle_count
+ units: cycle count
+ lookup: average -10s
+ every: 10s
+ warn: $this >= 100
+ crit: $this >= 500
+ info: BBU cycle count
+ to: sysadmin
+
+ alarm: pd_media_errors
+ on: megacli.pd_media_error
+ units: media errors
+ lookup: sum -10s
+ every: 10s
+ warn: $this > 0
+ delay: down 1m multiplier 2 max 10m
+ info: physical drive media errors
+ to: sysadmin
+
+ alarm: pd_predictive_failures
+ on: megacli.pd_predictive_failure
+ units: predictive failures
+ lookup: sum -10s
+ every: 10s
+ warn: $this > 0
+ delay: down 1m multiplier 2 max 10m
+ info: physical drive predictive failures
+ to: sysadmin
diff --git a/conf.d/python.d/megacli.conf b/conf.d/python.d/megacli.conf
new file mode 100644
index 0000000000..d84078ecb1
--- /dev/null
+++ b/conf.d/python.d/megacli.conf
@@ -0,0 +1,68 @@
+# netdata python.d.plugin configuration for megacli
+#
+# This file is in YAML format. Generally the format is:
+#
+# name: value
+#
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 60
+
+# autodetection_retry sets the job re-check interval in seconds.
+# The job is not deleted if check fails.
+# Attempts to start the job are made once every autodetection_retry.
+# This feature is disabled by default.
+# autodetection_retry: 0
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed running at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# All python.d.plugin JOBS (for all its modules) support a set of
+# predefined parameters. These are:
+#
+# job_name:
+# name: myname # the JOB's name as it will appear at the
+# # dashboard (by default is the job_name)
+# # JOBs sharing a name are mutually exclusive
+# update_every: 1 # the JOB's data collection frequency
+# priority: 60000 # the JOB's order on the dashboard
+# retries: 60 # the JOB's number of restoration attempts
+# autodetection_retry: 0 # the JOB's re-check interval in seconds
+#
+# Additionally to the above, megacli also supports the following:
+#
+# do_battery: yes/no # default is no. Collect battery stats (adds one more megacli call: `megacli -AdpBbuCmd -a0`).
+#
+# ----------------------------------------------------------------------
+
+# IMPORTANT
+# The netdata user needs to be able to sudo the megacli program without a password:
+# netdata ALL=(root) NOPASSWD: /path/to/megacli
+
+
+# uncomment the line below to collect battery statistics
+# do_battery: yes
diff --git a/python.d/Makefile.am b/python.d/Makefile.am
index caf4a6210b..43fcef6c69 100644
--- a/python.d/Makefile.am
+++ b/python.d/Makefile.am
@@ -41,6 +41,7 @@ dist_python_DATA = \
litespeed.chart.py \
logind.chart.py \
mdstat.chart.py \
+ megacli.chart.py \
memcached.chart.py \
mongodb.chart.py \
mysql.chart.py \
diff --git a/python.d/README.md b/python.d/README.md
index 2417fc4302..3a246cfbd1 100644
--- a/python.d/README.md
+++ b/python.d/README.md
@@ -1141,6 +1141,35 @@ No configuration is needed.
---
+# megacli
+
+This module collects adapter, physical drive and battery stats.
+
+**Requirements:**
+ * `netdata` user needs to be able to sudo the `megacli` program without a password
+
+To grab stats it executes:
+ * `sudo -n megacli -LDPDInfo -aAll`
+ * `sudo -n megacli -AdpBbuCmd -a0`
+
+
+It produces:
+
+1. **Adapter State**
+
+2. **Physical Drives Media Errors**
+
+3. **Physical Drives Predictive Failures**
+
+4. **Battery Relative State of Charge**
+
+5. **Battery Cycle Count**
+
+### configuration
+Battery stats are disabled by default in the module configuration file.
+
+---
+
# memcached
Memcached monitoring module. Data grabbed from [stats interface](https://github.com/memcached/memcached/wiki/Commands#stats).
diff --git a/python.d/megacli.chart.py b/python.d/megacli.chart.py
new file mode 100644
index 0000000000..594c4fb17b
--- /dev/null
+++ b/python.d/megacli.chart.py
@@ -0,0 +1,280 @@
+# -*- coding: utf-8 -*-
+# Description: megacli netdata python.d module
+# Author: Ilya Mashchenko (l2isbad)
+# SPDX-License-Identifier: GPL-3.0+
+
+
+import re
+
+from bases.FrameworkServices.ExecutableService import ExecutableService
+from bases.collection import find_binary
+
+
+update_every = 5
+
+
def adapter_charts(ads):
    """Build the chart order and chart definitions for the adapter state chart.

    :param ads: iterable of objects exposing an ``id`` attribute (one per adapter)
    :return: ``(order, charts)`` - list of chart ids and dict of chart definitions
    """
    lines = []
    for adapter in ads:
        # one dimension per adapter: [dimension id, display name]
        lines.append([
            'adapter_{0}_degraded'.format(adapter.id),
            'adapter {0}'.format(adapter.id),
        ])

    options = [None, 'Adapter State', 'is degraded', 'adapter', 'megacli.adapter_degraded', 'line']
    charts = {
        'adapter_degraded': {
            'options': options,
            'lines': lines,
        },
    }
    order = ['adapter_degraded']

    return order, charts
+
+
def pd_charts(pds):
    """Build the chart order and chart definitions for the physical drive charts.

    Two charts are produced (media errors and predictive failures); each gets
    one ``incremental`` dimension per drive, keyed by the drive's slot id.

    :param pds: sequence of objects exposing an ``id`` attribute (one per drive)
    :return: ``(order, charts)`` - list of chart ids and dict of chart definitions
    """
    # (chart id, title, units, context, dimension id suffix)
    specs = (
        ('pd_media_error', 'Physical Drives Media Errors', 'errors/s',
         'megacli.pd_media_error', 'media_error'),
        ('pd_predictive_failure', 'Physical Drives Predictive Failures', 'failures/s',
         'megacli.pd_predictive_failure', 'predictive_failure'),
    )

    order = []
    charts = {}
    for chart_id, title, units, context, suffix in specs:
        lines = []
        for drive in pds:
            lines.append(['slot_{0}_{1}'.format(drive.id, suffix), 'slot {0}'.format(drive.id), 'incremental'])
        order.append(chart_id)
        charts[chart_id] = {
            'options': [None, title, units, 'pd', context, 'line'],
            'lines': lines,
        }

    return order, charts
+
+
def battery_charts(bats):
    """Build the chart order and chart definitions for the battery (BBU) charts.

    Every battery gets its own relative-charge chart and its own cycle-count
    chart. All relative-charge charts are ordered before the cycle-count ones.

    :param bats: sequence of objects exposing an ``id`` attribute (one per battery)
    :return: ``(order, charts)`` - list of chart ids and dict of chart definitions
    """
    # (chart/dimension id template, title, units, context)
    specs = (
        ('bbu_{0}_relative_charge', 'Relative State of Charge', '%', 'megacli.bbu_relative_charge'),
        ('bbu_{0}_cycle_count', 'Cycle Count', 'cycle count', 'megacli.bbu_cycle_count'),
    )

    order = []
    charts = {}
    for id_template, title, units, context in specs:
        for battery in bats:
            chart_id = id_template.format(battery.id)
            order.append(chart_id)
            charts[chart_id] = {
                'options': [None, title, units, 'battery', context, 'line'],
                # the single dimension shares the chart's id
                'lines': [
                    [chart_id, 'adapter {0}'.format(battery.id)],
                ],
            }

    return order, charts
+
+
# The patterns below are applied to megacli output after the relevant lines
# have been stripped and joined with single spaces (see the find_* helpers).

# Groups: adapter number, first word of the state.
RE_ADAPTER = re.compile(r'Adapter #([0-9]+) State\s+: ([a-zA-Z]+)')

# Groups: slot number, media error count, predictive failure count.
RE_VD = re.compile(r'Slot Number: ([0-9]+) Media Error Count: ([0-9]+) Predictive Failure Count: ([0-9]+)')

# Groups: adapter number, relative state of charge (percent), cycle count.
RE_BATTERY = re.compile(r'BBU Capacity Info for Adapter: ([0-9]+) Relative State of Charge: ([0-9]+) % Cycle Count: ([0-9]+)')
+
+
def find_adapters(d):
    """Extract adapters from megacli output lines.

    Only lines that start (with no leading whitespace) with ``Adapter #`` or
    ``State`` are kept; they are stripped, joined with single spaces and
    scanned with ``RE_ADAPTER``.

    :param d: iterable of output lines (strings)
    :return: list of ``Adapter`` objects, one per regex match
    """
    prefixes = ("Adapter #", "State")
    relevant = [line.strip() for line in d if line.startswith(prefixes)]
    matches = RE_ADAPTER.findall(' '.join(relevant))
    return [Adapter(number, state) for number, state in matches]
+
+
def find_pds(d):
    """Extract physical drives from megacli output lines.

    Only lines that start (with no leading whitespace) with ``Slot Number``,
    ``Media Error Count`` or ``Predictive Failure Count`` are kept; they are
    stripped, joined with single spaces and scanned with ``RE_VD``.

    :param d: iterable of output lines (strings)
    :return: list of ``PD`` objects, one per regex match
    """
    prefixes = ("Slot Number", "Media Error Count", "Predictive Failure Count")
    relevant = [line.strip() for line in d if line.startswith(prefixes)]
    matches = RE_VD.findall(' '.join(relevant))
    return [PD(slot, media_errors, predictive_failures) for slot, media_errors, predictive_failures in matches]
+
+
def find_batteries(d):
    """Extract batteries (BBUs) from megacli output lines.

    Lines are stripped before the prefix test, so indented lines match too.
    The kept lines are joined with single spaces and scanned with ``RE_BATTERY``.

    :param d: iterable of output lines (strings)
    :return: list of ``Battery`` objects, one per regex match
    """
    prefixes = ('BBU Capacity Info for Adapter', 'Relative State of Charge', 'Cycle Count')
    relevant = []
    for line in d:
        stripped = line.strip()
        if stripped.startswith(prefixes):
            relevant.append(stripped)
    matches = RE_BATTERY.findall(' '.join(relevant))
    return [Battery(adapter_id, charge, cycles) for adapter_id, charge, cycles in matches]
+
+
class Adapter:
    """State of one adapter as parsed from the megacli output.

    ``state`` is stored as 1 when the reported state is a degraded one and
    0 otherwise.
    """

    # State strings that count as degraded. RE_ADAPTER captures a single word
    # after "State :", so a "Partially Degraded" state arrives here as just
    # "Partially"; the full form is accepted as well in case the caller passes it.
    DEGRADED_STATES = ('Degraded', 'Partially Degraded', 'Partially')

    def __init__(self, n, state):
        """
        :param n: adapter number (used verbatim in dimension ids)
        :param state: state text reported for the adapter
        """
        self.id = n
        self.state = int(state in self.DEGRADED_STATES)

    def data(self):
        """
        :return: dict mapping the adapter's dimension id to its 0/1 degraded flag
        """
        return {
            'adapter_{0}_degraded'.format(self.id): self.state,
        }
+
+
class PD:
    """Error counters of one physical drive, identified by its slot number."""

    def __init__(self, n, media_err, predict_fail):
        """
        :param n: slot number (used verbatim in dimension ids)
        :param media_err: media error count, stored as given
        :param predict_fail: predictive failure count, stored as given
        """
        self.id, self.media_err, self.predict_fail = n, media_err, predict_fail

    def data(self):
        """
        :return: dict mapping the drive's two dimension ids to the stored counters
        """
        collected = dict()
        collected['slot_{0}_media_error'.format(self.id)] = self.media_err
        collected['slot_{0}_predictive_failure'.format(self.id)] = self.predict_fail
        return collected
+
+
class Battery:
    """Charge and cycle-count readings of one battery, identified by its adapter id."""

    def __init__(self, adapt_id, rel_charge, cycle_count):
        """
        :param adapt_id: adapter number the battery belongs to (used verbatim in dimension ids)
        :param rel_charge: relative state of charge, stored as given
        :param cycle_count: cycle count, stored as given
        """
        self.id, self.rel_charge, self.cycle_count = adapt_id, rel_charge, cycle_count

    def data(self):
        """
        :return: dict mapping the battery's two dimension ids to the stored readings
        """
        collected = dict()
        collected['bbu_{0}_relative_charge'.format(self.id)] = self.rel_charge
        collected['bbu_{0}_cycle_count'.format(self.id)] = self.cycle_count
        return collected
+
+
+# TODO: hardcoded sudo...
class Megacli:
    """Locate the ``sudo`` and megacli binaries and hold the command lines to run.

    The instance is truthy only when both binaries were found.
    """

    # Binary names tried in order. The lowercase name is tried first so existing
    # setups resolve exactly as before; the other names cover installations that
    # ship the tool under its vendor spelling.
    BINARY_NAMES = ('megacli', 'MegaCli', 'MegaCli64')

    def __init__(self):
        self.s = find_binary('sudo')
        self.m = None
        for binary_name in self.BINARY_NAMES:
            self.m = find_binary(binary_name)
            if self.m:
                break
        # "-n" keeps sudo non-interactive
        self.sudo_check = [self.s, '-n', '-v']
        self.disk_info = [self.s, '-n', self.m, '-LDPDInfo', '-aAll']
        self.battery_info = [self.s, '-n', self.m, '-AdpBbuCmd', '-a0']

    def __bool__(self):
        return bool(self.s and self.m)

    def __nonzero__(self):
        # Python 2 spelling of __bool__
        return self.__bool__()
+
+
class Service(ExecutableService):
    """Collects adapter, physical drive and (optionally) battery stats via megacli."""

    def __init__(self, configuration=None, name=None):
        ExecutableService.__init__(self, configuration=configuration, name=name)
        # chart order/definitions are filled in by the check_* methods
        self.order = []
        self.definitions = {}
        self.megacli = Megacli()
        self.do_battery = self.configuration.get('do_battery')

    def check_sudo(self):
        """Run the sudo check command; any returned output is logged as an error.

        :return: False when the command produced output, True otherwise
        """
        # called with stderr=True - presumably this returns the command's stderr lines
        complaints = self._get_raw_data(command=self.megacli.sudo_check, stderr=True)
        if not complaints:
            return True
        self.error(''.join(complaints))
        return False

    def check_disk_info(self):
        """Run the disk info command and register the adapter and drive charts.

        :return: True when both adapters and drives were parsed, False otherwise
        """
        command = self.megacli.disk_info
        output = self._get_raw_data(command=command)
        if not output:
            return False

        adapters = find_adapters(output)
        drives = find_pds(output)

        if not adapters or not drives:
            self.error('failed to parse "{0}" output'.format(' '.join(command)))
            return False

        for chart_order, chart_definitions in (adapter_charts(adapters), pd_charts(drives)):
            self.order.extend(chart_order)
            self.definitions.update(chart_definitions)

        return True

    def check_battery(self):
        """Run the battery info command and register the battery charts.

        :return: True when at least one battery was parsed, False otherwise
        """
        command = self.megacli.battery_info
        output = self._get_raw_data(command=command)
        if not output:
            return False

        batteries = find_batteries(output)

        if not batteries:
            self.error('failed to parse "{0}" output'.format(' '.join(command)))
            return False

        chart_order, chart_definitions = battery_charts(batteries)
        self.order.extend(chart_order)
        self.definitions.update(chart_definitions)
        return True

    def check(self):
        """Verify the binaries, sudo access and disk info output.

        :return: None when a binary is missing, False when the sudo or disk
                 info check fails, True otherwise
        """
        if not self.megacli:
            self.error('can\'t locate "sudo" or "megacli" binary')
            return None

        if not self.check_sudo():
            return False
        if not self.check_disk_info():
            return False

        if self.do_battery:
            # a failed battery check only disables battery collection
            self.do_battery = self.check_battery()

        return True

    def get_data(self):
        """
        :return: dict of dimension id -> value, or None when nothing was collected
        """
        collected = dict(self.get_adapter_pd_data())

        if self.do_battery:
            collected.update(self.get_battery_data())

        if not collected:
            return None
        return collected

    def get_adapter_pd_data(self):
        """
        :return: dict with adapter and drive values; empty when the command gave no output
        """
        output = self._get_raw_data(command=self.megacli.disk_info)
        collected = {}

        if output:
            for adapter in find_adapters(output):
                collected.update(adapter.data())
            for drive in find_pds(output):
                collected.update(drive.data())

        return collected

    def get_battery_data(self):
        """
        :return: dict with battery values; empty when the command gave no output
        """
        output = self._get_raw_data(command=self.megacli.battery_info)
        collected = {}

        if output:
            for battery in find_batteries(output):
                collected.update(battery.data())

        return collected
diff --git a/python.d/python_modules/bases/FrameworkServices/ExecutableService.py b/python.d/python_modules/bases/FrameworkServices/ExecutableService.py
index 9b2e945e79..625f643234 100644
--- a/python.d/python_modules/bases/FrameworkServices/ExecutableService.py
+++ b/python.d/python_modules/bases/FrameworkServices/ExecutableService.py
@@ -17,15 +17,15 @@ class ExecutableService(SimpleService):
SimpleService.__init__(self, configuration=configuration, name=name)
self.command = None
- def _get_raw_data(self, stderr=False):
+ def _get_raw_data(self, stderr=False, command=None):
"""
Get raw data from executed command
:return: <list>
"""
try:
- p = Popen(self.command, stdout=PIPE, stderr=PIPE)
+ p = Popen(command if command else self.command, stdout=PIPE, stderr=PIPE)
except Exception as error:
- self.error('Executing command {command} resulted in error: {error}'.format(command=self.command,
+ self.error('Executing command {command} resulted in error: {error}'.format(command=command or self.command,
error=error))
return None
data = list()