Merge pull request #3166 from lets00/ceph

Add ceph plugin
author: Costa Tsaousis <costa@tsaousis.gr> 2018-02-11 23:36:36 +0200
committer: GitHub <noreply@github.com> 2018-02-11 23:36:36 +0200
commit: 364c4fa2de20eb6ccb60256ce94c34835b79cb0f (patch)
tree: 489a145ee31e1a4100cb7c6f701e13b22e0f84a7
parent: db83f36f4dc71c746e8bb1c8a04c1cc4312f44a1 (diff)
parent: c5bf7ec0d5138a570027d8cd6a12ac8ec58cad66 (diff)
12 files changed, 523 insertions, 3 deletions
diff --git a/README.md b/README.md
index 66eda38646..7ed9aecd50 100644
--- a/README.md
+++ b/README.md
@@ -285,6 +285,9 @@ This is a list of what it currently monitors:
 - **statsd**<br/>
   [netdata is a fully featured statsd server](https://github.com/firehol/netdata/wiki/statsd)
 
+- **ceph**<br/>
+  OSD usage, Pool usage, number of objects, etc.
+
 And you can extend it, by writing plugins that collect data from any source, using any computer language.
 
 ---
diff --git a/conf.d/Makefile.am b/conf.d/Makefile.am
index 2450c9a993..69c662157e 100644
--- a/conf.d/Makefile.am
+++ b/conf.d/Makefile.am
@@ -29,6 +29,7 @@ dist_pythonconfig_DATA = \
     python.d/apache.conf \
     python.d/beanstalk.conf \
     python.d/bind_rndc.conf \
+    python.d/ceph.conf \
     python.d/chrony.conf \
     python.d/couchdb.conf \
     python.d/cpufreq.conf \
@@ -77,6 +78,7 @@ dist_healthconfig_DATA = \
     health.d/beanstalkd.conf \
     health.d/bind_rndc.conf \
     health.d/btrfs.conf \
+    health.d/ceph.conf \
     health.d/cpu.conf \
     health.d/couchdb.conf \
     health.d/disks.conf \
diff --git a/conf.d/health.d/ceph.conf b/conf.d/health.d/ceph.conf
new file mode 100644
index 0000000000..de16f7b6ff
--- /dev/null
+++ b/conf.d/health.d/ceph.conf
@@ -0,0 +1,13 @@
+# low ceph disk available
+
+template: cluster_space_usage
+      on: ceph.general_usage
+    calc: $avail * 100 / ($avail + $used)
+   units: %
+   every: 10s
+    warn: $this < 10
+    crit: $this < 1
+   delay: down 5m multiplier 1.2 max 1h
+    info: ceph disk usage is almost full
+      to: sysadmin
+
diff --git a/conf.d/python.d.conf b/conf.d/python.d.conf
index 25ddcde55c..e36392a9a1 100644
--- a/conf.d/python.d.conf
+++ b/conf.d/python.d.conf
@@ -15,7 +15,7 @@ enabled: yes
 #
 # If "default_run" = "yes" the default for all modules is enabled (yes).
 # Setting any of these to "no" will disable it.
-# 
+#
 # If "default_run" = "no" the default for all modules is disabled (no).
 # Setting any of these to "yes" will enable it.
 
@@ -24,6 +24,7 @@ apache_cache: no
 # apache: yes
 # beanstalk: yes
 # bind_rndc: yes
+# ceph: yes
 chrony: no
 # couchdb: yes
 # cpufreq: yes
diff --git a/conf.d/python.d/ceph.conf b/conf.d/python.d/ceph.conf
new file mode 100644
index 0000000000..78ac1e2511
--- /dev/null
+++ b/conf.d/python.d/ceph.conf
@@ -0,0 +1,75 @@
+# netdata python.d.plugin configuration for ceph stats
+#
+# This file is in YaML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+#  - global variables
+#  - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 10
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 60
+
+# autodetection_retry sets the job re-check interval in seconds.
+# The job is not deleted if check fails.
+# Attempts to start the job are made once every autodetection_retry.
+# This feature is disabled by default.
+# autodetection_retry: 0
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed running at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# All python.d.plugin JOBS (for all its modules) support a set of
+# predefined parameters. These are:
+#
+# job_name:
+#     name: myname            # the JOB's name as it will appear at the
+#                             # dashboard (by default is the job_name)
+#                             # JOBs sharing a name are mutually exclusive
+#     update_every: 10         # the JOB's data collection frequency
+#     priority: 60000         # the JOB's order on the dashboard
+#     retries: 60             # the JOB's number of restoration attempts
+#     autodetection_retry: 0  # the JOB's re-check interval in seconds
+#
+# Additionally to the above, ceph plugin also supports the following:
+#
+#     config_file: 'config_file'       # Ceph config file.
+#     keyring_file: 'keyring_file'     # Ceph keyring file. netdata user must be added into ceph group
+#                                      # and keyring file must be read group permission.
+# ----------------------------------------------------------------------
+# AUTO-DETECTION JOBS
+# only one of them will run (they have the same name)
+#
+config_file: '/etc/ceph/ceph.conf'
+keyring_file: '/etc/ceph/ceph.client.admin.keyring'
+
diff --git a/configs.signatures b/configs.signatures
index 8730ad042f..4bf473e375 100644
--- a/configs.signatures
+++ b/configs.signatures
@@ -112,6 +112,7 @@ declare -A configs_signatures=(
   ['2fa8fb929fd597f2ab97b6efc540a043']='health_alarm_notify.conf'
   ['307ac41f6c67fcf007d6f7135fac314c']='stream.conf'
   ['312b4b8e2805e19cf9be554b319567d6']='health.d/softnet.conf'
+  ['3161290af7c1909768253e714ea2c3de']='python.d/ceph.conf'
   ['318bb45755726a25120bb33413d4b582']='health.d/net.conf'
   ['318db50a701442890c269ab547041e97']='health.d/tcp_orphans.conf'
   ['325617412a628e3bc776e3fbb777a2a6']='health.d/redis.conf'
@@ -152,6 +153,7 @@ declare -A configs_signatures=(
   ['3cc6255457d4cba881ae0554ae5d9190']='health.d/squid.conf'
   ['3d974ac9fdaa44d4527d6503bec35e34']='stream.conf'
   ['3f170e3343cd784983b019163393f5af']='health.d/nginx.conf'
+  ['3f7b669fde5c63bd55cb6dd88866d306']='python.d/ceph.conf'
   ['3fbe85671efd5d07e51584ab8262b48b']='health.d/tcp_listen.conf'
   ['3fc45cc18e884c22482524dff6d27833']='python.d/hddtemp.conf'
   ['3fcc3c449ce8e0388f9c23ca07cab608']='health.d/backend.conf'
@@ -424,6 +426,7 @@ declare -A configs_signatures=(
   ['af14667ee7993acea810f6d50923bdc9']='health.d/web_log.conf'
   ['af44cc53aa2bc5cc8935667119567522']='python.d.conf'
   ['afdae4646c755ff2d117527fbf761c8e']='health.d/disks.conf'
+  ['b06d1063bc2200bb2d864021fa1a9cbd']='python.d.conf'
   ['b07eebc6f58d19721ac069171b911d2a']='health_alarm_notify.conf'
   ['b0c59b2bd7a10f6a3f2be6b4b27857db']='health.d/haproxy.conf'
   ['b0f0a0ac415e4b1a82187b80d211e83b']='python.d/mysql.conf'
@@ -458,6 +461,7 @@ declare -A configs_signatures=(
   ['bf66f113b2dd8d8fb444cbd5650f284c']='health_alarm_notify.conf'
   ['c004430f55310ae9ed489c4905ed02cb']='charts.d/apache.conf'
   ['c080e006f544c949baca33cc24a9c126']='health_alarm_notify.conf'
+  ['c0c4c63384ef408f0715331e7615aa60']='python.d/ceph.conf'
   ['c132d2e257fc4df2925be7ad75100d5b']='health.d/entropy.conf'
   ['c1a7e634b5b8aad523a0d115a93379cd']='health.d/memcached.conf'
   ['c1d014ffaebfa0952968aeaf330e5337']='python.d.conf'
@@ -544,6 +548,7 @@ declare -A configs_signatures=(
   ['e3112d8e06fa77888aab02e8fcd22e25']='apps_groups.conf'
   ['e3996f70a4b09315b4a64e3df7d34d43']='python.d/rabbitmq.conf'
   ['e3d100c2d0347c08efbf6245e05620c6']='python.d/fail2ban.conf'
+  ['e3e0c742427c9609ce923e845a0c8532']='health.d/ceph.conf'
   ['e3e5bc57335c489f01b8559f5c70e112']='python.d/squid.conf'
   ['e40947d22f7ed5359f12fc89e3512963']='python.d/dovecot.conf'
   ['e449e5582279742496550df14b6fca95']='health.d/entropy.conf'
diff --git a/installer/functions.sh b/installer/functions.sh
index 21859e72e8..55554ba3ed 100644
--- a/installer/functions.sh
+++ b/installer/functions.sh
@@ -798,6 +798,7 @@ NETDATA_ADDED_TO_ADM=0
 NETDATA_ADDED_TO_NSD=0
 NETDATA_ADDED_TO_PROXY=0
 NETDATA_ADDED_TO_SQUID=0
+NETDATA_ADDED_TO_CEPH=0
 add_netdata_user_and_group() {
     if [ ${UID} -eq 0 ]
         then
@@ -811,6 +812,7 @@ add_netdata_user_and_group() {
         portable_add_user_to_group nsd      netdata && NETDATA_ADDED_TO_NSD=1
         portable_add_user_to_group proxy    netdata && NETDATA_ADDED_TO_PROXY=1
         portable_add_user_to_group squid    netdata && NETDATA_ADDED_TO_SQUID=1
+        portable_add_user_to_group ceph     netdata && NETDATA_ADDED_TO_CEPH=1
         return 0
     fi
 
diff --git a/netdata-installer.sh b/netdata-installer.sh
index a756c05921..7acf2951a4 100755
--- a/netdata-installer.sh
+++ b/netdata-installer.sh
@@ -1170,6 +1170,15 @@ if [ $? -eq 0 -a "${NETDATA_ADDED_TO_SQUID}" = "1" ]
     echo "   gpasswd -d netdata squid"
 fi
 
+getent group ceph > /dev/null
+if [ $? -eq 0 -a "${NETDATA_ADDED_TO_CEPH}" = "1" ]
+    then
+    echo
+    echo "You may also want to remove the netdata user from the ceph group"
+    echo "by running:"
+    echo "   gpasswd -d netdata ceph"
+fi
+
 UNINSTALL
 chmod 750 netdata-uninstaller.sh
 
diff --git a/python.d/Makefile.am b/python.d/Makefile.am
index 945d16d4f5..8acd2d9304 100644
--- a/python.d/Makefile.am
+++ b/python.d/Makefile.am
@@ -16,6 +16,7 @@ dist_python_DATA = \
     apache.chart.py \
     beanstalk.chart.py \
     bind_rndc.chart.py \
+    ceph.chart.py \
     chrony.chart.py \
     couchdb.chart.py \
     cpufreq.chart.py \
@@ -195,4 +196,3 @@ dist_python_urllib3_securetransport_DATA = \
     python_modules/urllib3/contrib/_securetransport/bindings.py \
     python_modules/urllib3/contrib/_securetransport/low_level.py \
     $(NULL)
-
diff --git a/python.d/README.md b/python.d/README.md
index 8f56d9eae7..c764919136 100644
--- a/python.d/README.md
+++ b/python.d/README.md
@@ -323,6 +323,39 @@ local:
 
 ---
 
+# ceph
+
+This module monitors the ceph cluster usage and consumption data of a server.
+
+It produces:
+
+* Cluster statistics (usage, available, latency, objects, read/write rate)
+* OSD usage
+* OSD latency
+* Pool usage
+* Pool read/write operations
+* Pool read/write rate
+* number of objects per pool
+
+**Requirements:**
+
+- `rados` python module
+- Read permission on the keyring file for the ceph group
+```shell
+# chmod 640 /etc/ceph/ceph.client.admin.keyring
+```
+
+### Configuration
+
+Sample:
+```yaml
+local:
+  config_file: '/etc/ceph/ceph.conf'
+  keyring_file: '/etc/ceph/ceph.client.admin.keyring'
+```
+
+---
+
 # couchdb
 
 This module monitors vital statistics of a local Apache CouchDB 2.x server, including:
diff --git a/python.d/ceph.chart.py b/python.d/ceph.chart.py
new file mode 100644
index 0000000000..fb78397d02
--- /dev/null
+++ b/python.d/ceph.chart.py
@@ -0,0 +1,313 @@
+# -*- coding: utf-8 -*-
+# Description: ceph netdata python.d module
+# Author: Luis Eduardo (lets00)
+
+try:
+    import rados
+    CEPH = True
+except ImportError:
+    CEPH = False
+
+import json
+from bases.FrameworkServices.SimpleService import SimpleService
+
+# default module values (can be overridden per job in `config`)
+update_every = 10
+priority = 60000
+retries = 60
+
+ORDER = ['general_usage', 'general_objects', 'general_bytes', 'general_operations',
+         'general_latency', 'pool_usage', 'pool_objects', 'pool_read_bytes',
+         'pool_write_bytes', 'pool_read_operations', 'pool_write_operations', 'osd_usage',
+         'osd_apply_latency', 'osd_commit_latency']
+
+CHARTS = {
+    'general_usage': {
+        'options': [None, 'Ceph General Space', 'KB', 'general', 'ceph.general_usage', 'stacked'],
+        'lines': [
+            ['general_available', 'avail', 'absolute'],  # collected in KB already, so no divisor
+            ['general_usage', 'used', 'absolute']  # collected in KB already, so no divisor
+        ]
+    },
+    'general_objects': {
+        'options': [None, 'Ceph General Objects', 'objects', 'general', 'ceph.general_objects', 'area'],
+        'lines': [
+            ['general_objects', 'cluster', 'absolute']
+        ]
+    },
+    'general_bytes': {
+        'options': [None, 'Ceph General Read/Write Data/s', 'KB/s', 'general', 'ceph.general_bytes',
+                    'area'],
+        'lines': [
+            ['general_read_bytes', 'read', 'absolute', 1, 1024],  # bytes/s -> KB/s
+            ['general_write_bytes', 'write', 'absolute', -1, 1024]  # negative multiplier draws writes below zero
+        ]
+    },
+    'general_operations': {
+        'options': [None, 'Ceph General Read/Write Operations/s', 'operations/s', 'general', 'ceph.general_operations',
+                    'area'],
+        'lines': [
+            ['general_read_operations', 'read', 'absolute', 1],
+            ['general_write_operations', 'write', 'absolute', -1]
+        ]
+    },
+    'general_latency': {
+        'options': [None, 'Ceph General Apply/Commit latency', 'milliseconds', 'general', 'ceph.general_latency',
+                    'area'],
+        'lines': [
+            ['general_apply_latency', 'apply', 'absolute'],
+            ['general_commit_latency', 'commit', 'absolute']
+        ]
+    },
+    'pool_usage': {  # 'lines' of every pool_*/osd_* chart are filled in by Service.create_definitions()
+        'options': [None, 'Ceph Pools', 'KB', 'pool', 'ceph.pool_usage', 'line'],
+        'lines': []
+    },
+    'pool_objects': {
+        'options': [None, 'Ceph Pools', 'objects', 'pool', 'ceph.pool_objects', 'line'],
+        'lines': []
+    },
+    'pool_read_bytes': {
+        'options': [None, 'Ceph Read Pool Data/s', 'KB/s', 'pool', 'ceph.pool_read_bytes', 'area'],
+        'lines': []
+    },
+    'pool_write_bytes': {
+        'options': [None, 'Ceph Write Pool Data/s', 'KB/s', 'pool', 'ceph.pool_write_bytes', 'area'],
+        'lines': []
+    },
+    'pool_read_operations': {
+        'options': [None, 'Ceph Read Pool Operations/s', 'operations/s', 'pool', 'ceph.pool_read_operations', 'area'],
+        'lines': []
+    },
+    'pool_write_operations': {
+        'options': [None, 'Ceph Write Pool Operations/s', 'operations/s', 'pool', 'ceph.pool_write_operations', 'area'],
+        'lines': []
+    },
+    'osd_usage': {
+        'options': [None, 'Ceph OSDs', 'KB', 'osd', 'ceph.osd_usage', 'line'],
+        'lines': []
+    },
+    'osd_apply_latency': {
+        'options': [None, 'Ceph OSDs apply latency', 'milliseconds', 'osd', 'ceph.apply_latency', 'line'],
+        'lines': []
+    },
+    'osd_commit_latency': {
+        'options': [None, 'Ceph OSDs commit latency', 'milliseconds', 'osd', 'ceph.commit_latency', 'line'],
+        'lines': []
+    }
+
+}
+
+
+class Service(SimpleService):
+    def __init__(self, configuration=None, name=None):
+        SimpleService.__init__(self, configuration=configuration, name=name)
+        self.order = ORDER
+        self.definitions = dict((k, dict(v, lines=list(v['lines']))) for k, v in CHARTS.items())  # per-job copy
+        self.config_file = self.configuration.get('config_file')  # None when unset; check() rejects that
+        self.keyring_file = self.configuration.get('keyring_file')  # None when unset; check() rejects that
+
+    def check(self):
+        """
+        Checks module
+        :return:
+        """
+        if not CEPH:
+            self.error('rados module is needed to use ceph.chart.py')
+            return False
+        if not (self.config_file and self.keyring_file):
+            self.error('config_file and/or keyring_file is not defined')
+            return False
+        try:
+            self.cluster = rados.Rados(conffile=self.config_file,
+                                       conf=dict(keyring=self.keyring_file))
+            self.cluster.connect()
+        except rados.Error as error:
+            self.error(error)
+            return False
+        self.create_definitions()
+        return True
+
+    def create_definitions(self):
+        """
+        Append one chart line per pool and per OSD reported by the cluster
+        :return: None
+        """
+        # Pool lines (sorted by name: a list of dicts cannot be sorted directly on Python 3)
+        for pool in sorted(self._get_df()['pools'], key=lambda item: item['name']):
+            self.definitions['pool_usage']['lines'].append([pool['name'],
+                                                            pool['name'],
+                                                            'absolute'])
+            self.definitions['pool_objects']['lines'].append(["obj_{0}".format(pool['name']),
+                                                              pool['name'],
+                                                              'absolute'])
+            self.definitions['pool_read_bytes']['lines'].append(['read_{0}'.format(pool['name']),
+                                                                pool['name'],
+                                                                'absolute', 1, 1024])
+            self.definitions['pool_write_bytes']['lines'].append(['write_{0}'.format(pool['name']),
+                                                                 pool['name'],
+                                                                 'absolute', 1, 1024])
+            self.definitions['pool_read_operations']['lines'].append(['read_operations_{0}'.format(pool['name']),
+                                                                pool['name'],
+                                                                'absolute'])
+            self.definitions['pool_write_operations']['lines'].append(['write_operations_{0}'.format(pool['name']),
+                                                                 pool['name'],
+                                                                 'absolute'])
+
+        # OSD lines (sorted by name, for the same reason as the pools)
+        for osd in sorted(self._get_osd_df()['nodes'], key=lambda item: item['name']):
+            self.definitions['osd_usage']['lines'].append([osd['name'],
+                                                           osd['name'],
+                                                           'absolute'])
+            self.definitions['osd_apply_latency']['lines'].append(['apply_latency_{0}'.format(osd['name']),
+                                                                   osd['name'],
+                                                                   'absolute'])
+            self.definitions['osd_commit_latency']['lines'].append(['commit_latency_{0}'.format(osd['name']),
+                                                                    osd['name'],
+                                                                    'absolute'])
+
+    def get_data(self):
+        """
+        Collect one sample of all cluster, pool and OSD metrics
+        :return: dict, or None when a cluster reply cannot be fetched or parsed
+        """
+        try:
+            data = {}
+            df = self._get_df()
+            osd_df = self._get_osd_df()
+            osd_perf = self._get_osd_perf()
+            pool_stats = self._get_osd_pool_stats()
+            data.update(self._get_general(osd_perf, pool_stats))
+            for pool in df['pools']:
+                data.update(self._get_pool_usage(pool))
+                data.update(self._get_pool_objects(pool))
+            for pool_io in pool_stats:
+                data.update(self._get_pool_rw(pool_io))
+            for osd in osd_df['nodes']:
+                data.update(self._get_osd_usage(osd))
+            for osd_apply_commit in osd_perf['osd_perf_infos']:
+                data.update(self._get_osd_latency(osd_apply_commit))
+            return data
+        except (ValueError, AttributeError, KeyError, rados.Error) as error:  # bad JSON, missing key, librados error
+            self.error(error)
+            return None
+
+    def _get_general(self, osd_perf, pool_stats):
+        """
+        Get ceph's general usage
+        :return: dict
+        """
+        status = self.cluster.get_cluster_stats()
+        read_bytes_sec = 0
+        write_bytes_sec = 0
+        read_op_per_sec = 0
+        write_op_per_sec = 0
+        apply_latency = 0
+        commit_latency = 0
+
+        for pool_rw_io_b in pool_stats:
+            read_bytes_sec += pool_rw_io_b['client_io_rate'].get('read_bytes_sec', 0)
+            write_bytes_sec += pool_rw_io_b['client_io_rate'].get('write_bytes_sec', 0)
+            read_op_per_sec += pool_rw_io_b['client_io_rate'].get('read_op_per_sec', 0)
+            write_op_per_sec += pool_rw_io_b['client_io_rate'].get('write_op_per_sec', 0)
+        for perf in osd_perf['osd_perf_infos']:
+            apply_latency += perf['perf_stats']['apply_latency_ms']
+            commit_latency += perf['perf_stats']['commit_latency_ms']
+
+        return {'general_usage': int(status['kb_used']),
+                'general_available': int(status['kb_avail']),
+                'general_objects': int(status['num_objects']),
+                'general_read_bytes': read_bytes_sec,
+                'general_write_bytes': write_bytes_sec,
+                'general_read_operations': read_op_per_sec,
+                'general_write_operations': write_op_per_sec,
+                'general_apply_latency': apply_latency,
+                'general_commit_latency': commit_latency
+                }
+
+    @staticmethod
+    def _get_pool_usage(pool):
+        """
+        Process raw data into pool usage dict information
+        :return: A pool dict with pool name's key and usage bytes' value
+        """
+        return {pool['name']: pool['stats']['kb_used']}
+
+    @staticmethod
+    def _get_pool_objects(pool):
+        """
+        Process raw data into pool usage dict information
+        :return: A pool dict with pool name's key and object numbers
+        """
+        return {'obj_{0}'.format(pool['name']): pool['stats']['objects']}
+
+    @staticmethod
+    def _get_pool_rw(pool):
+        """
+        Get read/write kb and operations in a pool
+        :return: A pool dict with both read/write bytes and operations.
+        """
+        return {'read_{0}'.format(pool['pool_name']): int(pool['client_io_rate'].get('read_bytes_sec', 0)),
+                'write_{0}'.format(pool['pool_name']): int(pool['client_io_rate'].get('write_bytes_sec', 0)),
+                'read_operations_{0}'.format(pool['pool_name']): int(pool['client_io_rate'].get('read_op_per_sec', 0)),
+                'write_operations_{0}'.format(pool['pool_name']): int(pool['client_io_rate'].get('write_op_per_sec', 0))
+                }
+
+    @staticmethod
+    def _get_osd_usage(osd):
+        """
+        Process raw data into osd dict information to get osd usage
+        :return: A osd dict with osd name's key and usage bytes' value
+        """
+        return {osd['name']: float(osd['kb_used'])}
+
+    @staticmethod
+    def _get_osd_latency(osd):
+        """
+        Get ceph osd apply and commit latency
+        :return: A osd dict with osd name's key with both apply and commit latency values
+        """
+        return {'apply_latency_osd.{0}'.format(osd['id']): osd['perf_stats']['apply_latency_ms'],
+                'commit_latency_osd.{0}'.format(osd['id']): osd['perf_stats']['commit_latency_ms']}
+
+    def _get_df(self):
+        """
+        Get ceph df output
+        :return: ceph df --format json
+        """
+        return json.loads(self.cluster.mon_command(json.dumps({
+            'prefix': 'df',
+            'format': 'json'
+        }), '')[1])
+
+    def _get_osd_df(self):
+        """
+        Get ceph osd df output
+        :return: ceph osd df --format json
+        """
+        return json.loads(self.cluster.mon_command(json.dumps({
+            'prefix': 'osd df',
+            'format': 'json'
+        }), '')[1])
+
+    def _get_osd_perf(self):
+        """
+        Get ceph osd performance
+        :return: ceph osd perf --format json
+        """
+        return json.loads(self.cluster.mon_command(json.dumps({
+            'prefix': 'osd perf',
+            'format': 'json'
+        }), '')[1])
+
+    def _get_osd_pool_stats(self):
+        """
+        Get ceph osd pool status.
+        This command is used to get information about both
+        read/write operation and bytes per second on each pool
+        :return: ceph osd pool stats --format json
+        """
+        return json.loads(self.cluster.mon_command(json.dumps({
+            'prefix': 'osd pool stats',
+            'format': 'json'
+        }), '')[1])
diff --git a/web/dashboard_info.js b/web/dashboard_info.js
index 0bf482f1f2..dbc821227b 100644
--- a/web/dashboard_info.js
+++ b/web/dashboard_info.js
@@ -325,7 +325,6 @@ netdataDashboard.menu = {
         info: 'Performance metrics for <b><a href="https://couchdb.apache.org/">CouchDB</a></b>, the open-source, JSON document-based database with an HTTP API and multi-master replication.'
     },
 
-
     'beanstalk': {
         title: 'Beanstalkd',
         icon: '<i class="fas fa-tasks"></i>',
@@ -336,6 +335,12 @@ netdataDashboard.menu = {
         title: 'RabbitMQ',
         icon: '<i class="fas fa-comments"></i>',
         info: 'Performance data for the <b><a href="https://www.rabbitmq.com/">RabbitMQ</a></b> open-source message broker.'
+    },
+
+    'ceph': {
+        title: 'Ceph',
+        icon: '<i class="fas fa-database"></i>',
+        info: 'Provides statistics on the <b><a href="http://ceph.com/">ceph</a></b> cluster server, the open-source distributed storage system.'
     }
 };
 
@@ -1311,6 +1316,65 @@ netdataDashboard.context = {
     },
 
     // ------------------------------------------------------------------------
+    // ceph
+
+    'ceph.general_usage': {
+        info: 'The usage and available space in all ceph cluster.'
+    },
+
+    'ceph.general_objects': {
+        info: 'Total number of objects storage on ceph cluster.'
+    },
+
+    'ceph.general_bytes': {
+        info: 'Cluster read and write data per second.'
+    },
+
+    'ceph.general_operations': {
+        info: 'Number of read and write operations per second.'
+    },
+
+    'ceph.general_latency': {
+        info: 'Total of apply and commit latency in all OSDs. The apply latency is the total time taken to flush an update to disk. The commit latency is the total time taken to commit an operation to the journal.'
+    },
+
+    'ceph.pool_usage': {
+        info: 'The usage space in each pool.'
+    },
+
+    'ceph.pool_objects': {
+        info: 'Number of objects presents in each pool.'
+    },
+
+    'ceph.pool_read_bytes': {
+        info: 'The rate of read data per second in each pool.'
+    },
+
+    'ceph.pool_write_bytes': {
+        info: 'The rate of write data per second in each pool.'
+    },
+
+    'ceph.pool_read_operations': { // key must equal the chart context set in python.d/ceph.chart.py
+        info: 'Number of read operations per second in each pool.'
+    },
+
+    'ceph.pool_write_operations': { // key must equal the chart context set in python.d/ceph.chart.py
+        info: 'Number of write operations per second in each pool.'
+    },
+
+    'ceph.osd_usage': {
+        info: 'The usage space in each OSD.'
+    },
+
+    'ceph.apply_latency': {
+        info: 'Time taken to flush an update in each OSD.'
+    },
+
+    'ceph.commit_latency': {
+        info: 'Time taken to commit an operation to the journal in each OSD.'
+    },
+
+    // ------------------------------------------------------------------------
     // web_log
 
     'web_log.response_statuses': {
author	Costa Tsaousis <costa@tsaousis.gr>	2018-02-11 23:36:36 +0200
committer	GitHub <noreply@github.com>	2018-02-11 23:36:36 +0200
commit	364c4fa2de20eb6ccb60256ce94c34835b79cb0f (patch)
tree	489a145ee31e1a4100cb7c6f701e13b22e0f84a7
parent	db83f36f4dc71c746e8bb1c8a04c1cc4312f44a1 (diff)
parent	c5bf7ec0d5138a570027d8cd6a12ac8ec58cad66 (diff)