summaryrefslogtreecommitdiffstats
path: root/python.d
diff options
context:
space:
mode:
authorIlya <ilyamaschenko@gmail.com>2017-02-24 19:30:28 +0900
committerIlya <ilyamaschenko@gmail.com>2017-02-24 19:30:28 +0900
commit878a350b94fda1838240811d126bf39cbf659500 (patch)
tree9954dcd911e21c3740aab61683257f4ca9d01256 /python.d
parentcadedd2acd028d6d6d1a129dc3161fc14d7db813 (diff)
mongodb_plugin: oplog window and "optimeDate" diff between nodes charts added
Diffstat (limited to 'python.d')
-rw-r--r--python.d/mongodb.chart.py113
1 file changed, 85 insertions, 28 deletions
diff --git a/python.d/mongodb.chart.py b/python.d/mongodb.chart.py
index 37e6c7d208..c2852b0389 100644
--- a/python.d/mongodb.chart.py
+++ b/python.d/mongodb.chart.py
@@ -5,8 +5,9 @@
from base import SimpleService
from copy import deepcopy
from datetime import datetime
+from sys import exc_info
try:
- from pymongo import MongoClient
+ from pymongo import MongoClient, ASCENDING, DESCENDING
from pymongo.errors import PyMongoError
PYMONGO = True
except ImportError:
@@ -123,6 +124,7 @@ CHARTS = {
'lines': [
['memory_virtual', 'virtual', 'absolute', 1, 1],
['memory_resident', 'resident', 'absolute', 1, 1],
+ ['memory_nonmapped', 'nonmapped', 'absolute', 1, 1],
['memory_mapped', 'mapped', 'absolute', 1, 1]
]},
'page_faults': {
@@ -154,11 +156,11 @@ CHARTS = {
['errors_user', 'user', 'incremental', 1, 1]
]},
'wiredtiger_cache': {
- 'options': [None, 'Amount of space taken by cached data and by dirty data in the cache',
- 'KB', 'resource utilization', 'mongodb.wiredtiger_cache', 'stacked'],
+ 'options': [None, 'The percentage of the wiredTiger cache that is in use and cache with dirty bytes',
+ 'percent', 'resource utilization', 'mongodb.wiredtiger_cache', 'stacked'],
'lines': [
- ['wiredTiger_bytes_in_cache', 'cached', 'absolute', 1, 1024],
- ['wiredTiger_dirty_in_cache', 'dirty', 'absolute', 1, 1024]
+ ['wiredTiger_bytes_in_cache', 'inuse', 'absolute', 1, 1000],
+ ['wiredTiger_dirty_in_cache', 'dirty', 'absolute', 1, 1000]
]},
'wiredtiger_pages_evicted': {
'options': [None, 'Pages evicted from the cache',
@@ -231,25 +233,29 @@ class Service(SimpleService):
self.error(error)
return False
- self.repl = 'repl' in server_status
try:
self.databases = self.connection.database_names()
except PyMongoError as error:
self.databases = list()
self.info('Can\'t collect databases: %s' % str(error))
- self.create_charts_(server_status)
+ self.ss = dict()
+ for elem in ['dur', 'backgroundFlushing', 'wiredTiger', 'tcmalloc', 'cursor', 'commands', 'repl']:
+ self.ss[elem] = in_server_status(elem, server_status)
- return True
+ try:
+ self._get_data()
+ except (LookupError, SyntaxError, AttributeError):
+ self.error('Type: %s, error: %s' % (str(exc_info()[0]), str( exc_info()[1])))
+ return False
+ else:
+ self.create_charts_(server_status)
+ return True
def create_charts_(self, server_status):
self.order = ORDER[:]
self.definitions = deepcopy(CHARTS)
- self.ss = dict()
-
- for elem in ['dur', 'backgroundFlushing', 'wiredTiger', 'tcmalloc', 'cursor', 'commands']:
- self.ss[elem] = in_server_status(elem, server_status)
if not self.ss['dur']:
self.order.remove('journaling_transactions')
@@ -288,11 +294,11 @@ class Service(SimpleService):
]}
self.definitions['dbstats_objects']['lines'].append(['_'.join([dbase, 'objects']), dbase, 'absolute'])
- if server_status.get('repl'):
- def create_heartbeat_lines(hosts):
+ if self.ss['repl']:
+ def create_lines(hosts, string):
lines = list()
for host in hosts:
- dim_id = '_'.join([host, 'heartbeat_lag'])
+ dim_id = '_'.join([host, string])
lines.append([dim_id, host, 'absolute', 1, 1000])
return lines
@@ -307,12 +313,25 @@ class Service(SimpleService):
this_host = server_status['repl']['me']
other_hosts = [host for host in all_hosts if host != this_host]
- # Create "heartbeat delay" charts
+ if 'local' in self.databases:
+ self.order.append('oplog_window')
+ self.definitions['oplog_window'] = {
+ 'options': [None, 'Interval of time between the oldest and the latest entries in the oplog',
+ 'seconds', 'replication', 'mongodb.oplog_window', 'line'],
+ 'lines': [['timeDiff', 'window', 'absolute', 1, 1000]]}
+ # Create "heartbeat delay" chart
self.order.append('heartbeat_delay')
self.definitions['heartbeat_delay'] = {
'options': [None, 'Latency between this node and replica set members (lastHeartbeatRecv)',
'seconds', 'replication', 'mongodb.replication_heartbeat_delay', 'stacked'],
- 'lines': create_heartbeat_lines(other_hosts)}
+ 'lines': create_lines(other_hosts, 'heartbeat_lag')}
+ # Create "optimedate delay" chart
+ self.order.append('optimedate_delay')
+ self.definitions['optimedate_delay'] = {
+ 'options': [None, '"optimeDate"(time when last entry from the oplog was applied)'
+ ' diff between all nodes',
+ 'seconds', 'replication', 'mongodb.replication_optimedate_delay', 'stacked'],
+ 'lines': create_lines(all_hosts, 'optimedate')}
# Create "replica set members state" chart
for host in all_hosts:
chart_name = '_'.join([host, 'state'])
@@ -328,6 +347,7 @@ class Service(SimpleService):
raw_data.update(self.get_serverstatus_() or dict())
raw_data.update(self.get_dbstats_() or dict())
raw_data.update(self.get_replsetgetstatus_() or dict())
+ raw_data.update(self.get_getreplicationinfo_() or dict())
return raw_data or None
@@ -355,7 +375,7 @@ class Service(SimpleService):
return raw_data
def get_replsetgetstatus_(self):
- if not self.repl:
+ if not self.ss['repl']:
return None
raw_data = dict()
@@ -366,6 +386,22 @@ class Service(SimpleService):
else:
return raw_data
+ def get_getreplicationinfo_(self):
+ if not (self.ss['repl'] and 'local' in self.databases):
+ return None
+
+ raw_data = dict()
+ raw_data['getReplicationInfo'] = dict()
+ try:
+ raw_data['getReplicationInfo']['ASCENDING'] = self.connection.local.oplog.rs.find().sort(
+ "$natural", ASCENDING).limit(1)[0]
+ raw_data['getReplicationInfo']['DESCENDING'] = self.connection.local.oplog.rs.find().sort(
+ "$natural", DESCENDING).limit(1)[0]
+ except PyMongoError:
+ return None
+ else:
+ return raw_data
+
def _get_data(self):
"""
:return: dict
@@ -379,6 +415,7 @@ class Service(SimpleService):
serverStatus = raw_data['serverStatus']
dbStats = raw_data.get('dbStats')
replSetGetStatus = raw_data.get('replSetGetStatus')
+ getReplicationInfo = raw_data.get('getReplicationInfo')
utc_now = datetime.utcnow()
# serverStatus
@@ -386,6 +423,8 @@ class Service(SimpleService):
to_netdata.update(update_dict_key(serverStatus['globalLock']['activeClients'], 'activeClients'))
to_netdata.update(update_dict_key(serverStatus['connections'], 'connections'))
to_netdata.update(update_dict_key(serverStatus['mem'], 'memory'))
+ to_netdata['memory_nonmapped'] = (serverStatus['mem']['virtual']
+ - serverStatus['mem'].get('mappedWithJournal', serverStatus['mem']['mapped']))
to_netdata.update(update_dict_key(serverStatus['globalLock']['currentQueue'], 'currentQueue'))
to_netdata.update(update_dict_key(serverStatus['asserts'], 'errors'))
to_netdata['page_faults'] = serverStatus['extra_info']['page_faults']
@@ -410,8 +449,12 @@ class Service(SimpleService):
'wiredTigerRead'))
to_netdata.update(update_dict_key(serverStatus['wiredTiger']['concurrentTransactions']['write'],
'wiredTigerWrite'))
- to_netdata['wiredTiger_bytes_in_cache'] = wired_tiger['cache']['bytes currently in the cache']
- to_netdata['wiredTiger_dirty_in_cache'] = wired_tiger['cache']['tracked dirty bytes in the cache']
+ to_netdata['wiredTiger_bytes_in_cache'] = (int(wired_tiger['cache']['bytes currently in the cache']
+ * 100 / wired_tiger['cache']['maximum bytes configured']
+ * 1000))
+ to_netdata['wiredTiger_dirty_in_cache'] = (int(wired_tiger['cache']['tracked dirty bytes in the cache']
+ * 100 / wired_tiger['cache']['maximum bytes configured']
+ * 1000))
to_netdata['wiredTiger_unmodified_pages_evicted'] = wired_tiger['cache']['unmodified pages evicted']
to_netdata['wiredTiger_modified_pages_evicted'] = wired_tiger['cache']['modified pages evicted']
@@ -433,19 +476,33 @@ class Service(SimpleService):
if replSetGetStatus:
other_hosts = list()
members = replSetGetStatus['members']
+ unix_epoch = datetime(1970, 1, 1, 0, 0)
+
for member in members:
if not member.get('self'):
other_hosts.append(member)
+ # Replica set time diff between current time and time when last entry from the oplog was applied
+ if member['optimeDate'] != unix_epoch:
+ member_optimedate = member['name'] + '_optimedate'
+ to_netdata.update({member_optimedate: int(delta_calculation(delta=utc_now - member['optimeDate'],
+ multiplier=1000))})
# Replica set members state
+ member_state = member['name'] + '_state'
for elem in REPLSET_STATES:
state = elem[0]
- to_netdata.update({'_'.join([member['name'], 'state', state]): 0})
- to_netdata.update({'_'.join([member['name'], 'state', str(member['state'])]): member['state']})
+ to_netdata.update({'_'.join([member_state, state]): 0})
+ to_netdata.update({'_'.join([member_state, str(member['state'])]): member['state']})
# Heartbeat lag calculation
for other in other_hosts:
- if other['lastHeartbeatRecv'] != datetime(1970, 1, 1, 0, 0):
+ if other['lastHeartbeatRecv'] != unix_epoch:
node = other['name'] + '_heartbeat_lag'
- to_netdata[node] = int(lag_calculation(utc_now - other['lastHeartbeatRecv']) * 1000)
+ to_netdata[node] = int(delta_calculation(delta=utc_now - other['lastHeartbeatRecv'],
+ multiplier=1000))
+
+ if getReplicationInfo:
+ first_event = getReplicationInfo['ASCENDING']['ts'].as_datetime()
+ last_event = getReplicationInfo['DESCENDING']['ts'].as_datetime()
+ to_netdata['timeDiff'] = int(delta_calculation(delta=last_event - first_event, multiplier=1000))
return to_netdata
@@ -478,8 +535,8 @@ def in_server_status(elem, server_status):
return elem in server_status or elem in server_status['metrics']
-def lag_calculation(lag):
- if hasattr(lag, 'total_seconds'):
- return lag.total_seconds()
+def delta_calculation(delta, multiplier=1):
+ if hasattr(delta, 'total_seconds'):
+ return delta.total_seconds() * multiplier
else:
- return (lag.microseconds + (lag.seconds + lag.days * 24 * 3600) * 10 ** 6) / 10.0 ** 6
+ return (delta.microseconds + (delta.seconds + delta.days * 24 * 3600) * 10 ** 6) / 10.0 ** 6 * multiplier