summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Vladimir Kobal <vlad@prokk.net> 2021-03-03 16:02:13 +0200
committer GitHub <noreply@github.com> 2021-03-03 16:02:13 +0200
commit 9b48ae8690586d8fdba9c7e482bd4afe89461d36 (patch)
tree dd9b20218391f57a450737b85b8eee72f7d0d915
parent c629e561eb54cc4bfae0b508cc76f278790e3ad6 (diff)
Fix disk utilization and backlog charts (#10705)
-rw-r--r-- collectors/all.h 9
-rw-r--r-- collectors/proc.plugin/proc_diskstats.c 41
-rw-r--r-- web/gui/dashboard_info.js 5
3 files changed, 48 insertions, 7 deletions
diff --git a/collectors/all.h b/collectors/all.h
index 153fce9316..295261b56a 100644
--- a/collectors/all.h
+++ b/collectors/all.h
@@ -105,10 +105,11 @@
#define NETDATA_CHART_PRIO_DISK_OPS 2001
#define NETDATA_CHART_PRIO_DISK_QOPS 2002
#define NETDATA_CHART_PRIO_DISK_BACKLOG 2003
-#define NETDATA_CHART_PRIO_DISK_UTIL 2004
-#define NETDATA_CHART_PRIO_DISK_AWAIT 2005
-#define NETDATA_CHART_PRIO_DISK_AVGSZ 2006
-#define NETDATA_CHART_PRIO_DISK_SVCTM 2007
+#define NETDATA_CHART_PRIO_DISK_BUSY 2004
+#define NETDATA_CHART_PRIO_DISK_UTIL 2005
+#define NETDATA_CHART_PRIO_DISK_AWAIT 2006
+#define NETDATA_CHART_PRIO_DISK_AVGSZ 2007
+#define NETDATA_CHART_PRIO_DISK_SVCTM 2008
#define NETDATA_CHART_PRIO_DISK_MOPS 2021
#define NETDATA_CHART_PRIO_DISK_IOTIME 2022
#define NETDATA_CHART_PRIO_BCACHE_CACHE_ALLOC 2120
diff --git a/collectors/proc.plugin/proc_diskstats.c b/collectors/proc.plugin/proc_diskstats.c
index eee0cbe7f8..22224df8f4 100644
--- a/collectors/proc.plugin/proc_diskstats.c
+++ b/collectors/proc.plugin/proc_diskstats.c
@@ -73,6 +73,9 @@ static struct disk {
RRDSET *st_backlog;
RRDDIM *rd_backlog_backlog;
+ RRDSET *st_busy;
+ RRDDIM *rd_busy_busy;
+
RRDSET *st_util;
RRDDIM *rd_util_utilization;
@@ -1094,7 +1097,7 @@ int do_proc_diskstats(int update_every, usec_t dt) {
rrdset_flag_set(d->st_backlog, RRDSET_FLAG_DETAIL);
- d->rd_backlog_backlog = rrddim_add(d->st_backlog, "backlog", NULL, 1, 10, RRD_ALGORITHM_INCREMENTAL);
+ d->rd_backlog_backlog = rrddim_add(d->st_backlog, "backlog", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
}
else rrdset_next(d->st_backlog);
@@ -1108,6 +1111,34 @@ int do_proc_diskstats(int update_every, usec_t dt) {
(busy_ms || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) {
d->do_util = CONFIG_BOOLEAN_YES;
+ if(unlikely(!d->st_busy)) {
+ d->st_busy = rrdset_create_localhost(
+ "disk_busy"
+ , d->device
+ , d->disk
+ , family
+ , "disk.busy"
+ , "Disk Busy Time"
+ , "milliseconds"
+ , PLUGIN_PROC_NAME
+ , PLUGIN_PROC_MODULE_DISKSTATS_NAME
+ , NETDATA_CHART_PRIO_DISK_BUSY
+ , update_every
+ , RRDSET_TYPE_AREA
+ );
+
+ rrdset_flag_set(d->st_busy, RRDSET_FLAG_DETAIL);
+
+ d->rd_busy_busy =
+ rrddim_add(d->st_busy, "busy", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ }
+ else rrdset_next(d->st_busy);
+
+ last_busy_ms = rrddim_set_by_pointer(d->st_busy, d->rd_busy_busy, busy_ms);
+ rrdset_done(d->st_busy);
+
+ // --------------------------------------------------------------------
+
if(unlikely(!d->st_util)) {
d->st_util = rrdset_create_localhost(
"disk_util"
@@ -1126,11 +1157,15 @@ int do_proc_diskstats(int update_every, usec_t dt) {
rrdset_flag_set(d->st_util, RRDSET_FLAG_DETAIL);
- d->rd_util_utilization = rrddim_add(d->st_util, "utilization", NULL, 1, 10, RRD_ALGORITHM_INCREMENTAL);
+ d->rd_util_utilization = rrddim_add(d->st_util, "utilization", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
}
else rrdset_next(d->st_util);
- last_busy_ms = rrddim_set_by_pointer(d->st_util, d->rd_util_utilization, busy_ms);
+ collected_number disk_utilization = (busy_ms - last_busy_ms) / (10 * update_every);
+ if (disk_utilization > 100)
+ disk_utilization = 100;
+
+ rrddim_set_by_pointer(d->st_util, d->rd_util_utilization, disk_utilization);
rrdset_done(d->st_util);
}
diff --git a/web/gui/dashboard_info.js b/web/gui/dashboard_info.js
index df15a63723..4529413175 100644
--- a/web/gui/dashboard_info.js
+++ b/web/gui/dashboard_info.js
@@ -1342,6 +1342,11 @@ netdataDashboard.context = {
info: 'Disk Utilization measures the amount of time the disk was busy with something. This is not related to its performance. 100% means that the system always had an outstanding operation on the disk. Keep in mind that depending on the underlying technology of the disk, 100% here may or may not be an indication of congestion.'
},
+ 'disk.busy': {
+ colors: '#FF5588',
+ info: 'Disk Busy Time measures the amount of time the disk was busy with something.'
+ },
+
'disk.backlog': {
colors: '#0099CC',
info: 'Backlog is an indication of the duration of pending disk operations. On every I/O event the system is multiplying the time spent doing I/O since the last update of this field with the number of pending operations. While not accurate, this metric can provide an indication of the expected completion time of the operations in progress.'