summaryrefslogtreecommitdiffstats
path: root/collectors/proc.plugin
diff options
context:
space:
mode:
authorHaochen Tong <hexchain@users.noreply.github.com>2019-12-02 22:04:50 +0100
committerthiagoftsm <thiagoftsm@gmail.com>2019-12-02 21:04:50 +0000
commit8a70725c132deb6fc313edcdccaea756a685fc49 (patch)
treeed4c52d299be95ede4ee23970bd7edc777a27c0d /collectors/proc.plugin
parent998470b66b3309b77db9b64d20a83e7aec8185be (diff)
proc.plugin: add pressure stall information (#7209)
* proc.plugin: add pressure stall information * dashboard_info: add "Pressure" section * proc.plugin: mention PSI collector in doc * dashboard_info: fix grammar in PSI section * proc_pressure: fix wrong line name for "full" metrics * proc_pressure: fix copypasta * proc_pressure: refactor to prepare for cgroup changes * cgroups.plugin: add pressure monitoring * add proc_pressure.h to targets * Makefile.am: fix indentation * cgroups.plugin: remove a useless comment * cgroups.plugin: fix pressure config name * proc.plugin: arrange pressure charts under corresponding sections * dashboard_info: rearrange pressure chart descriptions * dashboard_info: reword PSI descriptions
Diffstat (limited to 'collectors/proc.plugin')
-rw-r--r--collectors/proc.plugin/README.md1
-rw-r--r--collectors/proc.plugin/plugin_proc.c3
-rw-r--r--collectors/proc.plugin/plugin_proc.h2
-rw-r--r--collectors/proc.plugin/proc_pressure.c177
-rw-r--r--collectors/proc.plugin/proc_pressure.h31
5 files changed, 214 insertions, 0 deletions
diff --git a/collectors/proc.plugin/README.md b/collectors/proc.plugin/README.md
index 7e2aa10969..b81dd82f1b 100644
--- a/collectors/proc.plugin/README.md
+++ b/collectors/proc.plugin/README.md
@@ -18,6 +18,7 @@
- `/proc/interrupts` (total and per core hardware interrupts)
- `/proc/softirqs` (total and per core software interrupts)
- `/proc/loadavg` (system load and total processes running)
+- `/proc/pressure/{cpu,memory,io}` (pressure stall information)
- `/proc/sys/kernel/random/entropy_avail` (random numbers pool availability - used in cryptography)
- `/sys/class/power_supply` (power supply properties)
- `ipc` (IPC semaphores and message queues)
diff --git a/collectors/proc.plugin/plugin_proc.c b/collectors/proc.plugin/plugin_proc.c
index fcb1babcfa..c9eef2c257 100644
--- a/collectors/proc.plugin/plugin_proc.c
+++ b/collectors/proc.plugin/plugin_proc.c
@@ -21,6 +21,9 @@ static struct proc_module {
{ .name = "/proc/loadavg", .dim = "loadavg", .func = do_proc_loadavg },
{ .name = "/proc/sys/kernel/random/entropy_avail", .dim = "entropy", .func = do_proc_sys_kernel_random_entropy_avail },
+ // pressure metrics
+ { .name = "/proc/pressure", .dim = "pressure", .func = do_proc_pressure },
+
// CPU metrics
{ .name = "/proc/interrupts", .dim = "interrupts", .func = do_proc_interrupts },
{ .name = "/proc/softirqs", .dim = "softirqs", .func = do_proc_softirqs },
diff --git a/collectors/proc.plugin/plugin_proc.h b/collectors/proc.plugin/plugin_proc.h
index cb9a0c5f34..0c8dbaf47c 100644
--- a/collectors/proc.plugin/plugin_proc.h
+++ b/collectors/proc.plugin/plugin_proc.h
@@ -40,6 +40,7 @@ extern int do_proc_net_rpc_nfsd(int update_every, usec_t dt);
extern int do_proc_sys_kernel_random_entropy_avail(int update_every, usec_t dt);
extern int do_proc_interrupts(int update_every, usec_t dt);
extern int do_proc_softirqs(int update_every, usec_t dt);
+extern int do_proc_pressure(int update_every, usec_t dt);
extern int do_sys_kernel_mm_ksm(int update_every, usec_t dt);
extern int do_sys_block_zram(int update_every, usec_t dt);
extern int do_proc_loadavg(int update_every, usec_t dt);
@@ -66,6 +67,7 @@ extern void netdev_rename_device_add(const char *host_device, const char *contai
extern void netdev_rename_device_del(const char *host_device);
#include "proc_self_mountinfo.h"
+#include "proc_pressure.h"
#include "zfs_common.h"
#else // (TARGET_OS == OS_LINUX)
diff --git a/collectors/proc.plugin/proc_pressure.c b/collectors/proc.plugin/proc_pressure.c
new file mode 100644
index 0000000000..64974ae2eb
--- /dev/null
+++ b/collectors/proc.plugin/proc_pressure.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "plugin_proc.h"
+
+#define PLUGIN_PROC_MODULE_PRESSURE_NAME "/proc/pressure" // module name given to the charts; also the last part of the config section name
+#define CONFIG_SECTION_PLUGIN_PROC_PRESSURE "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_PRESSURE_NAME
+
+// Linux recalculates the pressure averages every 2 seconds (see PSI_FREQ in kernel/sched/psi.c)
+#define MIN_PRESSURE_UPDATE_EVERY 2 // lower bound, in seconds, that do_proc_pressure() applies to update_every
+
+/*
+ * Chart state for each monitored resource, in the order cpu, memory, io.
+ * do_proc_pressure() walks this table and resource_info with the same index,
+ * so both tables must list the resources in the same order.
+ */
+static struct pressure resources[PRESSURE_NUM_RESOURCES] = {
+ {
+ .some = { .id = "cpu_pressure", .title = "CPU Pressure" }, // no .full entry: with full.id left NULL the "full" option is never read and stays disabled
+ },
+ {
+ .some = { .id = "memory_some_pressure", .title = "Memory Pressure" },
+ .full = { .id = "memory_full_pressure", .title = "Memory Full Pressure" },
+ },
+ {
+ .some = { .id = "io_some_pressure", .title = "I/O Pressure" },
+ .full = { .id = "io_full_pressure", .title = "I/O Full Pressure" },
+ },
+};
+/*
+ * Per-resource collection details for the host-level pressure files.
+ * Indexed in parallel with the resources table.
+ */
+static struct {
+ procfile *pf; // handle of the pressure file; NULL until do_proc_pressure() opens it
+ const char *name; // metric file name
+ const char *family; // webui section name
+ int section_priority; // base chart priority; the "some" chart uses +40 and the "full" chart +45
+} resource_info[PRESSURE_NUM_RESOURCES] = {
+ { .name = "cpu", .family = "cpu", .section_priority = NETDATA_CHART_PRIO_SYSTEM_CPU },
+ { .name = "memory", .family = "ram", .section_priority = NETDATA_CHART_PRIO_SYSTEM_RAM },
+ { .name = "io", .family = "disk", .section_priority = NETDATA_CHART_PRIO_SYSTEM_IO },
+};
+
+/*
+ * Convert a pressure percentage to the fixed-point value stored in a dimension
+ * (the percentage multiplied by 100), rounding to the nearest integer.
+ *
+ * A plain cast truncates toward zero, so a value such as 0.29, which becomes
+ * 28.999999999999996 after the multiplication, would be stored as 28.
+ *
+ * NOTE(review): assumes collected_number is an integer type - confirm.
+ */
+static inline collected_number pressure_to_collected(double percent) {
+    double scaled = percent * 100;
+
+    return (collected_number)((scaled < 0) ? scaled - 0.5 : scaled + 0.5);
+}
+
+/**
+ * Store the chart's current value10/value60/value300 in its three dimensions
+ * and complete the collection round for the chart.
+ *
+ * The values are multiplied by 100 before they are stored; callers in this
+ * file create the dimensions with a divisor of 100 to match.
+ *
+ * @param chart chart whose st, rd10, rd60 and rd300 have already been created
+ *              and whose value fields hold the values to publish.
+ */
+void update_pressure_chart(struct pressure_chart *chart) {
+    rrddim_set_by_pointer(chart->st, chart->rd10, pressure_to_collected(chart->value10));
+    rrddim_set_by_pointer(chart->st, chart->rd60, pressure_to_collected(chart->value60));
+    rrddim_set_by_pointer(chart->st, chart->rd300, pressure_to_collected(chart->value300));
+
+    rrdset_done(chart->st);
+}
+
+// Dimension ids for the two kinds of lines in a pressure file, indexed by the
+// line number within the file (0 = "some" line, 1 = "full" line).
+static const char *const pressure_dim_ids[2][3] = {
+    { "some 10", "some 60", "some 300" },
+    { "full 10", "full 60", "full 300" },
+};
+
+/*
+ * Create the chart on first use (or advance it on later rounds), parse the
+ * three values of one line of the pressure file and publish them.
+ *
+ * The file is opened with " =" as word separators. For a line of the form
+ * "some avg10=0.00 avg60=0.00 avg300=0.00 total=0" (format per the kernel PSI
+ * documentation) the three averages are therefore words 2, 4 and 6.
+ *
+ * line must be 0 ("some") or 1 ("full"); it also selects the dimension ids.
+ */
+static void pressure_collect_line(struct pressure_chart *chart, procfile *ff, size_t line,
+                                  const char *family, int priority, int update_every) {
+    if (unlikely(!chart->st)) {
+        chart->st = rrdset_create_localhost(
+                "system"
+                , chart->id
+                , NULL
+                , family
+                , NULL
+                , chart->title
+                , "percentage"
+                , PLUGIN_PROC_NAME
+                , PLUGIN_PROC_MODULE_PRESSURE_NAME
+                , priority
+                , update_every
+                , RRDSET_TYPE_LINE
+        );
+        // values are stored multiplied by 100, hence the divisor of 100
+        chart->rd10 = rrddim_add(chart->st, pressure_dim_ids[line][0], NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+        chart->rd60 = rrddim_add(chart->st, pressure_dim_ids[line][1], NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+        chart->rd300 = rrddim_add(chart->st, pressure_dim_ids[line][2], NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+    } else {
+        rrdset_next(chart->st);
+    }
+
+    chart->value10 = strtod(procfile_lineword(ff, line, 2), NULL);
+    chart->value60 = strtod(procfile_lineword(ff, line, 4), NULL);
+    chart->value300 = strtod(procfile_lineword(ff, line, 6), NULL);
+    update_pressure_chart(chart);
+}
+
+/**
+ * Collect pressure stall information for cpu, memory and io.
+ *
+ * On first use of each resource the function reads its "enable ... pressure"
+ * options and opens <host prefix><base path>/<resource>. On every collection
+ * round it re-reads the file and updates the enabled "some" and "full" charts.
+ *
+ * @param update_every collection interval in seconds; values below
+ *                     MIN_PRESSURE_UPDATE_EVERY are raised to it.
+ * @param dt           microseconds elapsed since the previous call (it is
+ *                     subtracted from a countdown kept in microseconds).
+ *
+ * @return 1 when none of the resources could be opened, read or parsed in this
+ *         round, 0 otherwise (including calls skipped because the interval
+ *         has not elapsed yet).
+ *         NOTE(review): presumably a non-zero return makes the caller disable
+ *         the module - confirm in plugin_proc.c.
+ */
+int do_proc_pressure(int update_every, usec_t dt) {
+    static usec_t next_pressure_dt = 0;
+    static char *base_path = NULL;
+
+    int fail_count = 0;
+
+    // collecting faster than the minimum interval is pointless
+    if (update_every < MIN_PRESSURE_UPDATE_EVERY) {
+        update_every = MIN_PRESSURE_UPDATE_EVERY;
+    }
+
+    // count down the interval; only collect once it has been used up
+    if (next_pressure_dt > dt) {
+        next_pressure_dt -= dt;
+        return 0;
+    }
+    next_pressure_dt = update_every * USEC_PER_SEC;
+
+    if (unlikely(!base_path)) {
+        base_path = config_get(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, "base path of pressure metrics", "/proc/pressure");
+    }
+
+    for (int i = 0; i < PRESSURE_NUM_RESOURCES; i++) {
+        procfile *ff = resource_info[i].pf;
+        int do_some = resources[i].some.enabled;
+        int do_full = resources[i].full.enabled;
+
+        if (unlikely(!ff)) {
+            // first use, or the previous round lost the file: (re)read the options and open it
+            char filename[FILENAME_MAX + 1];
+            char config_key[CONFIG_MAX_NAME + 1];
+
+            snprintfz(filename
+                      , FILENAME_MAX
+                      , "%s%s/%s"
+                      , netdata_configured_host_prefix
+                      , base_path
+                      , resource_info[i].name);
+
+            snprintfz(config_key, CONFIG_MAX_NAME, "enable %s some pressure", resource_info[i].name);
+            do_some = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, CONFIG_BOOLEAN_YES);
+            resources[i].some.enabled = do_some;
+
+            // resources without a "full" chart id get no "full" option
+            if (resources[i].full.id) {
+                snprintfz(config_key, CONFIG_MAX_NAME, "enable %s full pressure", resource_info[i].name);
+                do_full = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, CONFIG_BOOLEAN_YES);
+                resources[i].full.enabled = do_full;
+            }
+
+            ff = procfile_open(filename, " =", PROCFILE_FLAG_DEFAULT);
+            if (unlikely(!ff)) {
+                error("Cannot read pressure information from %s.", filename);
+                fail_count++;
+                continue;
+            }
+        }
+
+        ff = procfile_readall(ff);
+        resource_info[i].pf = ff; // a NULL here makes the next round reopen the file
+        if (unlikely(!ff)) {
+            // A file that cannot be read is a failure like one that cannot be
+            // opened; without counting it the function could never report
+            // that all resources are unavailable.
+            fail_count++;
+            continue;
+        }
+
+        size_t lines = procfile_lines(ff);
+        if (unlikely(lines < 1)) {
+            error("%s has no lines.", procfile_filename(ff));
+            fail_count++;
+            continue;
+        }
+
+        if (do_some) {
+            pressure_collect_line(&resources[i].some, ff, 0, resource_info[i].family,
+                                  resource_info[i].section_priority + 40, update_every);
+        }
+
+        // NOTE(review): the "full" line is at index 1 but more than two parsed
+        // lines are required; presumably procfile also counts a trailing empty
+        // line - confirm.
+        if (do_full && lines > 2) {
+            pressure_collect_line(&resources[i].full, ff, 1, resource_info[i].family,
+                                  resource_info[i].section_priority + 45, update_every);
+        }
+    }
+
+    return (fail_count == PRESSURE_NUM_RESOURCES) ? 1 : 0;
+}
diff --git a/collectors/proc.plugin/proc_pressure.h b/collectors/proc.plugin/proc_pressure.h
new file mode 100644
index 0000000000..3330218665
--- /dev/null
+++ b/collectors/proc.plugin/proc_pressure.h
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_PROC_PRESSURE_H
+#define NETDATA_PROC_PRESSURE_H
+// Number of resources with pressure metrics; proc_pressure.c sizes its cpu, memory and io tables with it.
+#define PRESSURE_NUM_RESOURCES 3
+/*
+ * Pressure state of one resource: a "some" chart and a "full" chart, each
+ * described by a struct pressure_chart.
+ */
+struct pressure {
+ int updated; // NOTE(review): not referenced in proc_pressure.c; presumably used by the cgroups plugin - confirm
+ char *filename; // NOTE(review): not referenced in proc_pressure.c; presumably used by the cgroups plugin - confirm
+
+ struct pressure_chart {
+ int enabled; // non-zero when this chart should be collected
+
+ const char *id; // chart id
+ const char *title; // chart title
+
+ // latest values, set by the collector before calling update_pressure_chart()
+ double value10;
+ double value60;
+ double value300;
+
+ RRDSET *st; // chart handle; NULL until the chart is created
+ RRDDIM *rd10; // dimensions that receive value10, value60 and value300
+ RRDDIM *rd60;
+ RRDDIM *rd300;
+ } some, full;
+};
+// Stores value10/value60/value300 (multiplied by 100) in the chart's dimensions and calls rrdset_done() on the chart.
+extern void update_pressure_chart(struct pressure_chart *chart);
+
+#endif //NETDATA_PROC_PRESSURE_H