diff options
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | Makefile.am | 2 | ||||
-rw-r--r-- | collectors/cgroups.plugin/sys_fs_cgroup.c | 269 | ||||
-rw-r--r-- | collectors/proc.plugin/README.md | 1 | ||||
-rw-r--r-- | collectors/proc.plugin/plugin_proc.c | 3 | ||||
-rw-r--r-- | collectors/proc.plugin/plugin_proc.h | 2 | ||||
-rw-r--r-- | collectors/proc.plugin/proc_pressure.c | 177 | ||||
-rw-r--r-- | collectors/proc.plugin/proc_pressure.h | 31 | ||||
-rw-r--r-- | web/gui/dashboard_info.js | 25 |
9 files changed, 512 insertions, 0 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 5e35dff6f3..f3989b4c47 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -438,6 +438,8 @@ set(PROC_PLUGIN_FILES collectors/proc.plugin/proc_sys_kernel_random_entropy_avail.c collectors/proc.plugin/proc_vmstat.c collectors/proc.plugin/proc_uptime.c + collectors/proc.plugin/proc_pressure.c + collectors/proc.plugin/proc_pressure.h collectors/proc.plugin/sys_kernel_mm_ksm.c collectors/proc.plugin/sys_block_zram.c collectors/proc.plugin/sys_devices_system_edac_mc.c diff --git a/Makefile.am b/Makefile.am index 2625dcc563..3ecdb14064 100644 --- a/Makefile.am +++ b/Makefile.am @@ -263,6 +263,8 @@ PROC_PLUGIN_FILES = \ collectors/proc.plugin/proc_loadavg.c \ collectors/proc.plugin/proc_meminfo.c \ collectors/proc.plugin/proc_pagetypeinfo.c \ + collectors/proc.plugin/proc_pressure.c \ + collectors/proc.plugin/proc_pressure.h \ collectors/proc.plugin/proc_net_dev.c \ collectors/proc.plugin/proc_net_ip_vs_stats.c \ collectors/proc.plugin/proc_net_netstat.c \ diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c index d9d130f7e3..7882d0185f 100644 --- a/collectors/cgroups.plugin/sys_fs_cgroup.c +++ b/collectors/cgroups.plugin/sys_fs_cgroup.c @@ -23,6 +23,11 @@ static int cgroup_enable_blkio_throttle_io = CONFIG_BOOLEAN_AUTO; static int cgroup_enable_blkio_throttle_ops = CONFIG_BOOLEAN_AUTO; static int cgroup_enable_blkio_merged_ops = CONFIG_BOOLEAN_AUTO; static int cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_cpu = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_io_some = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_io_full = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_memory_some = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_AUTO; static int cgroup_enable_systemd_services = CONFIG_BOOLEAN_YES; static int cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO; @@ -105,6 +110,12 @@ void read_cgroup_plugin_configuration() { cgroup_enable_blkio_queued_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio queued operations", cgroup_enable_blkio_queued_ops); cgroup_enable_blkio_merged_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio merged operations", cgroup_enable_blkio_merged_ops); + cgroup_enable_pressure_cpu = config_get_boolean_ondemand("plugin:cgroups", "enable cpu pressure", cgroup_enable_pressure_cpu); + cgroup_enable_pressure_io_some = config_get_boolean_ondemand("plugin:cgroups", "enable io some pressure", cgroup_enable_pressure_io_some); + cgroup_enable_pressure_io_full = config_get_boolean_ondemand("plugin:cgroups", "enable io full pressure", cgroup_enable_pressure_io_full); + cgroup_enable_pressure_memory_some = config_get_boolean_ondemand("plugin:cgroups", "enable memory some pressure", cgroup_enable_pressure_memory_some); + cgroup_enable_pressure_memory_full = config_get_boolean_ondemand("plugin:cgroups", "enable memory full pressure", cgroup_enable_pressure_memory_full); + cgroup_recheck_zero_blkio_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero blkio every iterations", cgroup_recheck_zero_blkio_every_iterations); cgroup_recheck_zero_mem_failcnt_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero memory failcnt every iterations", cgroup_recheck_zero_mem_failcnt_every_iterations); cgroup_recheck_zero_mem_detailed_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero detailed memory every iterations", cgroup_recheck_zero_mem_detailed_every_iterations); @@ -116,6 +127,13 @@ void read_cgroup_plugin_configuration() { char filename[FILENAME_MAX + 1], *s; struct mountinfo *mi, *root = mountinfo_read(0); if(!cgroup_use_unified_cgroups) { + // cgroup v1 does not have pressure metrics + cgroup_enable_pressure_cpu = + cgroup_enable_pressure_io_some = + cgroup_enable_pressure_io_full = + cgroup_enable_pressure_memory_some = + cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_NO; + mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "cpuacct"); if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuacct"); if(!mi) { @@ -461,6 +479,10 @@ struct cgroup { struct cgroup_network_interface *interfaces; + struct pressure cpu_pressure; + struct pressure io_pressure; + struct pressure memory_pressure; + // per cgroup charts RRDSET *st_cpu; RRDSET *st_cpu_limit; @@ -798,6 +820,54 @@ static inline void cgroup2_read_blkio(struct blkio *io, unsigned int word_offset } } +static inline void cgroup2_read_pressure(struct pressure *res) { + static procfile *ff = NULL; + + if (likely(res->filename)) { + ff = procfile_reopen(ff, res->filename, " =", PROCFILE_FLAG_DEFAULT); + if (unlikely(!ff)) { + res->updated = 0; + cgroups_check = 1; + return; + } + + ff = procfile_readall(ff); + if (unlikely(!ff)) { + res->updated = 0; + cgroups_check = 1; + return; + } + + size_t lines = procfile_lines(ff); + if (lines < 1) { + error("CGROUP: file '%s' should have 1+ lines.", res->filename); + res->updated = 0; + return; + } + + res->some.value10 = strtod(procfile_lineword(ff, 0, 2), NULL); + res->some.value60 = strtod(procfile_lineword(ff, 0, 4), NULL); + res->some.value300 = strtod(procfile_lineword(ff, 0, 6), NULL); + + if (lines > 2) { + res->full.value10 = strtod(procfile_lineword(ff, 1, 2), NULL); + res->full.value60 = strtod(procfile_lineword(ff, 1, 4), NULL); + res->full.value300 = strtod(procfile_lineword(ff, 1, 6), NULL); + } + + res->updated = 1; + + if (unlikely(res->some.enabled == CONFIG_BOOLEAN_AUTO)) { + res->some.enabled = CONFIG_BOOLEAN_YES; + if (lines > 2) { + res->full.enabled = CONFIG_BOOLEAN_YES; + } else { + res->full.enabled = CONFIG_BOOLEAN_NO; + } + } + } +} + static inline void cgroup_read_memory(struct memory *mem, char parent_cg_is_unified) { static procfile *ff = NULL; @@ -946,6 +1016,9 @@ static inline void cgroup_read(struct cgroup *cg) { cgroup2_read_blkio(&cg->io_service_bytes, 0); cgroup2_read_blkio(&cg->io_serviced, 4); cgroup2_read_cpuacct_stat(&cg->cpuacct_stat); + cgroup2_read_pressure(&cg->cpu_pressure); + cgroup2_read_pressure(&cg->io_pressure); + cgroup2_read_pressure(&cg->memory_pressure); cgroup_read_memory(&cg->memory, 1); } } @@ -1236,6 +1309,12 @@ static inline struct cgroup *cgroup_add(const char *id) { return cg; } +static inline void free_pressure(struct pressure *res) { + if (res->some.st) rrdset_is_obsolete(res->some.st); + if (res->full.st) rrdset_is_obsolete(res->full.st); + freez(res->filename); +} + static inline void cgroup_free(struct cgroup *cg) { debug(D_CGROUP, "Removing cgroup '%s' with chart id '%s' (was %s and %s)", cg->id, cg->chart_id, (cg->enabled)?"enabled":"disabled", (cg->available)?"available":"not available"); @@ -1284,6 +1363,10 @@ static inline void cgroup_free(struct cgroup *cg) { freez(cg->io_merged.filename); freez(cg->io_queued.filename); + free_pressure(&cg->cpu_pressure); + free_pressure(&cg->io_pressure); + free_pressure(&cg->memory_pressure); + freez(cg->id); freez(cg->chart_id); freez(cg->chart_title); @@ -1748,6 +1831,42 @@ static inline void find_all_cgroups() { else debug(D_CGROUP, "memory.swap file for cgroup '%s': '%s' does not exist.", cg->id, filename); } + + if (unlikely(cgroup_enable_pressure_cpu && !cg->cpu_pressure.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.pressure", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->cpu_pressure.filename = strdupz(filename); + cg->cpu_pressure.some.enabled = cgroup_enable_pressure_cpu; + cg->cpu_pressure.full.enabled = CONFIG_BOOLEAN_NO; + debug(D_CGROUP, "cpu.pressure filename for cgroup '%s': '%s'", cg->id, cg->cpu_pressure.filename); + } else { + debug(D_CGROUP, "cpu.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); + } + } + + if (unlikely((cgroup_enable_pressure_io_some || cgroup_enable_pressure_io_full) && !cg->io_pressure.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/io.pressure", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->io_pressure.filename = strdupz(filename); + cg->io_pressure.some.enabled = cgroup_enable_pressure_io_some; + cg->io_pressure.full.enabled = cgroup_enable_pressure_io_full; + debug(D_CGROUP, "io.pressure filename for cgroup '%s': '%s'", cg->id, cg->io_pressure.filename); + } else { + debug(D_CGROUP, "io.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); + } + } + + if (unlikely((cgroup_enable_pressure_memory_some || cgroup_enable_pressure_memory_full) && !cg->memory_pressure.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.pressure", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->memory_pressure.filename = strdupz(filename); + cg->memory_pressure.some.enabled = cgroup_enable_pressure_memory_some; + cg->memory_pressure.full.enabled = cgroup_enable_pressure_memory_full; + debug(D_CGROUP, "memory.pressure filename for cgroup '%s': '%s'", cg->id, cg->memory_pressure.filename); + } else { + debug(D_CGROUP, "memory.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); + } + } } } @@ -3364,6 +3483,156 @@ void update_cgroup_charts(int update_every) { rrddim_set(cg->st_merged_ops, "write", cg->io_merged.Write); rrdset_done(cg->st_merged_ops); } + + if (cg->options & CGROUP_OPTIONS_IS_UNIFIED) { + struct pressure *res = &cg->cpu_pressure; + if (likely(res->updated && res->some.enabled)) { + if (unlikely(!res->some.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "CPU pressure for cgroup %s", cg->chart_title); + + chart = res->some.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu_pressure" + , NULL + , "cpu" + , "cgroup.cpu_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2200 + , update_every, + RRDSET_TYPE_LINE); + + res->some.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->some.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->some.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(res->some.st); + } + + update_pressure_chart(&res->some); + } + + res = &cg->memory_pressure; + if (likely(res->updated && res->some.enabled)) { + if (unlikely(!res->some.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "Memory pressure for cgroup %s", cg->chart_title); + + chart = res->some.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "mem_pressure" + , NULL + , "mem" + , "cgroup.memory_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2300 + , update_every, + RRDSET_TYPE_LINE); + + res->some.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->some.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->some.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(res->some.st); + } + + update_pressure_chart(&res->some); + } + + if (likely(res->updated && res->full.enabled)) { + if (unlikely(!res->full.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "Memory full pressure for cgroup %s", cg->chart_title); + + chart = res->full.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "mem_full_pressure" + , NULL + , "mem" + , "cgroup.memory_full_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2350 + , update_every, + RRDSET_TYPE_LINE); + + res->full.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->full.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->full.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(res->full.st); + } + + update_pressure_chart(&res->full); + } + + res = &cg->io_pressure; + if (likely(res->updated && res->some.enabled)) { + if (unlikely(!res->some.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "I/O pressure for cgroup %s", cg->chart_title); + + chart = res->some.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "io_pressure" + , NULL + , "disk" + , "cgroup.io_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2400 + , update_every, + RRDSET_TYPE_LINE); + + res->some.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->some.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->some.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(res->some.st); + } + + update_pressure_chart(&res->some); + } + + if (likely(res->updated && res->full.enabled)) { + if (unlikely(!res->full.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "I/O full pressure for cgroup %s", cg->chart_title); + + chart = res->full.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "io_full_pressure" + , NULL + , "disk" + , "cgroup.io_full_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2450 + , update_every, + RRDSET_TYPE_LINE); + + res->full.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->full.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->full.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(res->full.st); + } + + update_pressure_chart(&res->full); + } + } } if(likely(cgroup_enable_systemd_services)) diff --git a/collectors/proc.plugin/README.md b/collectors/proc.plugin/README.md index 7e2aa10969..b81dd82f1b 100644 --- a/collectors/proc.plugin/README.md +++ b/collectors/proc.plugin/README.md @@ -18,6 +18,7 @@ - `/proc/interrupts` (total and per core hardware interrupts) - `/proc/softirqs` (total and per core software interrupts) - `/proc/loadavg` (system load and total processes running) +- `/proc/pressure/{cpu,memory,io}` (pressure stall information) - `/proc/sys/kernel/random/entropy_avail` (random numbers pool availability - used in cryptography) - `/sys/class/power_supply` (power supply properties) - `ipc` (IPC semaphores and message queues) diff --git a/collectors/proc.plugin/plugin_proc.c b/collectors/proc.plugin/plugin_proc.c index fcb1babcfa..c9eef2c257 100644 --- a/collectors/proc.plugin/plugin_proc.c +++ b/collectors/proc.plugin/plugin_proc.c @@ -21,6 +21,9 @@ static struct proc_module { { .name = "/proc/loadavg", .dim = "loadavg", .func = do_proc_loadavg }, { .name = "/proc/sys/kernel/random/entropy_avail", .dim = "entropy", .func = do_proc_sys_kernel_random_entropy_avail }, + // pressure metrics + { .name = "/proc/pressure", .dim = "pressure", .func = do_proc_pressure }, + // CPU metrics { .name = "/proc/interrupts", .dim = "interrupts", .func = do_proc_interrupts }, { .name = "/proc/softirqs", .dim = "softirqs", .func = do_proc_softirqs }, diff --git a/collectors/proc.plugin/plugin_proc.h b/collectors/proc.plugin/plugin_proc.h index cb9a0c5f34..0c8dbaf47c 100644 --- a/collectors/proc.plugin/plugin_proc.h +++ b/collectors/proc.plugin/plugin_proc.h @@ -40,6 +40,7 @@ extern int do_proc_net_rpc_nfsd(int update_every, usec_t dt); extern int do_proc_sys_kernel_random_entropy_avail(int update_every, usec_t dt); extern int do_proc_interrupts(int update_every, usec_t dt); extern int do_proc_softirqs(int update_every, usec_t dt); +extern int do_proc_pressure(int update_every, usec_t dt); extern int do_sys_kernel_mm_ksm(int update_every, usec_t dt); extern int do_sys_block_zram(int update_every, usec_t dt); extern int do_proc_loadavg(int update_every, usec_t dt); @@ -66,6 +67,7 @@ extern void netdev_rename_device_add(const char *host_device, const char *contai extern void netdev_rename_device_del(const char *host_device); #include "proc_self_mountinfo.h" +#include "proc_pressure.h" #include "zfs_common.h" #else // (TARGET_OS == OS_LINUX) diff --git a/collectors/proc.plugin/proc_pressure.c b/collectors/proc.plugin/proc_pressure.c new file mode 100644 index 0000000000..64974ae2eb --- /dev/null +++ b/collectors/proc.plugin/proc_pressure.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_PRESSURE_NAME "/proc/pressure" +#define CONFIG_SECTION_PLUGIN_PROC_PRESSURE "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_PRESSURE_NAME + +// linux calculates this every 2 seconds, see kernel/sched/psi.c PSI_FREQ +#define MIN_PRESSURE_UPDATE_EVERY 2 + + +static struct pressure resources[PRESSURE_NUM_RESOURCES] = { + { + .some = { .id = "cpu_pressure", .title = "CPU Pressure" }, + }, + { + .some = { .id = "memory_some_pressure", .title = "Memory Pressure" }, + .full = { .id = "memory_full_pressure", .title = "Memory Full Pressure" }, + }, + { + .some = { .id = "io_some_pressure", .title = "I/O Pressure" }, + .full = { .id = "io_full_pressure", .title = "I/O Full Pressure" }, + }, +}; + +static struct { + procfile *pf; + const char *name; // metric file name + const char *family; // webui section name + int section_priority; +} resource_info[PRESSURE_NUM_RESOURCES] = { + { .name = "cpu", .family = "cpu", .section_priority = NETDATA_CHART_PRIO_SYSTEM_CPU }, + { .name = "memory", .family = "ram", .section_priority = NETDATA_CHART_PRIO_SYSTEM_RAM }, + { .name = "io", .family = "disk", .section_priority = NETDATA_CHART_PRIO_SYSTEM_IO }, +}; + +void update_pressure_chart(struct pressure_chart *chart) { + rrddim_set_by_pointer(chart->st, chart->rd10, (collected_number)(chart->value10 * 100)); + rrddim_set_by_pointer(chart->st, chart->rd60, (collected_number) (chart->value60 * 100)); + rrddim_set_by_pointer(chart->st, chart->rd300, (collected_number) (chart->value300 * 100)); + + rrdset_done(chart->st); +} + +int do_proc_pressure(int update_every, usec_t dt) { + int fail_count = 0; + int i; + + static usec_t next_pressure_dt = 0; + static char *base_path = NULL; + + update_every = (update_every < MIN_PRESSURE_UPDATE_EVERY) ? MIN_PRESSURE_UPDATE_EVERY : update_every; + + if (next_pressure_dt <= dt) { + next_pressure_dt = update_every * USEC_PER_SEC; + } else { + next_pressure_dt -= dt; + return 0; + } + + if (unlikely(!base_path)) { + base_path = config_get(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, "base path of pressure metrics", "/proc/pressure"); + } + + for (i = 0; i < PRESSURE_NUM_RESOURCES; i++) { + procfile *ff = resource_info[i].pf; + int do_some = resources[i].some.enabled, do_full = resources[i].full.enabled; + + if (unlikely(!ff)) { + char filename[FILENAME_MAX + 1]; + char config_key[CONFIG_MAX_NAME + 1]; + + snprintfz(filename + , FILENAME_MAX + , "%s%s/%s" + , netdata_configured_host_prefix + , base_path + , resource_info[i].name); + + snprintfz(config_key, CONFIG_MAX_NAME, "enable %s some pressure", resource_info[i].name); + do_some = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, CONFIG_BOOLEAN_YES); + resources[i].some.enabled = do_some; + if (resources[i].full.id) { + snprintfz(config_key, CONFIG_MAX_NAME, "enable %s full pressure", resource_info[i].name); + do_full = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, CONFIG_BOOLEAN_YES); + resources[i].full.enabled = do_full; + } + + ff = procfile_open(filename, " =", PROCFILE_FLAG_DEFAULT); + if (unlikely(!ff)) { + error("Cannot read pressure information from %s.", filename); + fail_count++; + continue; + } + } + + ff = procfile_readall(ff); + resource_info[i].pf = ff; + if (unlikely(!ff)) { + continue; + } + + size_t lines = procfile_lines(ff); + if (unlikely(lines < 1)) { + error("%s has no lines.", procfile_filename(ff)); + fail_count++; + continue; + } + + struct pressure_chart *chart; + if (do_some) { + chart = &resources[i].some; + if (unlikely(!chart->st)) { + chart->st = rrdset_create_localhost( + "system" + , chart->id + , NULL + , resource_info[i].family + , NULL + , chart->title + , "percentage" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_PRESSURE_NAME + , resource_info[i].section_priority + 40 + , update_every + , RRDSET_TYPE_LINE + ); + chart->rd10 = rrddim_add(chart->st, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + chart->rd60 = rrddim_add(chart->st, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + chart->rd300 = rrddim_add(chart->st, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(chart->st); + } + + chart->value10 = strtod(procfile_lineword(ff, 0, 2), NULL); + chart->value60 = strtod(procfile_lineword(ff, 0, 4), NULL); + chart->value300 = strtod(procfile_lineword(ff, 0, 6), NULL); + update_pressure_chart(chart); + } + + if (do_full && lines > 2) { + chart = &resources[i].full; + if (unlikely(!chart->st)) { + chart->st = rrdset_create_localhost( + "system" + , chart->id + , NULL + , resource_info[i].family + , NULL + , chart->title + , "percentage" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_PRESSURE_NAME + , resource_info[i].section_priority + 45 + , update_every + , RRDSET_TYPE_LINE + ); + chart->rd10 = rrddim_add(chart->st, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + chart->rd60 = rrddim_add(chart->st, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + chart->rd300 = rrddim_add(chart->st, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(chart->st); + } + + chart->value10 = strtod(procfile_lineword(ff, 1, 2), NULL); + chart->value60 = strtod(procfile_lineword(ff, 1, 4), NULL); + chart->value300 = strtod(procfile_lineword(ff, 1, 6), NULL); + update_pressure_chart(chart); + } + } + + if (PRESSURE_NUM_RESOURCES == fail_count) { + return 1; + } + + return 0; +} diff --git a/collectors/proc.plugin/proc_pressure.h b/collectors/proc.plugin/proc_pressure.h new file mode 100644 index 0000000000..3330218665 --- /dev/null +++ b/collectors/proc.plugin/proc_pressure.h @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_PROC_PRESSURE_H +#define NETDATA_PROC_PRESSURE_H + +#define PRESSURE_NUM_RESOURCES 3 + +struct pressure { + int updated; + char *filename; + + struct pressure_chart { + int enabled; + + const char *id; + const char *title; + + double value10; + double value60; + double value300; + + RRDSET *st; + RRDDIM *rd10; + RRDDIM *rd60; + RRDDIM *rd300; + } some, full; +}; + +extern void update_pressure_chart(struct pressure_chart *chart); + +#endif //NETDATA_PROC_PRESSURE_H diff --git a/web/gui/dashboard_info.js b/web/gui/dashboard_info.js index cb5c9f7bcf..b0cbc9a05c 100644 --- a/web/gui/dashboard_info.js +++ b/web/gui/dashboard_info.js @@ -718,6 +718,31 @@ netdataDashboard.context = { height: 0.7 }, + 'system.cpu_pressure': { + info: '<a href="https://www.kernel.org/doc/html/latest/accounting/psi.html">Pressure Stall Information</a> ' + + 'identifies and quantifies the disruptions caused by resource contentions. ' + + 'The "some" line indicates the share of time in which at least <b>some</b> tasks are stalled on CPU. ' + + 'The ratios (in %) are tracked as recent trends over 10-, 60-, and 300-second windows.' + }, + + 'system.memory_some_pressure': { + info: '<a href="https://www.kernel.org/doc/html/latest/accounting/psi.html">Pressure Stall Information</a> ' + + 'identifies and quantifies the disruptions caused by resource contentions. ' + + 'The "some" line indicates the share of time in which at least <b>some</b> tasks are stalled on memory. ' + + 'The "full" line indicates the share of time in which <b>all non-idle</b> tasks are stalled on memory simultaneously. ' + + 'In this state actual CPU cycles are going to waste, and a workload that spends extended time in this state is considered to be thrashing. ' + + 'The ratios (in %) are tracked as recent trends over 10-, 60-, and 300-second windows.' + }, + + 'system.io_some_pressure': { + info: '<a href="https://www.kernel.org/doc/html/latest/accounting/psi.html">Pressure Stall Information</a> ' + + 'identifies and quantifies the disruptions caused by resource contentions. ' + + 'The "some" line indicates the share of time in which at least <b>some</b> tasks are stalled on I/O. ' + + 'The "full" line indicates the share of time in which <b>all non-idle</b> tasks are stalled on I/O simultaneously. ' + + 'In this state actual CPU cycles are going to waste, and a workload that spends extended time in this state is considered to be thrashing. ' + + 'The ratios (in %) are tracked as recent trends over 10-, 60-, and 300-second windows.' + }, + 'system.io': { info: function (os) { var s = 'Total Disk I/O, for all physical disks. You can get detailed information about each disk at the <a href="#menu_disk">Disks</a> section and per application Disk usage at the <a href="#menu_apps">Applications Monitoring</a> section.'; |