From 8a70725c132deb6fc313edcdccaea756a685fc49 Mon Sep 17 00:00:00 2001 From: Haochen Tong Date: Mon, 2 Dec 2019 22:04:50 +0100 Subject: proc.plugin: add pressure stall information (#7209) * proc.plugin: add pressure stall information * dashboard_info: add "Pressure" section * proc.plugin: mention PSI collector in doc * dashboard_info: fix grammar in PSI section * proc_pressure: fix wrong line name for "full" metrics * proc_pressure: fix copypasta * proc_pressure: refactor to prepare for cgroup changes * cgroups.plugin: add pressure monitoring * add proc_pressure.h to targets * Makefile.am: fix indentation * cgroups.plugin: remove a useless comment * cgroups.plugin: fix pressure config name * proc.plugin: arrange pressure charts under corresponding sections * dashboard_info: rearrange pressure chart descriptions * dashboard_info: reword PSI descriptions --- collectors/cgroups.plugin/sys_fs_cgroup.c | 269 ++++++++++++++++++++++++++++++ 1 file changed, 269 insertions(+) (limited to 'collectors/cgroups.plugin') diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c index d9d130f7e3..7882d0185f 100644 --- a/collectors/cgroups.plugin/sys_fs_cgroup.c +++ b/collectors/cgroups.plugin/sys_fs_cgroup.c @@ -23,6 +23,11 @@ static int cgroup_enable_blkio_throttle_io = CONFIG_BOOLEAN_AUTO; static int cgroup_enable_blkio_throttle_ops = CONFIG_BOOLEAN_AUTO; static int cgroup_enable_blkio_merged_ops = CONFIG_BOOLEAN_AUTO; static int cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_cpu = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_io_some = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_io_full = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_memory_some = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_AUTO; static int cgroup_enable_systemd_services = CONFIG_BOOLEAN_YES; static int cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO; @@ -105,6 +110,12 @@ void read_cgroup_plugin_configuration() { cgroup_enable_blkio_queued_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio queued operations", cgroup_enable_blkio_queued_ops); cgroup_enable_blkio_merged_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio merged operations", cgroup_enable_blkio_merged_ops); + cgroup_enable_pressure_cpu = config_get_boolean_ondemand("plugin:cgroups", "enable cpu pressure", cgroup_enable_pressure_cpu); + cgroup_enable_pressure_io_some = config_get_boolean_ondemand("plugin:cgroups", "enable io some pressure", cgroup_enable_pressure_io_some); + cgroup_enable_pressure_io_full = config_get_boolean_ondemand("plugin:cgroups", "enable io full pressure", cgroup_enable_pressure_io_full); + cgroup_enable_pressure_memory_some = config_get_boolean_ondemand("plugin:cgroups", "enable memory some pressure", cgroup_enable_pressure_memory_some); + cgroup_enable_pressure_memory_full = config_get_boolean_ondemand("plugin:cgroups", "enable memory full pressure", cgroup_enable_pressure_memory_full); + cgroup_recheck_zero_blkio_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero blkio every iterations", cgroup_recheck_zero_blkio_every_iterations); cgroup_recheck_zero_mem_failcnt_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero memory failcnt every iterations", cgroup_recheck_zero_mem_failcnt_every_iterations); cgroup_recheck_zero_mem_detailed_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero detailed memory every iterations", cgroup_recheck_zero_mem_detailed_every_iterations); @@ -116,6 +127,13 @@ void read_cgroup_plugin_configuration() { char filename[FILENAME_MAX + 1], *s; struct mountinfo *mi, *root = mountinfo_read(0); if(!cgroup_use_unified_cgroups) { + // cgroup v1 does not have pressure metrics + cgroup_enable_pressure_cpu = + cgroup_enable_pressure_io_some = + cgroup_enable_pressure_io_full = + cgroup_enable_pressure_memory_some = + cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_NO; + mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "cpuacct"); if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuacct"); if(!mi) { @@ -461,6 +479,10 @@ struct cgroup { struct cgroup_network_interface *interfaces; + struct pressure cpu_pressure; + struct pressure io_pressure; + struct pressure memory_pressure; + // per cgroup charts RRDSET *st_cpu; RRDSET *st_cpu_limit; @@ -798,6 +820,54 @@ static inline void cgroup2_read_blkio(struct blkio *io, unsigned int word_offset } } +static inline void cgroup2_read_pressure(struct pressure *res) { + static procfile *ff = NULL; + + if (likely(res->filename)) { + ff = procfile_reopen(ff, res->filename, " =", PROCFILE_FLAG_DEFAULT); + if (unlikely(!ff)) { + res->updated = 0; + cgroups_check = 1; + return; + } + + ff = procfile_readall(ff); + if (unlikely(!ff)) { + res->updated = 0; + cgroups_check = 1; + return; + } + + size_t lines = procfile_lines(ff); + if (lines < 1) { + error("CGROUP: file '%s' should have 1+ lines.", res->filename); + res->updated = 0; + return; + } + + res->some.value10 = strtod(procfile_lineword(ff, 0, 2), NULL); + res->some.value60 = strtod(procfile_lineword(ff, 0, 4), NULL); + res->some.value300 = strtod(procfile_lineword(ff, 0, 6), NULL); + + if (lines > 2) { + res->full.value10 = strtod(procfile_lineword(ff, 1, 2), NULL); + res->full.value60 = strtod(procfile_lineword(ff, 1, 4), NULL); + res->full.value300 = strtod(procfile_lineword(ff, 1, 6), NULL); + } + + res->updated = 1; + + if (unlikely(res->some.enabled == CONFIG_BOOLEAN_AUTO)) { + res->some.enabled = CONFIG_BOOLEAN_YES; + if (lines > 2) { + res->full.enabled = CONFIG_BOOLEAN_YES; + } else { + res->full.enabled = CONFIG_BOOLEAN_NO; + } + } + } +} + static inline void cgroup_read_memory(struct memory *mem, char parent_cg_is_unified) { static procfile *ff = NULL; @@ -946,6 +1016,9 @@ static inline void cgroup_read(struct cgroup *cg) { cgroup2_read_blkio(&cg->io_service_bytes, 0); cgroup2_read_blkio(&cg->io_serviced, 4); cgroup2_read_cpuacct_stat(&cg->cpuacct_stat); + cgroup2_read_pressure(&cg->cpu_pressure); + cgroup2_read_pressure(&cg->io_pressure); + cgroup2_read_pressure(&cg->memory_pressure); cgroup_read_memory(&cg->memory, 1); } } @@ -1236,6 +1309,12 @@ static inline struct cgroup *cgroup_add(const char *id) { return cg; } +static inline void free_pressure(struct pressure *res) { + if (res->some.st) rrdset_is_obsolete(res->some.st); + if (res->full.st) rrdset_is_obsolete(res->full.st); + freez(res->filename); +} + static inline void cgroup_free(struct cgroup *cg) { debug(D_CGROUP, "Removing cgroup '%s' with chart id '%s' (was %s and %s)", cg->id, cg->chart_id, (cg->enabled)?"enabled":"disabled", (cg->available)?"available":"not available"); @@ -1284,6 +1363,10 @@ static inline void cgroup_free(struct cgroup *cg) { freez(cg->io_merged.filename); freez(cg->io_queued.filename); + free_pressure(&cg->cpu_pressure); + free_pressure(&cg->io_pressure); + free_pressure(&cg->memory_pressure); + freez(cg->id); freez(cg->chart_id); freez(cg->chart_title); @@ -1748,6 +1831,42 @@ static inline void find_all_cgroups() { else debug(D_CGROUP, "memory.swap file for cgroup '%s': '%s' does not exist.", cg->id, filename); } + + if (unlikely(cgroup_enable_pressure_cpu && !cg->cpu_pressure.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.pressure", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->cpu_pressure.filename = strdupz(filename); + cg->cpu_pressure.some.enabled = cgroup_enable_pressure_cpu; + cg->cpu_pressure.full.enabled = CONFIG_BOOLEAN_NO; + debug(D_CGROUP, "cpu.pressure filename for cgroup '%s': '%s'", cg->id, cg->cpu_pressure.filename); + } else { + debug(D_CGROUP, "cpu.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); + } + } + + if (unlikely((cgroup_enable_pressure_io_some || cgroup_enable_pressure_io_full) && !cg->io_pressure.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/io.pressure", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->io_pressure.filename = strdupz(filename); + cg->io_pressure.some.enabled = cgroup_enable_pressure_io_some; + cg->io_pressure.full.enabled = cgroup_enable_pressure_io_full; + debug(D_CGROUP, "io.pressure filename for cgroup '%s': '%s'", cg->id, cg->io_pressure.filename); + } else { + debug(D_CGROUP, "io.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); + } + } + + if (unlikely((cgroup_enable_pressure_memory_some || cgroup_enable_pressure_memory_full) && !cg->memory_pressure.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.pressure", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->memory_pressure.filename = strdupz(filename); + cg->memory_pressure.some.enabled = cgroup_enable_pressure_memory_some; + cg->memory_pressure.full.enabled = cgroup_enable_pressure_memory_full; + debug(D_CGROUP, "memory.pressure filename for cgroup '%s': '%s'", cg->id, cg->memory_pressure.filename); + } else { + debug(D_CGROUP, "memory.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); + } + } } } @@ -3364,6 +3483,156 @@ void update_cgroup_charts(int update_every) { rrddim_set(cg->st_merged_ops, "write", cg->io_merged.Write); rrdset_done(cg->st_merged_ops); } + + if (cg->options & CGROUP_OPTIONS_IS_UNIFIED) { + struct pressure *res = &cg->cpu_pressure; + if (likely(res->updated && res->some.enabled)) { + if (unlikely(!res->some.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "CPU pressure for cgroup %s", cg->chart_title); + + chart = res->some.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu_pressure" + , NULL + , "cpu" + , "cgroup.cpu_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2200 + , update_every, + RRDSET_TYPE_LINE); + + res->some.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->some.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->some.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(res->some.st); + } + + update_pressure_chart(&res->some); + } + + res = &cg->memory_pressure; + if (likely(res->updated && res->some.enabled)) { + if (unlikely(!res->some.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "Memory pressure for cgroup %s", cg->chart_title); + + chart = res->some.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "mem_pressure" + , NULL + , "mem" + , "cgroup.memory_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2300 + , update_every, + RRDSET_TYPE_LINE); + + res->some.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->some.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->some.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(res->some.st); + } + + update_pressure_chart(&res->some); + } + + if (likely(res->updated && res->full.enabled)) { + if (unlikely(!res->full.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "Memory full pressure for cgroup %s", cg->chart_title); + + chart = res->full.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "mem_full_pressure" + , NULL + , "mem" + , "cgroup.memory_full_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2350 + , update_every, + RRDSET_TYPE_LINE); + + res->full.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->full.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->full.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(res->full.st); + } + + update_pressure_chart(&res->full); + } + + res = &cg->io_pressure; + if (likely(res->updated && res->some.enabled)) { + if (unlikely(!res->some.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "I/O pressure for cgroup %s", cg->chart_title); + + chart = res->some.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "io_pressure" + , NULL + , "disk" + , "cgroup.io_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2400 + , update_every, + RRDSET_TYPE_LINE); + + res->some.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->some.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->some.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(res->some.st); + } + + update_pressure_chart(&res->some); + } + + if (likely(res->updated && res->full.enabled)) { + if (unlikely(!res->full.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "I/O full pressure for cgroup %s", cg->chart_title); + + chart = res->full.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "io_full_pressure" + , NULL + , "disk" + , "cgroup.io_full_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2450 + , update_every, + RRDSET_TYPE_LINE); + + res->full.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->full.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->full.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(res->full.st); + } + + update_pressure_chart(&res->full); + } + } } if(likely(cgroup_enable_systemd_services)) -- cgit v1.2.3