diff options
author | Ilya Mashchenko <ilya@netdata.cloud> | 2023-11-09 15:23:59 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-09 15:23:59 +0200 |
commit | eafc0f045c7bd9667e23b34e108282938fb10eea (patch) | |
tree | 40d90de7d8b61f718bc2f29554beabe5fa1a13cb /collectors/cgroups.plugin | |
parent | 72231999175d985f16f076ac8e3997813b9fd788 (diff) |
cgroups: collect pids/pids.current (#16369)
* cgroups collect pids/pids.current
* change dim name
Diffstat (limited to 'collectors/cgroups.plugin')
-rw-r--r-- | collectors/cgroups.plugin/cgroup-charts.c | 40 | ||||
-rw-r--r-- | collectors/cgroups.plugin/cgroup-discovery.c | 18 | ||||
-rw-r--r-- | collectors/cgroups.plugin/cgroup-internals.h | 26 | ||||
-rw-r--r-- | collectors/cgroups.plugin/cgroup-top.c | 25 | ||||
-rw-r--r-- | collectors/cgroups.plugin/sys_fs_cgroup.c | 93 |
5 files changed, 166 insertions, 36 deletions
diff --git a/collectors/cgroups.plugin/cgroup-charts.c b/collectors/cgroups.plugin/cgroup-charts.c index fa2657c536..a89e8ac454 100644 --- a/collectors/cgroups.plugin/cgroup-charts.c +++ b/collectors/cgroups.plugin/cgroup-charts.c @@ -1484,3 +1484,43 @@ void update_io_full_pressure_stall_time_chart(struct cgroup *cg) { rrddim_set_by_pointer(chart, pcs->total_time.rdtotal, (collected_number)(pcs->total_time.value_total)); rrdset_done(chart); } + +void update_pids_current_chart(struct cgroup *cg) { + RRDSET *chart = cg->st_pids; + + if (unlikely(!cg->st_pids)) { + char *title; + char *context; + int prio; + if (is_cgroup_systemd_service(cg)) { + title = "Systemd Services Number of Processes"; + context = "systemd.service.pids.current"; + prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 70; + } else { + title = "Number of processes"; + context = k8s_is_kubepod(cg) ? "k8s.cgroup.pids_current" : "cgroup.pids_current"; + prio = cgroup_containers_chart_priority + 2150; + } + + char buff[RRD_ID_LENGTH_MAX + 1]; + chart = cg->st_pids = rrdset_create_localhost( + cgroup_chart_type(buff, cg), + "pids_current", + NULL, + "pids", + context, + title, + "pids", + PLUGIN_CGROUPS_NAME, + is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, + prio, + cgroup_update_every, + RRDSET_TYPE_LINE); + + rrdset_update_rrdlabels(chart, cg->chart_labels); + cg->st_pids_rd_pids_current = rrddim_add(chart, "pids", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(chart, cg->st_pids_rd_pids_current, (collected_number)cg->pids.pids_current); + rrdset_done(chart); +} diff --git a/collectors/cgroups.plugin/cgroup-discovery.c b/collectors/cgroups.plugin/cgroup-discovery.c index f4abf27942..28c6d96cf9 100644 --- a/collectors/cgroups.plugin/cgroup-discovery.c +++ b/collectors/cgroups.plugin/cgroup-discovery.c @@ -75,6 +75,7 @@ static inline void cgroup_free(struct cgroup *cg) { if(cg->st_throttle_serviced_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_throttle_serviced_ops); if(cg->st_queued_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_queued_ops); if(cg->st_merged_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_merged_ops); + if(cg->st_pids) rrdset_is_obsolete___safe_from_collector_thread(cg->st_pids); freez(cg->filename_cpuset_cpus); freez(cg->filename_cpu_cfs_period); @@ -105,6 +106,7 @@ static inline void cgroup_free(struct cgroup *cg) { freez(cg->io_merged.filename); freez(cg->io_queued.filename); + freez(cg->pids.pids_current_filename); free_pressure(&cg->cpu_pressure); free_pressure(&cg->io_pressure); @@ -598,6 +600,14 @@ static inline void discovery_update_filenames_cgroup_v1(struct cgroup *cg) { } } } + + // Pids + if (unlikely(!cg->pids.pids_current_filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/pids.current", cgroup_pids_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->pids.pids_current_filename = strdupz(filename); + } + } } static inline void discovery_update_filenames_cgroup_v2(struct cgroup *cg) { @@ -713,6 +723,14 @@ static inline void discovery_update_filenames_cgroup_v2(struct cgroup *cg) { cg->irq_pressure.full.enabled = cgroup_enable_pressure_irq_full; } } + + // Pids + if (unlikely(!cg->pids.pids_current_filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/pids.current", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->pids.pids_current_filename = strdupz(filename); + } + } } static inline void discovery_update_filenames_all_cgroups() { diff --git a/collectors/cgroups.plugin/cgroup-internals.h b/collectors/cgroups.plugin/cgroup-internals.h index cefd6c0d78..a698022406 100644 --- a/collectors/cgroups.plugin/cgroup-internals.h +++ b/collectors/cgroups.plugin/cgroup-internals.h @@ -25,6 +25,12 @@ struct blkio { */ }; +struct pids { + char *pids_current_filename; + int pids_current_updated; + unsigned long long pids_current; +}; + // https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt struct memory { ARL_BASE *arl_base; @@ -218,6 +224,8 @@ struct cgroup { struct blkio io_merged; // operations struct blkio io_queued; // operations + struct pids pids; + struct cgroup_network_interface *interfaces; struct pressure cpu_pressure; @@ -225,7 +233,7 @@ struct cgroup { struct pressure memory_pressure; struct pressure irq_pressure; - // per cgroup charts + // Cpu RRDSET *st_cpu; RRDDIM *st_cpu_rd_user; RRDDIM *st_cpu_rd_system; @@ -236,6 +244,7 @@ struct cgroup { RRDSET *st_cpu_throttled_time; RRDSET *st_cpu_shares; + // Memory RRDSET *st_mem; RRDDIM *st_mem_rd_ram; RRDDIM *st_mem_rd_swap; @@ -248,6 +257,7 @@ struct cgroup { RRDSET *st_mem_usage_limit; RRDSET *st_mem_failcnt; + // Blkio RRDSET *st_io; RRDDIM *st_io_rd_read; RRDDIM *st_io_rd_written; @@ -263,6 +273,10 @@ struct cgroup { RRDSET *st_queued_ops; RRDSET *st_merged_ops; + // Pids + RRDSET *st_pids; + RRDDIM *st_pids_rd_pids_current; + // per cgroup chart variables char *filename_cpuset_cpus; unsigned long long cpuset_cpus; @@ -307,10 +321,6 @@ extern uv_mutex_t cgroup_root_mutex; void cgroup_discovery_worker(void *ptr); - - - - extern int is_inside_k8s; extern long system_page_size; extern int cgroup_enable_cpuacct_stat; @@ -350,6 +360,7 @@ extern char *cgroup_cpuacct_base; extern char *cgroup_cpuset_base; extern char *cgroup_blkio_base; extern char *cgroup_memory_base; +extern char *cgroup_pids_base; extern char *cgroup_devices_base; extern char *cgroup_unified_base; extern int cgroup_root_count; @@ -478,18 +489,23 @@ void update_throttle_io_serviced_ops_chart(struct cgroup *cg); void update_io_queued_ops_chart(struct cgroup *cg); void update_io_merged_ops_chart(struct cgroup *cg); +void update_pids_current_chart(struct cgroup *cg); + void update_cpu_some_pressure_chart(struct cgroup *cg); void update_cpu_some_pressure_stall_time_chart(struct cgroup *cg); void update_cpu_full_pressure_chart(struct cgroup *cg); void update_cpu_full_pressure_stall_time_chart(struct cgroup *cg); + void update_mem_some_pressure_chart(struct cgroup *cg); void update_mem_some_pressure_stall_time_chart(struct cgroup *cg); void update_mem_full_pressure_chart(struct cgroup *cg); void update_mem_full_pressure_stall_time_chart(struct cgroup *cg); + void update_irq_some_pressure_chart(struct cgroup *cg); void update_irq_some_pressure_stall_time_chart(struct cgroup *cg); void update_irq_full_pressure_chart(struct cgroup *cg); void update_irq_full_pressure_stall_time_chart(struct cgroup *cg); + void update_io_some_pressure_chart(struct cgroup *cg); void update_io_some_pressure_stall_time_chart(struct cgroup *cg); void update_io_full_pressure_chart(struct cgroup *cg); diff --git a/collectors/cgroups.plugin/cgroup-top.c b/collectors/cgroups.plugin/cgroup-top.c index cb5cd9d54b..0e64b908d8 100644 --- a/collectors/cgroups.plugin/cgroup-top.c +++ b/collectors/cgroups.plugin/cgroup-top.c @@ -115,6 +115,7 @@ int cgroup_function_cgroup_top(BUFFER *wb, int timeout __maybe_unused, const cha buffer_json_member_add_string(wb, "help", RRDFUNCTIONS_CGTOP_HELP); buffer_json_member_add_array(wb, "data"); + double max_pids = 0.0; double max_cpu = 0.0; double max_ram = 0.0; double max_disk_io_read = 0.0; @@ -139,6 +140,8 @@ int cgroup_function_cgroup_top(BUFFER *wb, int timeout __maybe_unused, const cha else buffer_json_add_array_item_string(wb, "cgroup"); // Kind + double pids_current = rrddim_get_last_stored_value(cg->st_pids_rd_pids_current, &max_pids, 1.0); + double cpu = NAN; if (cg->st_cpu_rd_user && cg->st_cpu_rd_system) { cpu = cg->st_cpu_rd_user->collector.last_stored_value + cg->st_cpu_rd_system->collector.last_stored_value; @@ -152,7 +155,6 @@ int cgroup_function_cgroup_top(BUFFER *wb, int timeout __maybe_unused, const cha rd = cg->st_throttle_io_rd_written ? cg->st_throttle_io_rd_written : cg->st_io_rd_written; double disk_io_written = rrddim_get_last_stored_value(rd, &max_disk_io_written, 1024.0); - NETDATA_DOUBLE received, sent; cgroup_netdev_get_bandwidth(cg, &received, &sent); if (!isnan(received) && !isnan(sent)) { @@ -162,6 +164,7 @@ int cgroup_function_cgroup_top(BUFFER *wb, int timeout __maybe_unused, const cha max_net_sent = MAX(max_net_sent, sent); } + buffer_json_add_array_item_double(wb, pids_current); buffer_json_add_array_item_double(wb, cpu); buffer_json_add_array_item_double(wb, ram); buffer_json_add_array_item_double(wb, disk_io_read); @@ -195,6 +198,14 @@ int cgroup_function_cgroup_top(BUFFER *wb, int timeout __maybe_unused, const cha RRDF_FIELD_OPTS_VISIBLE, NULL); + // PIDs + buffer_rrdf_table_add_field(wb, field_id++, "PIDs", "Number of Processes Currently in the CGROUP", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, "pids", max_pids, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + // CPU buffer_rrdf_table_add_field(wb, field_id++, "CPU", "CPU Usage", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, @@ -349,6 +360,7 @@ int cgroup_function_systemd_top(BUFFER *wb, int timeout __maybe_unused, const ch buffer_json_member_add_string(wb, "help", RRDFUNCTIONS_CGTOP_HELP); buffer_json_member_add_array(wb, "data"); + double max_pids = 0.0; double max_cpu = 0.0; double max_ram = 0.0; double max_disk_io_read = 0.0; @@ -366,6 +378,8 @@ int cgroup_function_systemd_top(BUFFER *wb, int timeout __maybe_unused, const ch buffer_json_add_array_item_string(wb, cg->name); + double pids_current = rrddim_get_last_stored_value(cg->st_pids_rd_pids_current, &max_pids, 1.0); + double cpu = NAN; if (cg->st_cpu_rd_user && cg->st_cpu_rd_system) { cpu = cg->st_cpu_rd_user->collector.last_stored_value + cg->st_cpu_rd_system->collector.last_stored_value; @@ -379,6 +393,7 @@ int cgroup_function_systemd_top(BUFFER *wb, int timeout __maybe_unused, const ch rd = cg->st_throttle_io_rd_written ? cg->st_throttle_io_rd_written : cg->st_io_rd_written; double disk_io_written = rrddim_get_last_stored_value(rd, &max_disk_io_written, 1024.0); + buffer_json_add_array_item_double(wb, pids_current); buffer_json_add_array_item_double(wb, cpu); buffer_json_add_array_item_double(wb, ram); buffer_json_add_array_item_double(wb, disk_io_read); @@ -402,6 +417,14 @@ int cgroup_function_systemd_top(BUFFER *wb, int timeout __maybe_unused, const ch RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY | RRDF_FIELD_OPTS_STICKY | RRDF_FIELD_OPTS_FULL_WIDTH, NULL); + // PIDs + buffer_rrdf_table_add_field(wb, field_id++, "PIDs", "Number of Processes Currently in the CGROUP", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, "pids", max_pids, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + // CPU buffer_rrdf_table_add_field(wb, field_id++, "CPU", "CPU Usage", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c index 2c738feacc..76336f2fde 100644 --- a/collectors/cgroups.plugin/sys_fs_cgroup.c +++ b/collectors/cgroups.plugin/sys_fs_cgroup.c @@ -50,6 +50,7 @@ char *cgroup_cpuset_base = NULL; char *cgroup_blkio_base = NULL; char *cgroup_memory_base = NULL; char *cgroup_devices_base = NULL; +char *cgroup_pids_base = NULL; char *cgroup_unified_base = NULL; int cgroup_root_count = 0; int cgroup_root_max = 1000; @@ -310,54 +311,70 @@ void read_cgroup_plugin_configuration() { cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_NO; mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "cpuacct"); - if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuacct"); - if(!mi) { + if (!mi) + mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuacct"); + if (!mi) { collector_error("CGROUP: cannot find cpuacct mountinfo. Assuming default: /sys/fs/cgroup/cpuacct"); s = "/sys/fs/cgroup/cpuacct"; - } - else s = mi->mount_point; + } else + s = mi->mount_point; set_cgroup_base_path(filename, s); cgroup_cpuacct_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/cpuacct", filename); mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "cpuset"); - if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuset"); - if(!mi) { + if (!mi) + mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuset"); + if (!mi) { collector_error("CGROUP: cannot find cpuset mountinfo. Assuming default: /sys/fs/cgroup/cpuset"); s = "/sys/fs/cgroup/cpuset"; - } - else s = mi->mount_point; + } else + s = mi->mount_point; set_cgroup_base_path(filename, s); cgroup_cpuset_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/cpuset", filename); mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "blkio"); - if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "blkio"); - if(!mi) { + if (!mi) + mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "blkio"); + if (!mi) { collector_error("CGROUP: cannot find blkio mountinfo. Assuming default: /sys/fs/cgroup/blkio"); s = "/sys/fs/cgroup/blkio"; - } - else s = mi->mount_point; + } else + s = mi->mount_point; set_cgroup_base_path(filename, s); cgroup_blkio_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/blkio", filename); mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "memory"); - if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "memory"); - if(!mi) { + if (!mi) + mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "memory"); + if (!mi) { collector_error("CGROUP: cannot find memory mountinfo. Assuming default: /sys/fs/cgroup/memory"); s = "/sys/fs/cgroup/memory"; - } - else s = mi->mount_point; + } else + s = mi->mount_point; set_cgroup_base_path(filename, s); cgroup_memory_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/memory", filename); mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "devices"); - if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "devices"); - if(!mi) { + if (!mi) + mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "devices"); + if (!mi) { collector_error("CGROUP: cannot find devices mountinfo. Assuming default: /sys/fs/cgroup/devices"); s = "/sys/fs/cgroup/devices"; - } - else s = mi->mount_point; + } else + s = mi->mount_point; set_cgroup_base_path(filename, s); cgroup_devices_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/devices", filename); + + mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "pids"); + if (!mi) + mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "pids"); + if (!mi) { + collector_error("CGROUP: cannot find pids mountinfo. Assuming default: /sys/fs/cgroup/pids"); + s = "/sys/fs/cgroup/pids"; + } else + s = mi->mount_point; + set_cgroup_base_path(filename, s); + cgroup_pids_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/pids", filename); } else { //cgroup_enable_cpuacct_stat = @@ -377,22 +394,19 @@ void read_cgroup_plugin_configuration() { cgroup_used_memory = CONFIG_BOOLEAN_NO; //unified cgroups use different values //TODO: can there be more than 1 cgroup2 mount point? - mi = mountinfo_find_by_filesystem_super_option(root, "cgroup2", "rw"); //there is no cgroup2 specific super option - for now use 'rw' option - if(mi) - netdata_log_debug(D_CGROUP, "found unified cgroup root using super options, with path: '%s'", mi->mount_point); - if(!mi) { + //there is no cgroup2 specific super option - for now use 'rw' option + mi = mountinfo_find_by_filesystem_super_option(root, "cgroup2", "rw"); + if (!mi) { mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup2", "cgroup"); - if(mi) - netdata_log_debug(D_CGROUP, "found unified cgroup root using mountsource info, with path: '%s'", mi->mount_point); } - if(!mi) { + if (!mi) { collector_error("CGROUP: cannot find cgroup2 mountinfo. Assuming default: /sys/fs/cgroup"); s = "/sys/fs/cgroup"; - } - else s = mi->mount_point; + } else + s = mi->mount_point; + set_cgroup_base_path(filename, s); cgroup_unified_base = config_get("plugin:cgroups", "path to unified cgroups", filename); - netdata_log_debug(D_CGROUP, "using cgroup root: '%s'", cgroup_unified_base); } cgroup_root_max = (int)config_get_number("plugin:cgroups", "max cgroups to allow", cgroup_root_max); @@ -1146,6 +1160,15 @@ memory_next: } } +static void cgroup_read_pids_current(struct pids *pids) { + pids->pids_current_updated = 0; + + if (unlikely(!pids->pids_current_filename)) + return; + + pids->pids_current_updated = !read_single_signed_number_file(pids->pids_current_filename, &pids->pids_current); +} + static inline void read_cgroup(struct cgroup *cg) { netdata_log_debug(D_CGROUP, "reading metrics for cgroups '%s'", cg->id); if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { @@ -1160,6 +1183,7 @@ static inline void read_cgroup(struct cgroup *cg) { cgroup_read_blkio(&cg->throttle_io_serviced); cgroup_read_blkio(&cg->io_merged); cgroup_read_blkio(&cg->io_queued); + cgroup_read_pids_current(&cg->pids); } else { //TODO: io_service_bytes and io_serviced use same file merge into 1 function @@ -1172,6 +1196,7 @@ static inline void read_cgroup(struct cgroup *cg) { cgroup2_read_pressure(&cg->memory_pressure); cgroup2_read_pressure(&cg->irq_pressure); cgroup_read_memory(&cg->memory, 1); + cgroup_read_pids_current(&cg->pids); } } @@ -1347,6 +1372,10 @@ void update_cgroup_systemd_services_charts() { update_io_merged_ops_chart(cg); } + if (likely(cg->pids.pids_current_updated)) { + update_pids_current_chart(cg); + } + cg->function_ready = true; } } @@ -1485,6 +1514,10 @@ void update_cgroup_charts() { update_io_merged_ops_chart(cg); } + if (likely(cg->pids.pids_current_updated)) { + update_pids_current_chart(cg); + } + if (cg->options & CGROUP_OPTIONS_IS_UNIFIED) { if (likely(cg->cpu_pressure.updated)) { if (cg->cpu_pressure.some.enabled) { |