author     Ilya Mashchenko <ilya@netdata.cloud>    2023-11-09 15:23:59 +0200
committer  GitHub <noreply@github.com>             2023-11-09 15:23:59 +0200
commit     eafc0f045c7bd9667e23b34e108282938fb10eea (patch)
tree       40d90de7d8b61f718bc2f29554beabe5fa1a13cb
parent     72231999175d985f16f076ac8e3997813b9fd788 (diff)
cgroups: collect pids/pids.current (#16369)
* cgroups collect pids/pids.current
* change dim name
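
In short: the collector now reads pids.current (the kernel pids controller's count of processes in a cgroup) for both cgroup v1 and v2 hierarchies, stores it per cgroup, publishes a new pids_current chart, and adds a PIDs column to the cgroup/systemd top functions. A minimal standalone sketch of the underlying read (not the plugin code; the service path below is only an example and assumes a cgroup v2 mount at /sys/fs/cgroup):

// Minimal sketch: read pids.current for one cgroup directory.
// The path passed to read_pids_current() in main() is just an example.
#include <stdio.h>

static long long read_pids_current(const char *cgroup_dir) {
    char path[4096];
    snprintf(path, sizeof(path), "%s/pids.current", cgroup_dir);

    FILE *fp = fopen(path, "r");
    if (!fp)
        return -1; // pids controller file not present for this cgroup

    long long value = -1;
    if (fscanf(fp, "%lld", &value) != 1)
        value = -1;

    fclose(fp);
    return value;
}

int main(void) {
    long long pids = read_pids_current("/sys/fs/cgroup/system.slice/sshd.service");
    if (pids >= 0)
        printf("pids.current: %lld\n", pids);
    return 0;
}

The plugin does the equivalent through read_single_signed_number_file() once discovery has resolved the per-cgroup path, as shown in the hunks below.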
-rw-r--r--  collectors/cgroups.plugin/cgroup-charts.c     40
-rw-r--r--  collectors/cgroups.plugin/cgroup-discovery.c  18
-rw-r--r--  collectors/cgroups.plugin/cgroup-internals.h  26
-rw-r--r--  collectors/cgroups.plugin/cgroup-top.c        25
-rw-r--r--  collectors/cgroups.plugin/sys_fs_cgroup.c     93
5 files changed, 166 insertions(+), 36 deletions(-)
diff --git a/collectors/cgroups.plugin/cgroup-charts.c b/collectors/cgroups.plugin/cgroup-charts.c
index fa2657c536..a89e8ac454 100644
--- a/collectors/cgroups.plugin/cgroup-charts.c
+++ b/collectors/cgroups.plugin/cgroup-charts.c
@@ -1484,3 +1484,43 @@ void update_io_full_pressure_stall_time_chart(struct cgroup *cg) {
rrddim_set_by_pointer(chart, pcs->total_time.rdtotal, (collected_number)(pcs->total_time.value_total));
rrdset_done(chart);
}
+
+void update_pids_current_chart(struct cgroup *cg) {
+ RRDSET *chart = cg->st_pids;
+
+ if (unlikely(!cg->st_pids)) {
+ char *title;
+ char *context;
+ int prio;
+ if (is_cgroup_systemd_service(cg)) {
+ title = "Systemd Services Number of Processes";
+ context = "systemd.service.pids.current";
+ prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 70;
+ } else {
+ title = "Number of processes";
+ context = k8s_is_kubepod(cg) ? "k8s.cgroup.pids_current" : "cgroup.pids_current";
+ prio = cgroup_containers_chart_priority + 2150;
+ }
+
+ char buff[RRD_ID_LENGTH_MAX + 1];
+ chart = cg->st_pids = rrdset_create_localhost(
+ cgroup_chart_type(buff, cg),
+ "pids_current",
+ NULL,
+ "pids",
+ context,
+ title,
+ "pids",
+ PLUGIN_CGROUPS_NAME,
+ is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+ prio,
+ cgroup_update_every,
+ RRDSET_TYPE_LINE);
+
+ rrdset_update_rrdlabels(chart, cg->chart_labels);
+ cg->st_pids_rd_pids_current = rrddim_add(chart, "pids", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ }
+
+ rrddim_set_by_pointer(chart, cg->st_pids_rd_pids_current, (collected_number)cg->pids.pids_current);
+ rrdset_done(chart);
+}
diff --git a/collectors/cgroups.plugin/cgroup-discovery.c b/collectors/cgroups.plugin/cgroup-discovery.c
index f4abf27942..28c6d96cf9 100644
--- a/collectors/cgroups.plugin/cgroup-discovery.c
+++ b/collectors/cgroups.plugin/cgroup-discovery.c
@@ -75,6 +75,7 @@ static inline void cgroup_free(struct cgroup *cg) {
if(cg->st_throttle_serviced_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_throttle_serviced_ops);
if(cg->st_queued_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_queued_ops);
if(cg->st_merged_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_merged_ops);
+ if(cg->st_pids) rrdset_is_obsolete___safe_from_collector_thread(cg->st_pids);
freez(cg->filename_cpuset_cpus);
freez(cg->filename_cpu_cfs_period);
@@ -105,6 +106,7 @@ static inline void cgroup_free(struct cgroup *cg) {
freez(cg->io_merged.filename);
freez(cg->io_queued.filename);
+ freez(cg->pids.pids_current_filename);
free_pressure(&cg->cpu_pressure);
free_pressure(&cg->io_pressure);
@@ -598,6 +600,14 @@ static inline void discovery_update_filenames_cgroup_v1(struct cgroup *cg) {
}
}
}
+
+ // Pids
+ if (unlikely(!cg->pids.pids_current_filename)) {
+ snprintfz(filename, FILENAME_MAX, "%s%s/pids.current", cgroup_pids_base, cg->id);
+ if (likely(stat(filename, &buf) != -1)) {
+ cg->pids.pids_current_filename = strdupz(filename);
+ }
+ }
}
static inline void discovery_update_filenames_cgroup_v2(struct cgroup *cg) {
@@ -713,6 +723,14 @@ static inline void discovery_update_filenames_cgroup_v2(struct cgroup *cg) {
cg->irq_pressure.full.enabled = cgroup_enable_pressure_irq_full;
}
}
+
+ // Pids
+ if (unlikely(!cg->pids.pids_current_filename)) {
+ snprintfz(filename, FILENAME_MAX, "%s%s/pids.current", cgroup_unified_base, cg->id);
+ if (likely(stat(filename, &buf) != -1)) {
+ cg->pids.pids_current_filename = strdupz(filename);
+ }
+ }
}
static inline void discovery_update_filenames_all_cgroups() {
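
Note on the two discovery hunks above: on cgroup v1 the pids controller has its own hierarchy (cgroup_pids_base, defaulting to /sys/fs/cgroup/pids in sys_fs_cgroup.c below), while on v2 pids.current lives directly under the unified mount (cgroup_unified_base, defaulting to /sys/fs/cgroup). A hedged sketch of that probe, hard-coding those default base paths instead of the configured ones:

// Sketch of the discovery-time probe; the real code uses cgroup_pids_base /
// cgroup_unified_base resolved from mountinfo and the plugin configuration.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>

// Returns a heap-allocated path to pids.current if it exists, otherwise NULL.
static char *probe_pids_current(const char *cgroup_id, int unified) {
    const char *base = unified ? "/sys/fs/cgroup" : "/sys/fs/cgroup/pids"; // assumed defaults
    char path[4096];
    struct stat st;

    snprintf(path, sizeof(path), "%s/%s/pids.current", base, cgroup_id);
    if (stat(path, &st) == -1)
        return NULL; // no pids controller for this cgroup

    return strdup(path);
}

int main(void) {
    char *p = probe_pids_current("system.slice/sshd.service", 1); // example id, unified hierarchy
    if (p) {
        printf("found: %s\n", p);
        free(p);
    }
    return 0;
}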
diff --git a/collectors/cgroups.plugin/cgroup-internals.h b/collectors/cgroups.plugin/cgroup-internals.h
index cefd6c0d78..a698022406 100644
--- a/collectors/cgroups.plugin/cgroup-internals.h
+++ b/collectors/cgroups.plugin/cgroup-internals.h
@@ -25,6 +25,12 @@ struct blkio {
*/
};
+struct pids {
+ char *pids_current_filename;
+ int pids_current_updated;
+ unsigned long long pids_current;
+};
+
// https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt
struct memory {
ARL_BASE *arl_base;
@@ -218,6 +224,8 @@ struct cgroup {
struct blkio io_merged; // operations
struct blkio io_queued; // operations
+ struct pids pids;
+
struct cgroup_network_interface *interfaces;
struct pressure cpu_pressure;
@@ -225,7 +233,7 @@ struct cgroup {
struct pressure memory_pressure;
struct pressure irq_pressure;
- // per cgroup charts
+ // Cpu
RRDSET *st_cpu;
RRDDIM *st_cpu_rd_user;
RRDDIM *st_cpu_rd_system;
@@ -236,6 +244,7 @@ struct cgroup {
RRDSET *st_cpu_throttled_time;
RRDSET *st_cpu_shares;
+ // Memory
RRDSET *st_mem;
RRDDIM *st_mem_rd_ram;
RRDDIM *st_mem_rd_swap;
@@ -248,6 +257,7 @@ struct cgroup {
RRDSET *st_mem_usage_limit;
RRDSET *st_mem_failcnt;
+ // Blkio
RRDSET *st_io;
RRDDIM *st_io_rd_read;
RRDDIM *st_io_rd_written;
@@ -263,6 +273,10 @@ struct cgroup {
RRDSET *st_queued_ops;
RRDSET *st_merged_ops;
+ // Pids
+ RRDSET *st_pids;
+ RRDDIM *st_pids_rd_pids_current;
+
// per cgroup chart variables
char *filename_cpuset_cpus;
unsigned long long cpuset_cpus;
@@ -307,10 +321,6 @@ extern uv_mutex_t cgroup_root_mutex;
void cgroup_discovery_worker(void *ptr);
-
-
-
-
extern int is_inside_k8s;
extern long system_page_size;
extern int cgroup_enable_cpuacct_stat;
@@ -350,6 +360,7 @@ extern char *cgroup_cpuacct_base;
extern char *cgroup_cpuset_base;
extern char *cgroup_blkio_base;
extern char *cgroup_memory_base;
+extern char *cgroup_pids_base;
extern char *cgroup_devices_base;
extern char *cgroup_unified_base;
extern int cgroup_root_count;
@@ -478,18 +489,23 @@ void update_throttle_io_serviced_ops_chart(struct cgroup *cg);
void update_io_queued_ops_chart(struct cgroup *cg);
void update_io_merged_ops_chart(struct cgroup *cg);
+void update_pids_current_chart(struct cgroup *cg);
+
void update_cpu_some_pressure_chart(struct cgroup *cg);
void update_cpu_some_pressure_stall_time_chart(struct cgroup *cg);
void update_cpu_full_pressure_chart(struct cgroup *cg);
void update_cpu_full_pressure_stall_time_chart(struct cgroup *cg);
+
void update_mem_some_pressure_chart(struct cgroup *cg);
void update_mem_some_pressure_stall_time_chart(struct cgroup *cg);
void update_mem_full_pressure_chart(struct cgroup *cg);
void update_mem_full_pressure_stall_time_chart(struct cgroup *cg);
+
void update_irq_some_pressure_chart(struct cgroup *cg);
void update_irq_some_pressure_stall_time_chart(struct cgroup *cg);
void update_irq_full_pressure_chart(struct cgroup *cg);
void update_irq_full_pressure_stall_time_chart(struct cgroup *cg);
+
void update_io_some_pressure_chart(struct cgroup *cg);
void update_io_some_pressure_stall_time_chart(struct cgroup *cg);
void update_io_full_pressure_chart(struct cgroup *cg);
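
The new struct pids above bundles everything the collector keeps per cgroup for this metric. A condensed, self-contained sketch of how the three fields are used each iteration (assuming, as the negation in cgroup_read_pids_current() further down suggests, that the number-file reader returns 0 on success):

// Sketch of the per-iteration flow; read_pids_file() stands in for
// read_single_signed_number_file() under the assumed "0 means success" convention.
#include <stdio.h>

struct pids_sketch {
    char *pids_current_filename;       // resolved once by discovery, if pids.current exists
    int pids_current_updated;          // cleared on every read, set only when the read succeeds
    unsigned long long pids_current;   // last value read from pids.current
};

static int read_pids_file(const char *filename, unsigned long long *value) {
    FILE *fp = fopen(filename, "r");
    if (!fp) return 1;
    int ok = (fscanf(fp, "%llu", value) == 1);
    fclose(fp);
    return ok ? 0 : 1; // 0 on success, non-zero on failure
}

static void read_pids_sketch(struct pids_sketch *p) {
    p->pids_current_updated = 0;
    if (!p->pids_current_filename)
        return; // discovery never found pids.current for this cgroup
    p->pids_current_updated = !read_pids_file(p->pids_current_filename, &p->pids_current);
}

int main(void) {
    struct pids_sketch p = { 0 };
    p.pids_current_filename = (char *)"/sys/fs/cgroup/init.scope/pids.current"; // example path
    read_pids_sketch(&p);
    if (p.pids_current_updated)
        printf("pids.current: %llu\n", p.pids_current);
    return 0;
}

The chart update side then runs only when the flag is set: update_cgroup_charts() and update_cgroup_systemd_services_charts() call update_pids_current_chart() guarded by pids_current_updated, as in the sys_fs_cgroup.c hunks below.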
diff --git a/collectors/cgroups.plugin/cgroup-top.c b/collectors/cgroups.plugin/cgroup-top.c
index cb5cd9d54b..0e64b908d8 100644
--- a/collectors/cgroups.plugin/cgroup-top.c
+++ b/collectors/cgroups.plugin/cgroup-top.c
@@ -115,6 +115,7 @@ int cgroup_function_cgroup_top(BUFFER *wb, int timeout __maybe_unused, const cha
buffer_json_member_add_string(wb, "help", RRDFUNCTIONS_CGTOP_HELP);
buffer_json_member_add_array(wb, "data");
+ double max_pids = 0.0;
double max_cpu = 0.0;
double max_ram = 0.0;
double max_disk_io_read = 0.0;
@@ -139,6 +140,8 @@ int cgroup_function_cgroup_top(BUFFER *wb, int timeout __maybe_unused, const cha
else
buffer_json_add_array_item_string(wb, "cgroup"); // Kind
+ double pids_current = rrddim_get_last_stored_value(cg->st_pids_rd_pids_current, &max_pids, 1.0);
+
double cpu = NAN;
if (cg->st_cpu_rd_user && cg->st_cpu_rd_system) {
cpu = cg->st_cpu_rd_user->collector.last_stored_value + cg->st_cpu_rd_system->collector.last_stored_value;
@@ -152,7 +155,6 @@ int cgroup_function_cgroup_top(BUFFER *wb, int timeout __maybe_unused, const cha
rd = cg->st_throttle_io_rd_written ? cg->st_throttle_io_rd_written : cg->st_io_rd_written;
double disk_io_written = rrddim_get_last_stored_value(rd, &max_disk_io_written, 1024.0);
-
NETDATA_DOUBLE received, sent;
cgroup_netdev_get_bandwidth(cg, &received, &sent);
if (!isnan(received) && !isnan(sent)) {
@@ -162,6 +164,7 @@ int cgroup_function_cgroup_top(BUFFER *wb, int timeout __maybe_unused, const cha
max_net_sent = MAX(max_net_sent, sent);
}
+ buffer_json_add_array_item_double(wb, pids_current);
buffer_json_add_array_item_double(wb, cpu);
buffer_json_add_array_item_double(wb, ram);
buffer_json_add_array_item_double(wb, disk_io_read);
@@ -195,6 +198,14 @@ int cgroup_function_cgroup_top(BUFFER *wb, int timeout __maybe_unused, const cha
RRDF_FIELD_OPTS_VISIBLE,
NULL);
+ // PIDs
+ buffer_rrdf_table_add_field(wb, field_id++, "PIDs", "Number of Processes Currently in the CGROUP",
+ RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER,
+ 0, "pids", max_pids, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_VISIBLE,
+ NULL);
+
// CPU
buffer_rrdf_table_add_field(wb, field_id++, "CPU", "CPU Usage",
RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
@@ -349,6 +360,7 @@ int cgroup_function_systemd_top(BUFFER *wb, int timeout __maybe_unused, const ch
buffer_json_member_add_string(wb, "help", RRDFUNCTIONS_CGTOP_HELP);
buffer_json_member_add_array(wb, "data");
+ double max_pids = 0.0;
double max_cpu = 0.0;
double max_ram = 0.0;
double max_disk_io_read = 0.0;
@@ -366,6 +378,8 @@ int cgroup_function_systemd_top(BUFFER *wb, int timeout __maybe_unused, const ch
buffer_json_add_array_item_string(wb, cg->name);
+ double pids_current = rrddim_get_last_stored_value(cg->st_pids_rd_pids_current, &max_pids, 1.0);
+
double cpu = NAN;
if (cg->st_cpu_rd_user && cg->st_cpu_rd_system) {
cpu = cg->st_cpu_rd_user->collector.last_stored_value + cg->st_cpu_rd_system->collector.last_stored_value;
@@ -379,6 +393,7 @@ int cgroup_function_systemd_top(BUFFER *wb, int timeout __maybe_unused, const ch
rd = cg->st_throttle_io_rd_written ? cg->st_throttle_io_rd_written : cg->st_io_rd_written;
double disk_io_written = rrddim_get_last_stored_value(rd, &max_disk_io_written, 1024.0);
+ buffer_json_add_array_item_double(wb, pids_current);
buffer_json_add_array_item_double(wb, cpu);
buffer_json_add_array_item_double(wb, ram);
buffer_json_add_array_item_double(wb, disk_io_read);
@@ -402,6 +417,14 @@ int cgroup_function_systemd_top(BUFFER *wb, int timeout __maybe_unused, const ch
RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY | RRDF_FIELD_OPTS_STICKY | RRDF_FIELD_OPTS_FULL_WIDTH,
NULL);
+ // PIDs
+ buffer_rrdf_table_add_field(wb, field_id++, "PIDs", "Number of Processes Currently in the CGROUP",
+ RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER,
+ 0, "pids", max_pids, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_VISIBLE,
+ NULL);
+
// CPU
buffer_rrdf_table_add_field(wb, field_id++, "CPU", "CPU Usage",
RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c
index 2c738feacc..76336f2fde 100644
--- a/collectors/cgroups.plugin/sys_fs_cgroup.c
+++ b/collectors/cgroups.plugin/sys_fs_cgroup.c
@@ -50,6 +50,7 @@ char *cgroup_cpuset_base = NULL;
char *cgroup_blkio_base = NULL;
char *cgroup_memory_base = NULL;
char *cgroup_devices_base = NULL;
+char *cgroup_pids_base = NULL;
char *cgroup_unified_base = NULL;
int cgroup_root_count = 0;
int cgroup_root_max = 1000;
@@ -310,54 +311,70 @@ void read_cgroup_plugin_configuration() {
cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_NO;
mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "cpuacct");
- if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuacct");
- if(!mi) {
+ if (!mi)
+ mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuacct");
+ if (!mi) {
collector_error("CGROUP: cannot find cpuacct mountinfo. Assuming default: /sys/fs/cgroup/cpuacct");
s = "/sys/fs/cgroup/cpuacct";
- }
- else s = mi->mount_point;
+ } else
+ s = mi->mount_point;
set_cgroup_base_path(filename, s);
cgroup_cpuacct_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/cpuacct", filename);
mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "cpuset");
- if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuset");
- if(!mi) {
+ if (!mi)
+ mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuset");
+ if (!mi) {
collector_error("CGROUP: cannot find cpuset mountinfo. Assuming default: /sys/fs/cgroup/cpuset");
s = "/sys/fs/cgroup/cpuset";
- }
- else s = mi->mount_point;
+ } else
+ s = mi->mount_point;
set_cgroup_base_path(filename, s);
cgroup_cpuset_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/cpuset", filename);
mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "blkio");
- if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "blkio");
- if(!mi) {
+ if (!mi)
+ mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "blkio");
+ if (!mi) {
collector_error("CGROUP: cannot find blkio mountinfo. Assuming default: /sys/fs/cgroup/blkio");
s = "/sys/fs/cgroup/blkio";
- }
- else s = mi->mount_point;
+ } else
+ s = mi->mount_point;
set_cgroup_base_path(filename, s);
cgroup_blkio_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/blkio", filename);
mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "memory");
- if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "memory");
- if(!mi) {
+ if (!mi)
+ mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "memory");
+ if (!mi) {
collector_error("CGROUP: cannot find memory mountinfo. Assuming default: /sys/fs/cgroup/memory");
s = "/sys/fs/cgroup/memory";
- }
- else s = mi->mount_point;
+ } else
+ s = mi->mount_point;
set_cgroup_base_path(filename, s);
cgroup_memory_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/memory", filename);
mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "devices");
- if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "devices");
- if(!mi) {
+ if (!mi)
+ mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "devices");
+ if (!mi) {
collector_error("CGROUP: cannot find devices mountinfo. Assuming default: /sys/fs/cgroup/devices");
s = "/sys/fs/cgroup/devices";
- }
- else s = mi->mount_point;
+ } else
+ s = mi->mount_point;
set_cgroup_base_path(filename, s);
cgroup_devices_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/devices", filename);
+
+ mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "pids");
+ if (!mi)
+ mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "pids");
+ if (!mi) {
+ collector_error("CGROUP: cannot find pids mountinfo. Assuming default: /sys/fs/cgroup/pids");
+ s = "/sys/fs/cgroup/pids";
+ } else
+ s = mi->mount_point;
+ set_cgroup_base_path(filename, s);
+ cgroup_pids_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/pids", filename);
}
else {
//cgroup_enable_cpuacct_stat =
@@ -377,22 +394,19 @@ void read_cgroup_plugin_configuration() {
cgroup_used_memory = CONFIG_BOOLEAN_NO; //unified cgroups use different values
//TODO: can there be more than 1 cgroup2 mount point?
- mi = mountinfo_find_by_filesystem_super_option(root, "cgroup2", "rw"); //there is no cgroup2 specific super option - for now use 'rw' option
- if(mi)
- netdata_log_debug(D_CGROUP, "found unified cgroup root using super options, with path: '%s'", mi->mount_point);
- if(!mi) {
+ //there is no cgroup2 specific super option - for now use 'rw' option
+ mi = mountinfo_find_by_filesystem_super_option(root, "cgroup2", "rw");
+ if (!mi) {
mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup2", "cgroup");
- if(mi)
- netdata_log_debug(D_CGROUP, "found unified cgroup root using mountsource info, with path: '%s'", mi->mount_point);
}
- if(!mi) {
+ if (!mi) {
collector_error("CGROUP: cannot find cgroup2 mountinfo. Assuming default: /sys/fs/cgroup");
s = "/sys/fs/cgroup";
- }
- else s = mi->mount_point;
+ } else
+ s = mi->mount_point;
+
set_cgroup_base_path(filename, s);
cgroup_unified_base = config_get("plugin:cgroups", "path to unified cgroups", filename);
- netdata_log_debug(D_CGROUP, "using cgroup root: '%s'", cgroup_unified_base);
}
cgroup_root_max = (int)config_get_number("plugin:cgroups", "max cgroups to allow", cgroup_root_max);
@@ -1146,6 +1160,15 @@ memory_next:
}
}
+static void cgroup_read_pids_current(struct pids *pids) {
+ pids->pids_current_updated = 0;
+
+ if (unlikely(!pids->pids_current_filename))
+ return;
+
+ pids->pids_current_updated = !read_single_signed_number_file(pids->pids_current_filename, &pids->pids_current);
+}
+
static inline void read_cgroup(struct cgroup *cg) {
netdata_log_debug(D_CGROUP, "reading metrics for cgroups '%s'", cg->id);
if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) {
@@ -1160,6 +1183,7 @@ static inline void read_cgroup(struct cgroup *cg) {
cgroup_read_blkio(&cg->throttle_io_serviced);
cgroup_read_blkio(&cg->io_merged);
cgroup_read_blkio(&cg->io_queued);
+ cgroup_read_pids_current(&cg->pids);
}
else {
//TODO: io_service_bytes and io_serviced use same file merge into 1 function
@@ -1172,6 +1196,7 @@ static inline void read_cgroup(struct cgroup *cg) {
cgroup2_read_pressure(&cg->memory_pressure);
cgroup2_read_pressure(&cg->irq_pressure);
cgroup_read_memory(&cg->memory, 1);
+ cgroup_read_pids_current(&cg->pids);
}
}
@@ -1347,6 +1372,10 @@ void update_cgroup_systemd_services_charts() {
update_io_merged_ops_chart(cg);
}
+ if (likely(cg->pids.pids_current_updated)) {
+ update_pids_current_chart(cg);
+ }
+
cg->function_ready = true;
}
}
@@ -1485,6 +1514,10 @@ void update_cgroup_charts() {
update_io_merged_ops_chart(cg);
}
+ if (likely(cg->pids.pids_current_updated)) {
+ update_pids_current_chart(cg);
+ }
+
if (cg->options & CGROUP_OPTIONS_IS_UNIFIED) {
if (likely(cg->cpu_pressure.updated)) {
if (cg->cpu_pressure.some.enabled) {