summary | refs | log | tree | commit | diff | stats
path: root/collectors/cgroups.plugin/sys_fs_cgroup.c
diff options
context:
space:
mode:
author: Costa Tsaousis <costa@netdata.cloud> 2023-11-02 14:27:43 +0000
committer: GitHub <noreply@github.com> 2023-11-02 16:27:43 +0200
commit: 4e6883b881081adfaf40c9f132773c46f7590a81 (patch)
tree: 534d34081bbdf66bc3af816371d39c4f15125bd4 /collectors/cgroups.plugin/sys_fs_cgroup.c
parent: f6ad080e3543d2d539d8c5678c91c7b53126424e (diff)
cgroup-top function (#16314)
* split cgroups discovery to a separate file
* base for implementing cgroup-top
* working cgtop
* removed off_t casting
* update
* fix cpu_full_pressure_stall_time ctx
* check disk read/write dims is not NULL in functions
* fixes
* fix traffic
* fix codacy warnings
* upd

---------

Co-authored-by: ilyam8 <ilya@netdata.cloud>
Diffstat (limited to 'collectors/cgroups.plugin/sys_fs_cgroup.c')
-rw-r--r-- collectors/cgroups.plugin/sys_fs_cgroup.c 3479
1 files changed, 251 insertions, 3228 deletions
diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c
index 575274a20d..9b53ff88bc 100644
--- a/collectors/cgroups.plugin/sys_fs_cgroup.c
+++ b/collectors/cgroups.plugin/sys_fs_cgroup.c
@@ -1,149 +1,97 @@
// SPDX-License-Identifier: GPL-3.0-or-later
-#include "sys_fs_cgroup.h"
-
-#define PLUGIN_CGROUPS_NAME "cgroups.plugin"
-#define PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME "systemd"
-#define PLUGIN_CGROUPS_MODULE_CGROUPS_NAME "/sys/fs/cgroup"
-
-#ifdef NETDATA_INTERNAL_CHECKS
-#define CGROUP_PROCFILE_FLAG PROCFILE_FLAG_DEFAULT
-#else
-#define CGROUP_PROCFILE_FLAG PROCFILE_FLAG_NO_ERROR_ON_FILE_IO
-#endif
+#include "cgroup-internals.h"
// main cgroups thread worker jobs
#define WORKER_CGROUPS_LOCK 0
#define WORKER_CGROUPS_READ 1
#define WORKER_CGROUPS_CHART 2
-// discovery cgroup thread worker jobs
-#define WORKER_DISCOVERY_INIT 0
-#define WORKER_DISCOVERY_FIND 1
-#define WORKER_DISCOVERY_PROCESS 2
-#define WORKER_DISCOVERY_PROCESS_RENAME 3
-#define WORKER_DISCOVERY_PROCESS_NETWORK 4
-#define WORKER_DISCOVERY_PROCESS_FIRST_TIME 5
-#define WORKER_DISCOVERY_UPDATE 6
-#define WORKER_DISCOVERY_CLEANUP 7
-#define WORKER_DISCOVERY_COPY 8
-#define WORKER_DISCOVERY_SHARE 9
-#define WORKER_DISCOVERY_LOCK 10
-
-#if WORKER_UTILIZATION_MAX_JOB_TYPES < 11
-#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 11
-#endif
-
// ----------------------------------------------------------------------------
// cgroup globals
+unsigned long long host_ram_total = 0;
+int is_inside_k8s = 0;
+long system_page_size = 4096; // system will be queried via sysconf() in configuration()
+int cgroup_enable_cpuacct_stat = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_NO;
+int cgroup_enable_cpuacct_cpu_throttling = CONFIG_BOOLEAN_YES;
+int cgroup_enable_cpuacct_cpu_shares = CONFIG_BOOLEAN_NO;
+int cgroup_enable_memory = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_detailed_memory = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_memory_failcnt = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_swap = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_blkio_io = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_blkio_ops = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_blkio_throttle_io = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_blkio_throttle_ops = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_blkio_merged_ops = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_pressure_cpu = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_pressure_io_some = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_pressure_io_full = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_pressure_memory_some = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_pressure_irq_some = CONFIG_BOOLEAN_NO;
+int cgroup_enable_pressure_irq_full = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_systemd_services = CONFIG_BOOLEAN_YES;
+int cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO;
+int cgroup_used_memory = CONFIG_BOOLEAN_YES;
+int cgroup_use_unified_cgroups = CONFIG_BOOLEAN_NO;
+int cgroup_unified_exist = CONFIG_BOOLEAN_AUTO;
+int cgroup_search_in_devices = 1;
+int cgroup_check_for_new_every = 10;
+int cgroup_update_every = 1;
+int cgroup_containers_chart_priority = NETDATA_CHART_PRIO_CGROUPS_CONTAINERS;
+int cgroup_recheck_zero_blkio_every_iterations = 10;
+int cgroup_recheck_zero_mem_failcnt_every_iterations = 10;
+int cgroup_recheck_zero_mem_detailed_every_iterations = 10;
+char *cgroup_cpuacct_base = NULL;
+char *cgroup_cpuset_base = NULL;
+char *cgroup_blkio_base = NULL;
+char *cgroup_memory_base = NULL;
+char *cgroup_devices_base = NULL;
+char *cgroup_unified_base = NULL;
+int cgroup_root_count = 0;
+int cgroup_root_max = 1000;
+int cgroup_max_depth = 0;
+SIMPLE_PATTERN *enabled_cgroup_paths = NULL;
+SIMPLE_PATTERN *enabled_cgroup_names = NULL;
+SIMPLE_PATTERN *search_cgroup_paths = NULL;
+SIMPLE_PATTERN *enabled_cgroup_renames = NULL;
+SIMPLE_PATTERN *systemd_services_cgroups = NULL;
+SIMPLE_PATTERN *entrypoint_parent_process_comm = NULL;
+char *cgroups_network_interface_script = NULL;
+int cgroups_check = 0;
+uint32_t Read_hash = 0;
+uint32_t Write_hash = 0;
+uint32_t user_hash = 0;
+uint32_t system_hash = 0;
+uint32_t user_usec_hash = 0;
+uint32_t system_usec_hash = 0;
+uint32_t nr_periods_hash = 0;
+uint32_t nr_throttled_hash = 0;
+uint32_t throttled_time_hash = 0;
+uint32_t throttled_usec_hash = 0;
-static char cgroup_chart_id_prefix[] = "cgroup_";
-static char services_chart_id_prefix[] = "systemd_";
-
-static int is_inside_k8s = 0;
-
-static long system_page_size = 4096; // system will be queried via sysconf() in configuration()
-
-static int cgroup_enable_cpuacct_stat = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_cpuacct_cpu_throttling = CONFIG_BOOLEAN_YES;
-static int cgroup_enable_cpuacct_cpu_shares = CONFIG_BOOLEAN_NO;
-static int cgroup_enable_memory = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_detailed_memory = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_memory_failcnt = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_swap = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_blkio_io = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_blkio_ops = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_blkio_throttle_io = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_blkio_throttle_ops = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_blkio_merged_ops = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_pressure_cpu = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_pressure_io_some = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_pressure_io_full = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_pressure_memory_some = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_pressure_irq_some = CONFIG_BOOLEAN_NO;
-static int cgroup_enable_pressure_irq_full = CONFIG_BOOLEAN_AUTO;
-
-static int cgroup_enable_systemd_services = CONFIG_BOOLEAN_YES;
-static int cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO;
-static int cgroup_used_memory = CONFIG_BOOLEAN_YES;
-
-static int cgroup_use_unified_cgroups = CONFIG_BOOLEAN_NO;
-static int cgroup_unified_exist = CONFIG_BOOLEAN_AUTO;
-
-static int cgroup_search_in_devices = 1;
-
-static int cgroup_check_for_new_every = 10;
-static int cgroup_update_every = 1;
-static int cgroup_containers_chart_priority = NETDATA_CHART_PRIO_CGROUPS_CONTAINERS;
-
-static int cgroup_recheck_zero_blkio_every_iterations = 10;
-static int cgroup_recheck_zero_mem_failcnt_every_iterations = 10;
-static int cgroup_recheck_zero_mem_detailed_every_iterations = 10;
-
-static char *cgroup_cpuacct_base = NULL;
-static char *cgroup_cpuset_base = NULL;
-static char *cgroup_blkio_base = NULL;
-static char *cgroup_memory_base = NULL;
-static char *cgroup_devices_base = NULL;
-static char *cgroup_unified_base = NULL;
-
-static int cgroup_root_count = 0;
-static int cgroup_root_max = 1000;
-static int cgroup_max_depth = 0;
-
-static SIMPLE_PATTERN *enabled_cgroup_paths = NULL;
-static SIMPLE_PATTERN *enabled_cgroup_names = NULL;
-static SIMPLE_PATTERN *search_cgroup_paths = NULL;
-static SIMPLE_PATTERN *enabled_cgroup_renames = NULL;
-static SIMPLE_PATTERN *systemd_services_cgroups = NULL;
-
-static SIMPLE_PATTERN *entrypoint_parent_process_comm = NULL;
-
-static char *cgroups_rename_script = NULL;
-static char *cgroups_network_interface_script = NULL;
-
-static int cgroups_check = 0;
-
-static uint32_t Read_hash = 0;
-static uint32_t Write_hash = 0;
-static uint32_t user_hash = 0;
-static uint32_t system_hash = 0;
-static uint32_t user_usec_hash = 0;
-static uint32_t system_usec_hash = 0;
-static uint32_t nr_periods_hash = 0;
-static uint32_t nr_throttled_hash = 0;
-static uint32_t throttled_time_hash = 0;
-static uint32_t throttled_usec_hash = 0;
-
-enum cgroups_type { CGROUPS_AUTODETECT_FAIL, CGROUPS_V1, CGROUPS_V2 };
-
-enum cgroups_systemd_setting {
- SYSTEMD_CGROUP_ERR,
- SYSTEMD_CGROUP_LEGACY,
- SYSTEMD_CGROUP_HYBRID,
- SYSTEMD_CGROUP_UNIFIED
-};
-
-struct cgroups_systemd_config_setting {
- char *name;
- enum cgroups_systemd_setting setting;
-};
+// *** WARNING *** The fields are not thread safe. Take care of safe usage.
+struct cgroup *cgroup_root = NULL;
+uv_mutex_t cgroup_root_mutex;
-static struct cgroups_systemd_config_setting cgroups_systemd_options[] = {
- { .name = "legacy", .setting = SYSTEMD_CGROUP_LEGACY },
- { .name = "hybrid", .setting = SYSTEMD_CGROUP_HYBRID },
- { .name = "unified", .setting = SYSTEMD_CGROUP_UNIFIED },
- { .name = NULL, .setting = SYSTEMD_CGROUP_ERR },
+struct cgroups_systemd_config_setting cgroups_systemd_options[] = {
+ { .name = "legacy", .setting = SYSTEMD_CGROUP_LEGACY },
+ { .name = "hybrid", .setting = SYSTEMD_CGROUP_HYBRID },
+ { .name = "unified", .setting = SYSTEMD_CGROUP_UNIFIED },
+ { .name = NULL, .setting = SYSTEMD_CGROUP_ERR },
};
// Shared memory with information from detected cgroups
netdata_ebpf_cgroup_shm_t shm_cgroup_ebpf = {NULL, NULL};
-static int shm_fd_cgroup_ebpf = -1;
+int shm_fd_cgroup_ebpf = -1;
sem_t *shm_mutex_cgroup_ebpf = SEM_FAILED;
+struct discovery_thread discovery_thread;
+
+
/* on Fed systemd is not in PATH for some reason */
#define SYSTEMD_CMD_RHEL "/usr/lib/systemd/systemd --version"
#define SYSTEMD_HIERARCHY_STRING "default-hierarchy="
@@ -621,395 +569,6 @@ end_init_shm:
shm_unlink(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME);
}
-// ----------------------------------------------------------------------------
-// cgroup objects
-
-struct blkio {
- int updated;
- int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
- int delay_counter;
-
- char *filename;
-
- unsigned long long Read;
- unsigned long long Write;
-/*
- unsigned long long Sync;
- unsigned long long Async;
- unsigned long long Total;
-*/
-};
-
-// https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt
-struct memory {
- ARL_BASE *arl_base;
- ARL_ENTRY *arl_dirty;
- ARL_ENTRY *arl_swap;
-
- int updated_detailed;
- int updated_usage_in_bytes;
- int updated_msw_usage_in_bytes;
- int updated_failcnt;
-
- int enabled_detailed; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
- int enabled_usage_in_bytes; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
- int enabled_msw_usage_in_bytes; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
- int enabled_failcnt; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
-
- int delay_counter_detailed;
- int delay_counter_failcnt;
-
- char *filename_detailed;
- char *filename_usage_in_bytes;
- char *filename_msw_usage_in_bytes;
- char *filename_failcnt;
-
- int detailed_has_dirty;
- int detailed_has_swap;
-
- // detailed metrics
-/*
- unsigned long long cache;
- unsigned long long rss;
- unsigned long long rss_huge;
- unsigned long long mapped_file;
- unsigned long long writeback;
- unsigned long long dirty;
- unsigned long long swap;
- unsigned long long pgpgin;
- unsigned long long pgpgout;
- unsigned long long pgfault;
- unsigned long long pgmajfault;
- unsigned long long inactive_anon;
- unsigned long long active_anon;
- unsigned long long inactive_file;
- unsigned long long active_file;
- unsigned long long unevictable;
- unsigned long long hierarchical_memory_limit;
-*/
- //unified cgroups metrics
- unsigned long long anon;
- unsigned long long kernel_stack;
- unsigned long long slab;
- unsigned long long sock;
- unsigned long long shmem;
- unsigned long long anon_thp;
- //unsigned long long file_writeback;
- //unsigned long long file_dirty;
- //unsigned long long file;
-
- unsigned long long total_cache;
- unsigned long long total_rss;
- unsigned long long total_rss_huge;
- unsigned long long total_mapped_file;
- unsigned long long total_writeback;
- unsigned long long total_dirty;
- unsigned long long total_swap;
- unsigned long long total_pgpgin;
- unsigned long long total_pgpgout;
- unsigned long long total_pgfault;
- unsigned long long total_pgmajfault;
-/*
- unsigned long long total_inactive_anon;
- unsigned long long total_active_anon;
-*/
-
- unsigned long long total_inactive_file;
-
-/*
- unsigned long long total_active_file;
- unsigned long long total_unevictable;
-*/
-
- // single file metrics
- unsigned long long usage_in_bytes;
- unsigned long long msw_usage_in_bytes;
- unsigned long long failcnt;
-};
-
-// https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt
-struct cpuacct_stat {
- int updated;
- int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
-
- char *filename;
-
- unsigned long long user; // v1, v2(user_usec)
- unsigned long long system; // v1, v2(system_usec)
-};
-
-// https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt
-struct cpuacct_usage {
- int updated;
- int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
-
- char *filename;
-
- unsigned int cpus;
- unsigned long long *cpu_percpu;
-};
-
-// represents cpuacct/cpu.stat, for v2 'cpuacct_stat' is used for 'user_usec', 'system_usec'
-struct cpuacct_cpu_throttling {
- int updated;
- int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
-
- char *filename;
-
- unsigned long long nr_periods;
- unsigned long long nr_throttled;
- unsigned long long throttled_time;
-
- unsigned long long nr_throttled_perc;
-};
-
-// https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/resource_management_guide/sec-cpu#sect-cfs
-// https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/managing_monitoring_and_updating_the_kernel/using-cgroups-v2-to-control-distribution-of-cpu-time-for-applications_managing-monitoring-and-updating-the-kernel#proc_controlling-distribution-of-cpu-time-for-applications-by-adjusting-cpu-weight_using-cgroups-v2-to-control-distribution-of-cpu-time-for-applications
-struct cpuacct_cpu_shares {
- int updated;
- int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
-
- char *filename;
-
- unsigned long long shares;
-};
-
-struct cgroup_network_interface {
- const char *host_device;
- const char *container_device;
- struct cgroup_network_interface *next;
-};
-
-enum cgroups_container_orchestrator {
- CGROUPS_ORCHESTRATOR_UNSET,
- CGROUPS_ORCHESTRATOR_UNKNOWN,
- CGROUPS_ORCHESTRATOR_K8S
-};
-
-// *** WARNING *** The fields are not thread safe. Take care of safe usage.
-struct cgroup {
- uint32_t options;
-
- int first_time_seen; // first time seen by the discoverer
- int processed; // the discoverer is done processing a cgroup (resolved name, set 'enabled' option)
-
- char available; // found in the filesystem
- char enabled; // enabled in the config
-
- char pending_renames;
-
- char *id;
- uint32_t hash;
-
- char *intermediate_id; // TODO: remove it when the renaming script is fixed
-
- char *chart_id;
- uint32_t hash_chart_id;
-
- // 'cgroup_name' label value.
- // By default this is the *id (path), later changed to the resolved name (cgroup-name.sh) or systemd service name.
- char *name;
-
- RRDLABELS *chart_labels;
-
- int container_orchestrator;
-
- struct cpuacct_stat cpuacct_stat;
- struct cpuacct_usage cpuacct_usage;
- struct cpuacct_cpu_throttling cpuacct_cpu_throttling;
- struct cpuacct_cpu_shares cpuacct_cpu_shares;
-
- struct memory memory;
-
- struct blkio io_service_bytes; // bytes
- struct blkio io_serviced; // operations
-
- struct blkio throttle_io_service_bytes; // bytes
- struct blkio throttle_io_serviced; // operations
-
- struct blkio io_merged; // operations
- struct blkio io_queued; // operations
-
- struct cgroup_network_interface *interfaces;
-
- struct pressure cpu_pressure;
- struct pressure io_pressure;
- struct pressure memory_pressure;
- struct pressure irq_pressure;
-
- // per cgroup charts
- RRDSET *st_cpu;
- RRDSET *st_cpu_limit;
- RRDSET *st_cpu_per_core;
- RRDSET *st_cpu_nr_throttled;
- RRDSET *st_cpu_throttled_time;
- RRDSET *st_cpu_shares;
-
- RRDSET *st_mem;
- RRDSET *st_mem_utilization;
- RRDSET *st_writeback;
- RRDSET *st_mem_activity;
- RRDSET *st_pgfaults;
- RRDSET *st_mem_usage;
- RRDSET *st_mem_usage_limit;
- RRDSET *st_mem_failcnt;
-
- RRDSET *st_io;
- RRDSET *st_serviced_ops;
- RRDSET *st_throttle_io;
- RRDSET *st_throttle_serviced_ops;
- RRDSET *st_queued_ops;
- RRDSET *st_merged_ops;
-
- // per cgroup chart variables
- char *filename_cpuset_cpus;
- unsigned long long cpuset_cpus;
-
- char *filename_cpu_cfs_period;
- unsigned long long cpu_cfs_period;
-
- char *filename_cpu_cfs_quota;
- unsigned long long cpu_cfs_quota;
-
- const RRDSETVAR_ACQUIRED *chart_var_cpu_limit;
- NETDATA_DOUBLE prev_cpu_usage;
-
- char *filename_memory_limit;
- unsigned long long memory_limit;
- const RRDSETVAR_ACQUIRED *chart_var_memory_limit;
-
- char *filename_memoryswap_limit;
- unsigned long long memoryswap_limit;
- const RRDSETVAR_ACQUIRED *chart_var_memoryswap_limit;
-
- struct cgroup *next;
- struct cgroup *discovered_next;
-
-} *cgroup_root = NULL;
-
-uv_mutex_t cgroup_root_mutex;
-
-struct cgroup *discovered_cgroup_root = NULL;
-
-struct discovery_thread {
- uv_thread_t thread;
- uv_mutex_t mutex;
- uv_cond_t cond_var;
- int exited;
-} discovery_thread;
-
-// ---------------------------------------------------------------------------------------------
-
-static inline int matches_enabled_cgroup_paths(char *id) {
- return simple_pattern_matches(enabled_cgroup_paths, id);
-}
-
-static inline int matches_enabled_cgroup_names(char *name) {
- return simple_pattern_matches(enabled_cgroup_names, name);
-}
-
-static inline int matches_enabled_cgroup_renames(char *id) {
- return simple_pattern_matches(enabled_cgroup_renames, id);
-}
-
-static inline int matches_systemd_services_cgroups(char *id) {
- return simple_pattern_matches(systemd_services_cgroups, id);
-}
-
-static inline int matches_search_cgroup_paths(const char *dir) {
- return simple_pattern_matches(search_cgroup_paths, dir);
-}
-
-static inline int matches_entrypoint_parent_process_comm(const char *comm) {
- return simple_pattern_matches(entrypoint_parent_process_comm, comm);
-}
-
-static inline int is_cgroup_systemd_service(struct cgroup *cg) {
- return (cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE);
-}
-
-// ---------------------------------------------------------------------------------------------
-static int k8s_is_kubepod(struct cgroup *cg) {
- return cg->container_orchestrator == CGROUPS_ORCHESTRATOR_K8S;
-}
-
-static int k8s_is_container(const char *id) {
- // examples:
- // https://github.com/netdata/netdata/blob/0fc101679dcd12f1cb8acdd07bb4c85d8e553e53/collectors/cgroups.plugin/cgroup-name.sh#L121-L147
- const char *p = id;
- const char *pp = NULL;
- int i = 0;
- size_t l = 3; // pod
- while ((p = strstr(p, "pod"))) {
- i++;
- p += l;
- pp = p;
- }
- return !(i < 2 || !pp || !(pp = strchr(pp, '/')) || !pp++ || !*pp);
-}
-
-#define TASK_COMM_LEN 16
-
-static int k8s_get_container_first_proc_comm(const char *id, char *comm) {
- if (!k8s_is_container(id)) {
- return 1;
- }
-
- static procfile *ff = NULL;
-
- char filename[FILENAME_MAX + 1];
- snprintfz(filename, FILENAME_MAX, "%s/%s/cgroup.procs", cgroup_cpuacct_base, id);
-
- ff = procfile_reopen(ff, filename, NULL, CGROUP_PROCFILE_FLAG);
- if (unlikely(!ff)) {
- netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename);
- return 1;
- }
-
- ff = procfile_readall(ff);
- if (unlikely(!ff)) {
- netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot read file '%s'.", filename);
- return 1;
- }
-
- unsigned long lines = procfile_lines(ff);
- if (likely(lines < 2)) {
- return 1;
- }
-
- char *pid = procfile_lineword(ff, 0, 0);
- if (!pid || !*pid) {
- return 1;
- }
-
- snprintfz(filename, FILENAME_MAX, "%s/proc/%s/comm", netdata_configured_host_prefix, pid);
-
- ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_DEFAULT);
- if (unlikely(!ff)) {
- netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename);
- return 1;
- }
-
- ff = procfile_readall(ff);
- if (unlikely(!ff)) {
- netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot read file '%s'.", filename);
- return 1;
- }
-
- lines = procfile_lines(ff);
- if (unlikely(lines != 2)) {
- return 1;
- }
-
- char *proc_comm = procfile_lineword(ff, 0, 0);
- if (!proc_comm || !*proc_comm) {
- return 1;
- }
-
- strncpyz(comm, proc_comm, TASK_COMM_LEN);
- return 0;
-}
-
// ---------------------------------------------------------------------------------------------
static unsigned long long calc_delta(unsigned long long curr, unsigned long long prev) {
@@ -1023,16 +582,7 @@ static unsigned long long calc_percentage(unsigned long long value, unsigned lon
if (total == 0) {
return 0;
}
- return (NETDATA_DOUBLE)value / (NETDATA_DOUBLE)total * 100;
-}
-
-static int calc_cgroup_depth(const char *id) {
- int depth = 0;
- const char *s;
- for (s = id; *s; s++) {
- depth += unlikely(*s == '/');
- }
- return depth;
+ return (unsigned long long)((NETDATA_DOUBLE)value / (NETDATA_DOUBLE)total * 100);
}
// ----------------------------------------------------------------------------
@@ -1636,1583 +1186,7 @@ static inline void read_all_discovered_cgroups(struct cgroup *root) {
}
}
-// ----------------------------------------------------------------------------
-// cgroup network interfaces
-
-#define CGROUP_NETWORK_INTERFACE_MAX_LINE 2048
-static inline void read_cgroup_network_interfaces(struct cgroup *cg) {
- netdata_log_debug(D_CGROUP, "looking for the network interfaces of cgroup '%s' with chart id '%s'", cg->id, cg->chart_id);
-
- pid_t cgroup_pid;
- char cgroup_identifier[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1];
-
- if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) {
- snprintfz(cgroup_identifier, CGROUP_NETWORK_INTERFACE_MAX_LINE, "%s%s", cgroup_cpuacct_base, cg->id);
- }
- else {
- snprintfz(cgroup_identifier, CGROUP_NETWORK_INTERFACE_MAX_LINE, "%s%s", cgroup_unified_base, cg->id);
- }
-
- netdata_log_debug(D_CGROUP, "executing cgroup_identifier %s --cgroup '%s' for cgroup '%s'", cgroups_network_interface_script, cgroup_identifier, cg->id);
- FILE *fp_child_input, *fp_child_output;
- (void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_network_interface_script, "--cgroup", cgroup_identifier);
- if(!fp_child_output) {
- collector_error("CGROUP: cannot popen(%s --cgroup \"%s\", \"r\").", cgroups_network_interface_script, cgroup_identifier);
- return;
- }
-
- char *s;
- char buffer[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1];
- while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, fp_child_output))) {
- trim(s);
-
- if(*s && *s != '\n') {
- char *t = s;
- while(*t && *t != ' ') t++;
- if(*t == ' ') {
- *t = '\0';
- t++;
- }
-
- if(!*s) {
- collector_error("CGROUP: empty host interface returned by script");
- continue;
- }
-
- if(!*t) {
- collector_error("CGROUP: empty guest interface returned by script");
- continue;
- }
-
- struct cgroup_network_interface *i = callocz(1, sizeof(struct cgroup_network_interface));
- i->host_device = strdupz(s);
- i->container_device = strdupz(t);
- i->next = cg->interfaces;
- cg->interfaces = i;
-
- collector_info("CGROUP: cgroup '%s' has network interface '%s' as '%s'", cg->id, i->host_device, i->container_device);
-
- // register a device rename to proc_net_dev.c
- netdev_rename_device_add(
- i->host_device, i->container_device, cg->chart_id, cg->chart_labels, k8s_is_kubepod(cg) ? "k8s." : "");
- }
- }
-
- netdata_pclose(fp_child_input, fp_child_output, cgroup_pid);
- // netdata_log_debug(D_CGROUP, "closed cgroup_identifier for cgroup '%s'", cg->id);
-}
-
-static inline void free_cgroup_network_interfaces(struct cgroup *cg) {
- while(cg->interfaces) {
- struct cgroup_network_interface *i = cg->interfaces;
- cg->interfaces = i->next;
-
- // delete the registration of proc_net_dev rename
- netdev_rename_device_del(i->host_device);
-
- freez((void *)i->host_device);
- freez((void *)i->container_device);
- freez((void *)i);
- }
-}
-
-// ----------------------------------------------------------------------------
-// add/remove/find cgroup objects
-
-#define CGROUP_CHARTID_LINE_MAX 1024
-
-static inline char *cgroup_chart_id_strdupz(const char *s) {
- if(!s || !*s) s = "/";
-
- if(*s == '/' && s[1] != '\0') s++;
-
- char *r = strdupz(s);
- netdata_fix_chart_id(r);
-
- return r;
-}
-
-// TODO: move the code to cgroup_chart_id_strdupz() when the renaming script is fixed
-static inline void substitute_dots_in_id(char *s) {
- // dots are used to distinguish chart type and id in streaming, so we should replace them
- for (char *d = s; *d; d++) {
- if (*d == '.')
- *d = '-';
- }
-}
-
-// ----------------------------------------------------------------------------
-// parse k8s labels
-
-char *cgroup_parse_resolved_name_and_labels(RRDLABELS *labels, char *data) {
- // the first word, up to the first space is the name
- char *name = strsep_skip_consecutive_separators(&data, " ");
-
- // the rest are key=value pairs separated by comma
- while(data) {
- char *pair = strsep_skip_consecutive_separators(&data, ",");
- rrdlabels_add_pair(labels, pair, RRDLABEL_SRC_AUTO | RRDLABEL_SRC_K8S);
- }
-
- return name;
-}
-
-// ----------------------------------------------------------------------------
-
-static inline void free_pressure(struct pressure *res) {
- if (res->some.share_time.st) rrdset_is_obsolete___safe_from_collector_thread(res->some.share_time.st);
- if (res->some.total_time.st) rrdset_is_obsolete___safe_from_collector_thread(res->some.total_time.st);
- if (res->full.share_time.st) rrdset_is_obsolete___safe_from_collector_thread(res->full.share_time.st);
- if (res->full.total_time.st) rrdset_is_obsolete___safe_from_collector_thread(res->full.total_time.st);
- freez(res->filename);
-}
-
-static inline void cgroup_free(struct cgroup *cg) {
- netdata_log_debug(D_CGROUP, "Removing cgroup '%s' with chart id '%s' (was %s and %s)", cg->id, cg->chart_id, (cg->enabled)?"enabled":"disabled", (cg->available)?"available":"not available");
-
- if(cg->st_cpu) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu);
- if(cg->st_cpu_limit) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_limit);
- if(cg->st_cpu_per_core) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_per_core);
- if(cg->st_cpu_nr_throttled) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_nr_throttled);
- if(cg->st_cpu_throttled_time) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_throttled_time);
- if(cg->st_cpu_shares) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_shares);
- if(cg->st_mem) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem);
- if(cg->st_writeback) rrdset_is_obsolete___safe_from_collector_thread(cg->st_writeback);
- if(cg->st_mem_activity) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_activity);
- if(cg->st_pgfaults) rrdset_is_obsolete___safe_from_collector_thread(cg->st_pgfaults);
- if(cg->st_mem_usage) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_usage);
- if(cg->st_mem_usage_limit) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_usage_limit);
- if(cg->st_mem_utilization) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_utilization);
- if(cg->st_mem_failcnt) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_failcnt);
- if(cg->st_io) rrdset_is_obsolete___safe_from_collector_thread(cg->st_io);
- if(cg->st_serviced_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_serviced_ops);
- if(cg->st_throttle_io) rrdset_is_obsolete___safe_from_collector_thread(cg->st_throttle_io);
- if(cg->st_throttle_serviced_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_throttle_serviced_ops);
- if(cg->st_queued_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_queued_ops);
- if(cg->st_merged_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_merged_ops);
-
- freez(cg->filename_cpuset_cpus);
- freez(cg->filename_cpu_cfs_period);
- freez(cg->filename_cpu_cfs_quota);
- freez(cg->filename_memory_limit);
- freez(cg->filename_memoryswap_limit);
-
- free_cgroup_network_interfaces(cg);
-
- freez(cg->cpuacct_usage.cpu_percpu);
-
- freez(cg->cpuacct_stat.filename);
- freez(cg->cpuacct_usage.filename);
- freez(cg->cpuacct_cpu_throttling.filename);
- freez(cg->cpuacct_cpu_shares.filename);
-
- arl_free(cg->memory.arl_base);
- freez(cg->memory.filename_detailed);
- freez(cg->memory.filename_failcnt);
- freez(cg->memory.filename_usage_in_bytes);
- freez(cg->memory.filename_msw_usage_in_bytes);
-
- freez(cg->io_service_bytes.filename);
- freez(cg->io_serviced.filename);
-
- freez(cg->throttle_io_service_bytes.filename);
- freez(cg->throttle_io_serviced.filename);
-
- freez(cg->io_merged.filename);
- freez(cg->io_queued.filename);
-
- free_pressure(&cg->cpu_pressure);
- free_pressure(&cg->io_pressure);
- free_pressure(&cg->memory_pressure);
- free_pressure(&cg->irq_pressure);
-
- freez(cg->id);
- freez(cg->intermediate_id);
- freez(cg->chart_id);
- freez(cg->name);
-
- rrdlabels_destroy(cg->chart_labels);
-
- freez(cg);
-
- cgroup_root_count--;
-}
-
-// ----------------------------------------------------------------------------
-
-static inline void discovery_rename_cgroup(struct cgroup *cg) {
- if (!cg->pending_renames) {
- return;
- }
- cg->pending_renames--;
-
- netdata_log_debug(D_CGROUP, "looking for the name of cgroup '%s' with chart id '%s'", cg->id, cg->chart_id);
- netdata_log_debug(D_CGROUP, "executing command %s \"%s\" for cgroup '%s'", cgroups_rename_script, cg->intermediate_id, cg->chart_id);
- pid_t cgroup_pid;
-
- FILE *fp_child_input, *fp_child_output;
- (void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_rename_script, cg->id, cg->intermediate_id);
- if (!fp_child_output) {
- collector_error("CGROUP: cannot popen(%s \"%s\", \"r\").", cgroups_rename_script, cg->intermediate_id);
- cg->pending_renames = 0;
- cg->processed = 1;
- return;
- }
-
- char buffer[CGROUP_CHARTID_LINE_MAX + 1];
- char *new_name = fgets(buffer, CGROUP_CHARTID_LINE_MAX, fp_child_output);
- int exit_code = netdata_pclose(fp_child_input, fp_child_output, cgroup_pid);
-
- switch (exit_code) {
- case 0:
- cg->pending_renames = 0;
- break;
-
- case 3:
- cg->pending_renames = 0;
- cg->processed = 1;
- break;
- }
-
- if (cg->pending_renames || cg->processed)
- return;
- if (!new_name || !*new_name || *new_name == '\n')
- return;
- if (!(new_name = trim(new_name)))
- return;
-
- char *name = new_name;
-
- if (!cg->chart_labels)
- cg->chart_labels = rrdlabels_create();
- // read the new labels and remove the obsolete ones
- rrdlabels_unmark_all(cg->chart_labels);
- name = cgroup_parse_resolved_name_and_labels(cg->chart_labels, new_name);
- rrdlabels_remove_all_unmarked(cg->chart_labels);
-
- freez(cg->name);
- cg->name = strdupz(name);
-
- freez(cg->chart_id);
- cg->chart_id = cgroup_chart_id_strdupz(name);
-
- substitute_dots_in_id(cg->chart_id);
- cg->hash_chart_id = simple_hash(cg->chart_id);
-}
-
-static void is_cgroup_procs_exist(netdata_ebpf_cgroup_shm_body_t *out, char *id) {
- struct stat buf;
-
- snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_cpuset_base, id);
- if (likely(stat(out->path, &buf) == 0)) {
- return;
- }
-
- snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_blkio_base, id);
- if (likely(stat(out->path, &buf) == 0)) {
- return;
- }
-
- snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_memory_base, id);
- if (likely(stat(out->path, &buf) == 0)) {
- return;
- }
-
- snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", c