diff options
author | Ilya Mashchenko <ilya@netdata.cloud> | 2022-05-16 10:38:21 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-05-16 10:38:21 +0300 |
commit | ce2e3083856bb8e080f4808e06286f023c313a97 (patch) | |
tree | 10c650fda8c6443dded39870940a728bc2f447be | |
parent | d795364876f570188ea18185b3eb7c8494ec98ce (diff) |
fix(cgroups.plugin): improve check for uninitialized containers in k8s (#12912)
-rw-r--r-- | collectors/cgroups.plugin/sys_fs_cgroup.c | 26 |
1 file changed, 24 insertions, 2 deletions
diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c
index bf3ea5ee2f..5f585f56bc 100644
--- a/collectors/cgroups.plugin/sys_fs_cgroup.c
+++ b/collectors/cgroups.plugin/sys_fs_cgroup.c
@@ -91,6 +91,8 @@ static SIMPLE_PATTERN *search_cgroup_paths = NULL;
 static SIMPLE_PATTERN *enabled_cgroup_renames = NULL;
 static SIMPLE_PATTERN *systemd_services_cgroups = NULL;
 
+static SIMPLE_PATTERN *entrypoint_parent_process_comm = NULL;
+
 static char *cgroups_rename_script = NULL;
 static char *cgroups_network_interface_script = NULL;
 
@@ -918,6 +920,10 @@ static inline int matches_search_cgroup_paths(const char *dir) {
     return simple_pattern_matches(search_cgroup_paths, dir);
 }
 
+static inline int matches_entrypoint_parent_process_comm(const char *comm) {
+    return simple_pattern_matches(entrypoint_parent_process_comm, comm);
+}
+
 static inline int is_cgroup_systemd_service(struct cgroup *cg) {
     return (cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE);
 }
@@ -2567,6 +2573,16 @@ static inline void discovery_find_all_cgroups_v2() {
     }
 }
 
+static int is_digits_only(const char *s) { // 1 if s is non-empty and all decimal digits, else 0
+    do {
+        if (!isdigit((unsigned char)*s++)) { // cast: a negative char passed to isdigit() is undefined behaviour
+            return 0;
+        }
+    } while (*s);
+
+    return 1;
+}
+
 static inline void discovery_process_first_time_seen_cgroup(struct cgroup *cg) {
     if (!cg->first_time_seen) {
         return;
@@ -2577,8 +2593,8 @@ static inline void discovery_process_first_time_seen_cgroup(struct cgroup *cg) {
 
     if (is_inside_k8s && !k8s_get_container_first_proc_comm(cg->id, comm)) {
         // container initialization may take some time when CPU % is high
-        // TODO: not sure run-level 2 is enough (just came across this problem on an AWS K8s cluster)
-        if (!strcmp(comm, "runc:[2:INIT]")) {
+        // seen on GKE: comm is '6' before 'runc:[2:INIT]' (dunno if it could be another number)
+        if (is_digits_only(comm) || matches_entrypoint_parent_process_comm(comm)) {
             cg->first_time_seen = 1;
             return;
         }
@@ -2728,6 +2744,12 @@ void cgroup_discovery_worker(void *ptr)
     worker_register_job_name(WORKER_DISCOVERY_SHARE, "share");
     worker_register_job_name(WORKER_DISCOVERY_LOCK, "lock");
 
+    entrypoint_parent_process_comm = simple_pattern_create(
+        " runc:[* " // http://terenceli.github.io/%E6%8A%80%E6%9C%AF/2021/12/28/runc-internals-3
+        " exe ", // https://github.com/falcosecurity/falco/blob/9d41b0a151b83693929d3a9c84f7c5c85d070d3a/rules/falco_rules.yaml#L1961
+        NULL,
+        SIMPLE_PATTERN_EXACT);
+
     while (!netdata_exit) {
         worker_is_idle();
 