summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Ilya Mashchenko <ilya@netdata.cloud>, 2022-05-16 10:38:21 +0300
committer: GitHub <noreply@github.com>, 2022-05-16 10:38:21 +0300
commit: ce2e3083856bb8e080f4808e06286f023c313a97 (patch)
tree: 10c650fda8c6443dded39870940a728bc2f447be
parent: d795364876f570188ea18185b3eb7c8494ec98ce (diff)
fix(cgroups.plugin): improve check for uninitialized containers in k8s (#12912)
-rw-r--r-- collectors/cgroups.plugin/sys_fs_cgroup.c | 26
1 files changed, 24 insertions, 2 deletions
diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c
index bf3ea5ee2f..5f585f56bc 100644
--- a/collectors/cgroups.plugin/sys_fs_cgroup.c
+++ b/collectors/cgroups.plugin/sys_fs_cgroup.c
@@ -91,6 +91,8 @@ static SIMPLE_PATTERN *search_cgroup_paths = NULL;
static SIMPLE_PATTERN *enabled_cgroup_renames = NULL;
static SIMPLE_PATTERN *systemd_services_cgroups = NULL;
+// patterns matched against the comm of a k8s container's first process; on a match the cgroup is
+// kept as "first time seen" (container presumably still initializing). Created in cgroup_discovery_worker().
+static SIMPLE_PATTERN *entrypoint_parent_process_comm = NULL;
+
static char *cgroups_rename_script = NULL;
static char *cgroups_network_interface_script = NULL;
@@ -918,6 +920,10 @@ static inline int matches_search_cgroup_paths(const char *dir) {
return simple_pattern_matches(search_cgroup_paths, dir);
}
+// Checks 'comm' against the entrypoint_parent_process_comm pattern list and
+// passes through whatever simple_pattern_matches() reports.
+static inline int matches_entrypoint_parent_process_comm(const char *comm) {
+    const int matched = simple_pattern_matches(entrypoint_parent_process_comm, comm);
+    return matched;
+}
+
// Non-zero when the cgroup's options carry the CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE flag.
static inline int is_cgroup_systemd_service(struct cgroup *cg) {
    const int service_flag = cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE;
    return service_flag;
}
@@ -2567,6 +2573,16 @@ static inline void discovery_find_all_cgroups_v2() {
}
}
+// Returns 1 when 's' is a non-empty string consisting solely of decimal digits,
+// 0 otherwise (an empty string or a NULL pointer yields 0).
+static int is_digits_only(const char *s) {
+    if (!s || !*s) {
+        return 0;
+    }
+
+    for (; *s; s++) {
+        // <ctype.h> functions need a value representable as unsigned char (or EOF);
+        // plain char may be signed, so cast before calling isdigit().
+        if (!isdigit((unsigned char)*s)) {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
static inline void discovery_process_first_time_seen_cgroup(struct cgroup *cg) {
if (!cg->first_time_seen) {
return;
@@ -2577,8 +2593,8 @@ static inline void discovery_process_first_time_seen_cgroup(struct cgroup *cg) {
if (is_inside_k8s && !k8s_get_container_first_proc_comm(cg->id, comm)) {
// container initialization may take some time when CPU % is high
- // TODO: not sure run-level 2 is enough (just came across this problem on an AWS K8s cluster)
- if (!strcmp(comm, "runc:[2:INIT]")) {
+ // seen on GKE: comm is '6' before 'runc:[2:INIT]' (dunno if it could be another number)
+ if (is_digits_only(comm) || matches_entrypoint_parent_process_comm(comm)) {
cg->first_time_seen = 1;
return;
}
@@ -2728,6 +2744,12 @@ void cgroup_discovery_worker(void *ptr)
worker_register_job_name(WORKER_DISCOVERY_SHARE, "share");
worker_register_job_name(WORKER_DISCOVERY_LOCK, "lock");
+ entrypoint_parent_process_comm = simple_pattern_create(
+ " runc:[* " // http://terenceli.github.io/%E6%8A%80%E6%9C%AF/2021/12/28/runc-internals-3)
+ " exe ", // https://github.com/falcosecurity/falco/blob/9d41b0a151b83693929d3a9c84f7c5c85d070d3a/rules/falco_rules.yaml#L1961
+ NULL,
+ SIMPLE_PATTERN_EXACT);
+
while (!netdata_exit) {
worker_is_idle();