From 2e05371992130803b546b7b6d37a98fb089fc5a3 Mon Sep 17 00:00:00 2001 From: Vladimir Kobal Date: Sat, 9 Mar 2019 18:12:47 +0200 Subject: Delay renaming for docker containers (#5557) Fixes #5512 * Delay cgroup renaming * Restrict renaming delay to problematic containers only * Use pending_renames instead of needs_renaming * Use an exit code instead of a parameter --- collectors/cgroups.plugin/cgroup-name.sh.in | 8 +++++ collectors/cgroups.plugin/sys_fs_cgroup.c | 46 +++++++++++++++++++++++------ 2 files changed, 45 insertions(+), 9 deletions(-) (limited to 'collectors/cgroups.plugin') diff --git a/collectors/cgroups.plugin/cgroup-name.sh.in b/collectors/cgroups.plugin/cgroup-name.sh.in index 3aebe2bf47..c470870b53 100755 --- a/collectors/cgroups.plugin/cgroup-name.sh.in +++ b/collectors/cgroups.plugin/cgroup-name.sh.in @@ -73,6 +73,7 @@ function docker_get_name() { fi if [ -z "${NAME}" ]; then warning "cannot find the name of docker container '${id}'" + NAME_NOT_FOUND=1 NAME="${id:0:12}" else info "docker container '${id}' is named '${NAME}'" @@ -95,6 +96,7 @@ function docker_validate_id() { DOCKER_HOST="${DOCKER_HOST:=/var/run/docker.sock}" CGROUP="${1}" +NAME_NOT_FOUND=0 NAME= # ----------------------------------------------------------------------------- @@ -174,3 +176,9 @@ fi info "cgroup '${CGROUP}' is called '${NAME}'" echo "${NAME}" + +if [ "${NAME_NOT_FOUND}" -eq 1 ]; then + exit 2 +else + exit 0 +fi diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c index 1d42f7292c..5d3f14f5ed 100644 --- a/collectors/cgroups.plugin/sys_fs_cgroup.c +++ b/collectors/cgroups.plugin/sys_fs_cgroup.c @@ -376,6 +376,8 @@ struct cgroup { char available; // found in the filesystem char enabled; // enabled in the config + char pending_renames; + char *id; uint32_t hash; @@ -776,7 +778,7 @@ static inline void read_all_cgroups(struct cgroup *root) { struct cgroup *cg; for(cg = root; cg ; cg = cg->next) - if(cg->enabled && cg->available) + if(cg->enabled && cg->available && !cg->pending_renames) cgroup_read(cg); } @@ -886,7 +888,7 @@ static inline void cgroup_get_chart_name(struct cgroup *cg) { pid_t cgroup_pid; char command[CGROUP_CHARTID_LINE_MAX + 1]; - snprintfz(command, CGROUP_CHARTID_LINE_MAX, "exec %s '%s' '%s'", cgroups_rename_script, cg->chart_id, cg->id); + snprintfz(command, CGROUP_CHARTID_LINE_MAX, "exec %s '%s'", cgroups_rename_script, cg->id); debug(D_CGROUP, "executing command \"%s\" for cgroup '%s'", command, cg->id); FILE *fp = mypopen(command, &cgroup_pid); @@ -895,7 +897,7 @@ static inline void cgroup_get_chart_name(struct cgroup *cg) { char buffer[CGROUP_CHARTID_LINE_MAX + 1]; char *s = fgets(buffer, CGROUP_CHARTID_LINE_MAX, fp); // debug(D_CGROUP, "closing command for cgroup '%s'", cg->id); - mypclose(fp, cgroup_pid); + int name_error = mypclose(fp, cgroup_pid); // debug(D_CGROUP, "closed command for cgroup '%s'", cg->id); if(s && *s && *s != '\n') { @@ -903,6 +905,9 @@ static inline void cgroup_get_chart_name(struct cgroup *cg) { s = trim(s); if (s) { + if(likely(!name_error)) + cg->pending_renames = 0; + freez(cg->chart_title); cg->chart_title = cgroup_title_strdupz(s); @@ -950,6 +955,7 @@ static inline struct cgroup *cgroup_add(const char *id) { // fix the chart_id and title by calling the external script if(simple_pattern_matches(enabled_cgroup_renames, cg->id)) { + cg->pending_renames = 2; cgroup_get_chart_name(cg); debug(D_CGROUP, "cgroup '%s' renamed to '%s' (title: '%s')", cg->id, cg->chart_id, cg->chart_title); @@ -1031,7 +1037,7 @@ static inline struct cgroup *cgroup_add(const char *id) { } } - if(cg->enabled && !(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE)) + if(cg->enabled && !cg->pending_renames && !(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE)) read_cgroup_network_interfaces(cg); debug(D_CGROUP, "ADDED CGROUP: '%s' with chart id '%s' and title '%s' as %s (default was %s)", cg->id, cg->chart_id, cg->chart_title, (cg->enabled)?"enabled":"disabled", (def)?"enabled":"disabled"); @@ -1050,7 +1056,7 @@ static inline void cgroup_free(struct cgroup *cg) { if(cg->st_mem_activity) rrdset_is_obsolete(cg->st_mem_activity); if(cg->st_pgfaults) rrdset_is_obsolete(cg->st_pgfaults); if(cg->st_mem_usage) rrdset_is_obsolete(cg->st_mem_usage); - if (cg->st_mem_usage_limit) rrdset_is_obsolete(cg->st_mem_usage_limit); + if(cg->st_mem_usage_limit) rrdset_is_obsolete(cg->st_mem_usage_limit); if(cg->st_mem_failcnt) rrdset_is_obsolete(cg->st_mem_failcnt); if(cg->st_io) rrdset_is_obsolete(cg->st_io); if(cg->st_serviced_ops) rrdset_is_obsolete(cg->st_serviced_ops); @@ -1138,7 +1144,26 @@ static inline void found_subdir_in_dir(const char *dir) { cg = cgroup_add(dir); } - if(cg) cg->available = 1; + if(cg) { + // delay renaming of the cgroup and looking for network interfaces to deal with the docker lag when starting the container + if(unlikely(cg->pending_renames == 1)) { + // fix the chart_id and title by calling the external script + if(simple_pattern_matches(enabled_cgroup_renames, cg->id)) { + + cgroup_get_chart_name(cg); + cg->pending_renames = 0; + + if(cg->enabled && !(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE)) + read_cgroup_network_interfaces(cg); + + debug(D_CGROUP, "cgroup '%s' renamed to '%s' (title: '%s')", cg->id, cg->chart_id, cg->chart_title); + } + else + debug(D_CGROUP, "cgroup '%s' will not be renamed - it matches the list of disabled cgroup renames (will be shown as '%s')", cg->id, cg->chart_id); + } + + cg->available = 1; + } } static inline int find_dir_in_subdirs(const char *base, const char *this, void (*callback)(const char *)) { @@ -1302,7 +1327,10 @@ static inline void find_all_cgroups() { for(cg = cgroup_root; cg ; cg = cg->next) { // fprintf(stderr, " >>> CGROUP '%s' (%u - %s) with name '%s'\n", cg->id, cg->hash, cg->available?"available":"stopped", cg->name); - if(unlikely(!cg->available)) + if(unlikely(cg->pending_renames)) + cg->pending_renames--; + + if(unlikely(!cg->available || cg->pending_renames)) continue; debug(D_CGROUP, "checking paths for cgroup '%s'", cg->id); @@ -2037,7 +2065,7 @@ void update_systemd_services_charts( // update the values struct cgroup *cg; for(cg = cgroup_root; cg ; cg = cg->next) { - if(unlikely(!cg->available || !cg->enabled || !(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE))) + if(unlikely(!cg->available || !cg->enabled || cg->pending_renames || !(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE))) continue; if(likely(do_cpu && cg->cpuacct_stat.updated)) { @@ -2360,7 +2388,7 @@ void update_cgroup_charts(int update_every) { struct cgroup *cg; for(cg = cgroup_root; cg ; cg = cg->next) { - if(unlikely(!cg->available || !cg->enabled)) + if(unlikely(!cg->available || !cg->enabled || cg->pending_renames)) continue; if(likely(cgroup_enable_systemd_services && cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE)) { -- cgit v1.2.3