summary | refs | log | tree | commit | diff | stats
path: root/collectors/cgroups.plugin
diff options
context:
space:
mode:
author: Vladimir Kobal <vlad@prokk.net> 2019-03-09 18:12:47 +0200
committer: Chris Akritidis <43294513+cakrit@users.noreply.github.com> 2019-03-09 17:12:47 +0100
commit: 2e05371992130803b546b7b6d37a98fb089fc5a3 (patch)
tree: f5baf34ce7084a8dd16c86ea9aea197623c5dcea /collectors/cgroups.plugin
parent: 3068d75ab275c356755356ab9087da8f1ef061a3 (diff)
Delay renaming for docker containers (#5557)
Fixes #5512

* Delay cgroup renaming
* Restrict renaming delay to problematic containers only
* Use pending_renames instead of needs_renaming
* Use an exit code instead of a parameter
Diffstat (limited to 'collectors/cgroups.plugin')
-rwxr-xr-x collectors/cgroups.plugin/cgroup-name.sh.in 8
-rw-r--r-- collectors/cgroups.plugin/sys_fs_cgroup.c 46
2 files changed, 45 insertions, 9 deletions
diff --git a/collectors/cgroups.plugin/cgroup-name.sh.in b/collectors/cgroups.plugin/cgroup-name.sh.in
index 3aebe2bf47..c470870b53 100755
--- a/collectors/cgroups.plugin/cgroup-name.sh.in
+++ b/collectors/cgroups.plugin/cgroup-name.sh.in
@@ -73,6 +73,7 @@ function docker_get_name() {
fi
if [ -z "${NAME}" ]; then
warning "cannot find the name of docker container '${id}'"
+ NAME_NOT_FOUND=1
NAME="${id:0:12}"
else
info "docker container '${id}' is named '${NAME}'"
@@ -95,6 +96,7 @@ function docker_validate_id() {
DOCKER_HOST="${DOCKER_HOST:=/var/run/docker.sock}"
CGROUP="${1}"
+NAME_NOT_FOUND=0
NAME=
# -----------------------------------------------------------------------------
@@ -174,3 +176,9 @@ fi
info "cgroup '${CGROUP}' is called '${NAME}'"
echo "${NAME}"
+
+if [ "${NAME_NOT_FOUND}" -eq 1 ]; then
+ exit 2
+else
+ exit 0
+fi
diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c
index 1d42f7292c..5d3f14f5ed 100644
--- a/collectors/cgroups.plugin/sys_fs_cgroup.c
+++ b/collectors/cgroups.plugin/sys_fs_cgroup.c
@@ -376,6 +376,8 @@ struct cgroup {
char available; // found in the filesystem
char enabled; // enabled in the config
+ char pending_renames;
+
char *id;
uint32_t hash;
@@ -776,7 +778,7 @@ static inline void read_all_cgroups(struct cgroup *root) {
struct cgroup *cg;
for(cg = root; cg ; cg = cg->next)
- if(cg->enabled && cg->available)
+ if(cg->enabled && cg->available && !cg->pending_renames)
cgroup_read(cg);
}
@@ -886,7 +888,7 @@ static inline void cgroup_get_chart_name(struct cgroup *cg) {
pid_t cgroup_pid;
char command[CGROUP_CHARTID_LINE_MAX + 1];
- snprintfz(command, CGROUP_CHARTID_LINE_MAX, "exec %s '%s' '%s'", cgroups_rename_script, cg->chart_id, cg->id);
+ snprintfz(command, CGROUP_CHARTID_LINE_MAX, "exec %s '%s'", cgroups_rename_script, cg->id);
debug(D_CGROUP, "executing command \"%s\" for cgroup '%s'", command, cg->id);
FILE *fp = mypopen(command, &cgroup_pid);
@@ -895,7 +897,7 @@ static inline void cgroup_get_chart_name(struct cgroup *cg) {
char buffer[CGROUP_CHARTID_LINE_MAX + 1];
char *s = fgets(buffer, CGROUP_CHARTID_LINE_MAX, fp);
// debug(D_CGROUP, "closing command for cgroup '%s'", cg->id);
- mypclose(fp, cgroup_pid);
+ int name_error = mypclose(fp, cgroup_pid);
// debug(D_CGROUP, "closed command for cgroup '%s'", cg->id);
if(s && *s && *s != '\n') {
@@ -903,6 +905,9 @@ static inline void cgroup_get_chart_name(struct cgroup *cg) {
s = trim(s);
if (s) {
+ if(likely(!name_error))
+ cg->pending_renames = 0;
+
freez(cg->chart_title);
cg->chart_title = cgroup_title_strdupz(s);
@@ -950,6 +955,7 @@ static inline struct cgroup *cgroup_add(const char *id) {
// fix the chart_id and title by calling the external script
if(simple_pattern_matches(enabled_cgroup_renames, cg->id)) {
+ cg->pending_renames = 2;
cgroup_get_chart_name(cg);
debug(D_CGROUP, "cgroup '%s' renamed to '%s' (title: '%s')", cg->id, cg->chart_id, cg->chart_title);
@@ -1031,7 +1037,7 @@ static inline struct cgroup *cgroup_add(const char *id) {
}
}
- if(cg->enabled && !(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE))
+ if(cg->enabled && !cg->pending_renames && !(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE))
read_cgroup_network_interfaces(cg);
debug(D_CGROUP, "ADDED CGROUP: '%s' with chart id '%s' and title '%s' as %s (default was %s)", cg->id, cg->chart_id, cg->chart_title, (cg->enabled)?"enabled":"disabled", (def)?"enabled":"disabled");
@@ -1050,7 +1056,7 @@ static inline void cgroup_free(struct cgroup *cg) {
if(cg->st_mem_activity) rrdset_is_obsolete(cg->st_mem_activity);
if(cg->st_pgfaults) rrdset_is_obsolete(cg->st_pgfaults);
if(cg->st_mem_usage) rrdset_is_obsolete(cg->st_mem_usage);
- if (cg->st_mem_usage_limit) rrdset_is_obsolete(cg->st_mem_usage_limit);
+ if(cg->st_mem_usage_limit) rrdset_is_obsolete(cg->st_mem_usage_limit);
if(cg->st_mem_failcnt) rrdset_is_obsolete(cg->st_mem_failcnt);
if(cg->st_io) rrdset_is_obsolete(cg->st_io);
if(cg->st_serviced_ops) rrdset_is_obsolete(cg->st_serviced_ops);
@@ -1138,7 +1144,26 @@ static inline void found_subdir_in_dir(const char *dir) {
cg = cgroup_add(dir);
}
- if(cg) cg->available = 1;
+ if(cg) {
+ // delay renaming of the cgroup and looking for network interfaces to deal with the docker lag when starting the container
+ if(unlikely(cg->pending_renames == 1)) {
+ // fix the chart_id and title by calling the external script
+ if(simple_pattern_matches(enabled_cgroup_renames, cg->id)) {
+
+ cgroup_get_chart_name(cg);
+ cg->pending_renames = 0;
+
+ if(cg->enabled && !(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE))
+ read_cgroup_network_interfaces(cg);
+
+ debug(D_CGROUP, "cgroup '%s' renamed to '%s' (title: '%s')", cg->id, cg->chart_id, cg->chart_title);
+ }
+ else
+ debug(D_CGROUP, "cgroup '%s' will not be renamed - it matches the list of disabled cgroup renames (will be shown as '%s')", cg->id, cg->chart_id);
+ }
+
+ cg->available = 1;
+ }
}
static inline int find_dir_in_subdirs(const char *base, const char *this, void (*callback)(const char *)) {
@@ -1302,7 +1327,10 @@ static inline void find_all_cgroups() {
for(cg = cgroup_root; cg ; cg = cg->next) {
// fprintf(stderr, " >>> CGROUP '%s' (%u - %s) with name '%s'\n", cg->id, cg->hash, cg->available?"available":"stopped", cg->name);
- if(unlikely(!cg->available))
+ if(unlikely(cg->pending_renames))
+ cg->pending_renames--;
+
+ if(unlikely(!cg->available || cg->pending_renames))
continue;
debug(D_CGROUP, "checking paths for cgroup '%s'", cg->id);
@@ -2037,7 +2065,7 @@ void update_systemd_services_charts(
// update the values
struct cgroup *cg;
for(cg = cgroup_root; cg ; cg = cg->next) {
- if(unlikely(!cg->available || !cg->enabled || !(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE)))
+ if(unlikely(!cg->available || !cg->enabled || cg->pending_renames || !(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE)))
continue;
if(likely(do_cpu && cg->cpuacct_stat.updated)) {
@@ -2360,7 +2388,7 @@ void update_cgroup_charts(int update_every) {
struct cgroup *cg;
for(cg = cgroup_root; cg ; cg = cg->next) {
- if(unlikely(!cg->available || !cg->enabled))
+ if(unlikely(!cg->available || !cg->enabled || cg->pending_renames))
continue;
if(likely(cgroup_enable_systemd_services && cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE)) {