From 8fbf817ef83b3524b15f908251909d9d6feb5532 Mon Sep 17 00:00:00 2001 From: Costa Tsaousis Date: Mon, 15 Oct 2018 23:16:42 +0300 Subject: modularized all source code (#4391) * modularized all external plugins * added README.md in plugins * fixed title * fixed typo * relative link to external plugins * external plugins configuration README * added plugins link * remove plugins link * plugin names are links * added links to external plugins * removed unnecessary spacing * list to table * added language * fixed typo * list to table on internal plugins * added more documentation to internal plugins * moved python, node, and bash code and configs into the external plugins * added statsd README * fix bug with corrupting config.h every 2nd compilation * moved all config files together with their code * more documentation * diskspace info * fixed broken links in apps.plugin * added backends docs * updated plugins readme * move nc-backend.sh to backends * created daemon directory * moved all code outside src/ * fixed readme indentation * renamed plugins.d.plugin to plugins.d * updated readme * removed linux- from linux plugins * updated readme * updated readme * updated readme * updated readme * updated readme * updated readme * fixed README.md links * fixed netdata tree links * updated codacy, codeclimate and lgtm excluded paths * update CMakeLists.txt * updated automake options at top directory * libnetdata split into directories * updated READMEs * updated READMEs * updated ARL docs * updated ARL docs * moved /plugins to /collectors * moved all external plugins outside plugins.d * updated codacy, codeclimate, lgtm * updated README * updated url * updated readme * updated readme * updated readme * updated readme * moved api and web into webserver * web/api web/gui web/server * modularized webserver * removed web/gui/version.txt --- collectors/cgroups.plugin/Makefile.am | 20 + collectors/cgroups.plugin/cgroup-name.sh.in | 196 ++
collectors/cgroups.plugin/cgroup-network-helper.sh | 258 ++ collectors/cgroups.plugin/cgroup-network.c | 682 +++++ collectors/cgroups.plugin/sys_fs_cgroup.c | 2771 ++++++++++++++++++++ collectors/cgroups.plugin/sys_fs_cgroup.h | 31 + 6 files changed, 3958 insertions(+) create mode 100644 collectors/cgroups.plugin/Makefile.am create mode 100755 collectors/cgroups.plugin/cgroup-name.sh.in create mode 100755 collectors/cgroups.plugin/cgroup-network-helper.sh create mode 100644 collectors/cgroups.plugin/cgroup-network.c create mode 100644 collectors/cgroups.plugin/sys_fs_cgroup.c create mode 100644 collectors/cgroups.plugin/sys_fs_cgroup.h (limited to 'collectors/cgroups.plugin') diff --git a/collectors/cgroups.plugin/Makefile.am b/collectors/cgroups.plugin/Makefile.am new file mode 100644 index 0000000000..fd878049d0 --- /dev/null +++ b/collectors/cgroups.plugin/Makefile.am @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +CLEANFILES = \ + cgroup-name.sh \ + $(NULL) + +include $(top_srcdir)/build/subst.inc +SUFFIXES = .in + +dist_plugins_SCRIPTS = \ + cgroup-name.sh \ + cgroup-network-helper.sh \ + $(NULL) + +dist_noinst_DATA = \ + cgroup-name.sh.in \ + $(NULL) diff --git a/collectors/cgroups.plugin/cgroup-name.sh.in b/collectors/cgroups.plugin/cgroup-name.sh.in new file mode 100755 index 0000000000..53696a4bf9 --- /dev/null +++ b/collectors/cgroups.plugin/cgroup-name.sh.in @@ -0,0 +1,196 @@ +#!/usr/bin/env bash + +# netdata +# real-time performance and health monitoring, done right! 
+# (C) 2016 Costa Tsaousis +# SPDX-License-Identifier: GPL-3.0-or-later +# +# Script to find a better name for cgroups +# + +export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin" +export LC_ALL=C + +# ----------------------------------------------------------------------------- + +PROGRAM_NAME="$(basename "${0}")" + +logdate() { + date "+%Y-%m-%d %H:%M:%S" +} + +log() { + local status="${1}" + shift + + echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${*}" + +} + +warning() { + log WARNING "${@}" +} + +error() { + log ERROR "${@}" +} + +info() { + log INFO "${@}" +} + +fatal() { + log FATAL "${@}" + exit 1 +} + +debug=0 +debug() { + [ $debug -eq 1 ] && log DEBUG "${@}" +} + +# ----------------------------------------------------------------------------- + +[ -z "${NETDATA_USER_CONFIG_DIR}" ] && NETDATA_USER_CONFIG_DIR="@configdir_POST@" +[ -z "${NETDATA_STOCK_CONFIG_DIR}" ] && NETDATA_STOCK_CONFIG_DIR="@libconfigdir_POST@" + +DOCKER_HOST="${DOCKER_HOST:=/var/run/docker.sock}" +CGROUP="${1}" +NAME= + +# ----------------------------------------------------------------------------- + +if [ -z "${CGROUP}" ] + then + fatal "called without a cgroup name. Nothing to do." +fi + +for CONFIG in "${NETDATA_USER_CONFIG_DIR}/cgroups-names.conf" "${NETDATA_STOCK_CONFIG_DIR}/cgroups-names.conf" +do + if [ -f "${CONFIG}" ] + then + NAME="$(grep "^${CGROUP} " "${CONFIG}" | sed "s/[[:space:]]\+/ /g" | cut -d ' ' -f 2)" + if [ -z "${NAME}" ] + then + info "cannot find cgroup '${CGROUP}' in '${CONFIG}'." + else + break + fi + #else + # info "configuration file '${CONFIG}' is not available." + fi +done + +function docker_get_name_classic { + local id="${1}" + info "Running command: docker ps --filter=id=\"${id}\" --format=\"{{.Names}}\"" + NAME="$( docker ps --filter=id="${id}" --format="{{.Names}}" )" + return 0 +} + +function docker_get_name_api { + local id="${1}" + if [ ! 
-S "${DOCKER_HOST}" ] + then + warning "Can't find ${DOCKER_HOST}" + return 1 + fi + info "Running API command: /containers/${id}/json" + JSON=$(echo -e "GET /containers/${id}/json HTTP/1.0\r\n" | nc -U ${DOCKER_HOST} | grep '^{.*') + NAME=$(echo $JSON | jq -r .Name,.Config.Hostname | grep -v null | head -n1 | sed 's|^/||') + return 0 +} + +function docker_get_name { + local id="${1}" + if hash docker 2>/dev/null + then + docker_get_name_classic "${id}" + else + docker_get_name_api "${id}" || docker_get_name_classic "${id}" + fi + if [ -z "${NAME}" ] + then + warning "cannot find the name of docker container '${id}'" + NAME="${id:0:12}" + else + info "docker container '${id}' is named '${NAME}'" + fi +} + +if [ -z "${NAME}" ] + then + if [[ "${CGROUP}" =~ ^.*docker[-_/\.][a-fA-F0-9]+[-_\.]?.*$ ]] + then + # docker containers + + DOCKERID="$( echo "${CGROUP}" | sed "s|^.*docker[-_/]\([a-fA-F0-9]\+\)[-_\.]\?.*$|\1|" )" + # echo "DOCKERID=${DOCKERID}" + + if [ ! -z "${DOCKERID}" -a \( ${#DOCKERID} -eq 64 -o ${#DOCKERID} -eq 12 \) ] + then + docker_get_name "${DOCKERID}" + else + error "a docker id cannot be extracted from docker cgroup '${CGROUP}'." + fi + elif [[ "${CGROUP}" =~ ^.*kubepods[_/].*[_/]pod[a-fA-F0-9-]+[_/][a-fA-F0-9]+$ ]] + then + # kubernetes + + DOCKERID="$( echo "${CGROUP}" | sed "s|^.*kubepods[_/].*[_/]pod[a-fA-F0-9-]\+[_/]\([a-fA-F0-9]\+\)$|\1|" )" + # echo "DOCKERID=${DOCKERID}" + + if [ ! -z "${DOCKERID}" -a \( ${#DOCKERID} -eq 64 -o ${#DOCKERID} -eq 12 \) ] + then + docker_get_name "${DOCKERID}" + else + error "a docker id cannot be extracted from kubernetes cgroup '${CGROUP}'." 
+ fi + elif [[ "${CGROUP}" =~ machine.slice[_/].*\.service ]] + then + # systemd-nspawn + + NAME="$(echo ${CGROUP} | sed 's/.*machine.slice[_\/]\(.*\)\.service/\1/g')" + + elif [[ "${CGROUP}" =~ machine.slice_machine.*-qemu ]] + then + # libvirtd / qemu virtual machines + + # NAME="$(echo ${CGROUP} | sed 's/machine.slice_machine.*-qemu//; s/\/x2d//; s/\/x2d/\-/g; s/\.scope//g')" + NAME="qemu_$(echo ${CGROUP} | sed 's/machine.slice_machine.*-qemu//; s/\/x2d[[:digit:]]*//; s/\/x2d//g; s/\.scope//g')" + + elif [[ "${CGROUP}" =~ machine_.*\.libvirt-qemu ]] + then + # libvirtd / qemu virtual machines + NAME="qemu_$(echo ${CGROUP} | sed 's/^machine_//; s/\.libvirt-qemu$//; s/-/_/;')" + + elif [[ "${CGROUP}" =~ qemu.slice_([0-9]+).scope && -d /etc/pve ]] + then + # Proxmox VMs + + FILENAME="/etc/pve/qemu-server/${BASH_REMATCH[1]}.conf" + if [[ -f $FILENAME && -r $FILENAME ]] + then + NAME="qemu_$(grep -e '^name: ' "/etc/pve/qemu-server/${BASH_REMATCH[1]}.conf" | head -1 | sed -rn 's|\s*name\s*:\s*(.*)?$|\1|p')" + else + error "proxmox config file missing ${FILENAME} or netdata does not have read access. Please ensure netdata is a member of www-data group." + fi + elif [[ "${CGROUP}" =~ lxc_([0-9]+) && -d /etc/pve ]] + then + # Proxmox Containers (LXC) + + FILENAME="/etc/pve/lxc/${BASH_REMATCH[1]}.conf" + if [[ -f ${FILENAME} && -r ${FILENAME} ]] + then + NAME=$(grep -e '^hostname: ' /etc/pve/lxc/${BASH_REMATCH[1]}.conf | head -1 | sed -rn 's|\s*hostname\s*:\s*(.*)?$|\1|p') + else + error "proxmox config file missing ${FILENAME} or netdata does not have read access. Please ensure netdata is a member of www-data group." 
+ fi + fi + + [ -z "${NAME}" ] && NAME="${CGROUP}" + [ ${#NAME} -gt 100 ] && NAME="${NAME:0:100}" +fi + +info "cgroup '${CGROUP}' is called '${NAME}'" +echo "${NAME}" diff --git a/collectors/cgroups.plugin/cgroup-network-helper.sh b/collectors/cgroups.plugin/cgroup-network-helper.sh new file mode 100755 index 0000000000..666f02fc88 --- /dev/null +++ b/collectors/cgroups.plugin/cgroup-network-helper.sh @@ -0,0 +1,258 @@ +#!/usr/bin/env bash +# shellcheck disable=SC1117 + +# cgroup-network-helper.sh +# detect container and virtual machine interfaces +# +# (C) 2017 Costa Tsaousis +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This script is called as root (by cgroup-network), with either a pid, or a cgroup path. +# It tries to find all the network interfaces that belong to the same cgroup. +# +# It supports several method for this detection: +# +# 1. cgroup-network (the binary father of this script) detects veth network interfaces, +# by examining iflink and ifindex IDs and switching namespaces +# (it also detects the interface name as it is used by the container). +# +# 2. this script, uses /proc/PID/fdinfo to find tun/tap network interfaces. +# +# 3. this script, calls virsh to find libvirt network interfaces. 
#

# -----------------------------------------------------------------------------

# cgroup-network clears the system path before calling this script,
# so load /etc/profile when it is present
if [ -f /etc/profile ]
then
    # shellcheck source=/dev/null
    source /etc/profile
fi

export LC_ALL=C

PROGRAM_NAME="$(basename "${0}")"

# print the current timestamp used as the prefix of every log line
logdate() {
    date '+%Y-%m-%d %H:%M:%S'
}

# log LEVEL MESSAGE...
# writes one line to stderr: "<timestamp>: <program>: <LEVEL>: <message>"
log() {
    local level="${1}"
    shift

    echo >&2 "$(logdate): ${PROGRAM_NAME}: ${level}: ${*}"
}

# severity wrappers around log()
warning() { log WARNING "${@}"; }
error()   { log ERROR "${@}"; }
info()    { log INFO "${@}"; }

# log the message and terminate the script with exit code 1
fatal() {
    log FATAL "${@}"
    exit 1
}

# debug messages are logged only when debug=1 (set by --debug / debug)
debug=0
debug() {
    [ "${debug}" = "1" ] && log DEBUG "${@}"
}

# -----------------------------------------------------------------------------
# check for BASH v4+ (required for associative arrays)

if (( BASH_VERSINFO[0] < 4 ))
then
    fatal "BASH version 4 or later is required (this is ${BASH_VERSION})."
fi

# -----------------------------------------------------------------------------
# parse the arguments

pid=
cgroup=
while [ -n "${1}" ]
do
    case "${1}" in
        --debug|debug)
            debug=1
            ;;

        --pid|-p)
            # the value is the next argument; consume it here
            pid="${2}"
            shift
            ;;

        --cgroup)
            # the value is the next argument; consume it here
            cgroup="${2}"
            shift
            ;;

        *)
            fatal "Cannot understand argument '${1}'"
            ;;
    esac

    shift
done

# at least one of the two selectors must have been given
[ -z "${pid}${cgroup}" ] && fatal "Either --pid or --cgroup is required"

# -----------------------------------------------------------------------------

# in debug mode, print which detection method produced the lines that follow
set_source() {
    [ "${debug}" -eq 1 ] && echo "SRC ${*}"
}


# -----------------------------------------------------------------------------
# veth interfaces via cgroup

# cgroup-network can detect veth interfaces by itself (written in C).
+# If you seek for a shell version of what it does, check this: +# https://github.com/netdata/netdata/issues/474#issuecomment-317866709 + + +# ----------------------------------------------------------------------------- +# tun/tap interfaces via /proc/PID/fdinfo + +# find any tun/tap devices linked to a pid +proc_pid_fdinfo_iff() { + local p="${1}" # the pid + + debug "Searching for tun/tap interfaces for pid ${p}..." + set_source "fdinfo" + grep "^iff:.*" "${NETDATA_HOST_PREFIX}/proc/${p}/fdinfo"/* 2>/dev/null | cut -f 2 +} + +find_tun_tap_interfaces_for_cgroup() { + local c="${1}" # the cgroup path + + # for each pid of the cgroup + # find any tun/tap devices linked to the pid + if [ -f "${c}/emulator/cgroup.procs" ] + then + local p + for p in $(< "${c}/emulator/cgroup.procs" ) + do + proc_pid_fdinfo_iff "${p}" + done + fi +} + + +# ----------------------------------------------------------------------------- +# virsh domain network interfaces + +virsh_cgroup_to_domain_name() { + local c="${1}" # the cgroup path + + debug "extracting a possible virsh domain from cgroup ${c}..." 
+ + # extract for the cgroup path + sed -n -e "s|.*/machine-qemu\\\\x2d[0-9]\+\\\\x2d\(.*\)\.scope$|\1|p" \ + -e "s|.*/machine/\(.*\)\.libvirt-qemu$|\1|p" \ + < +#endif + +char environment_variable2[FILENAME_MAX + 50] = ""; +char *environment[] = { + "PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin", + environment_variable2, + NULL +}; + + +// ---------------------------------------------------------------------------- + +// callback required by fatal() +void netdata_cleanup_and_exit(int ret) { + exit(ret); +} + +// callbacks required by popen() +void signals_block(void) {}; +void signals_unblock(void) {}; +void signals_reset(void) {}; + +// callback required by eval() +int health_variable_lookup(const char *variable, uint32_t hash, struct rrdcalc *rc, calculated_number *result) { + (void)variable; + (void)hash; + (void)rc; + (void)result; + return 0; +}; + +// required by get_system_cpus() +char *netdata_configured_host_prefix = ""; + +// ---------------------------------------------------------------------------- + +struct iface { + const char *device; + uint32_t hash; + + unsigned int ifindex; + unsigned int iflink; + + struct iface *next; +}; + +unsigned int read_iface_iflink(const char *prefix, const char *iface) { + if(!prefix) prefix = ""; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/sys/class/net/%s/iflink", prefix, iface); + + unsigned long long iflink = 0; + int ret = read_single_number_file(filename, &iflink); + if(ret) error("Cannot read '%s'.", filename); + + return (unsigned int)iflink; +} + +unsigned int read_iface_ifindex(const char *prefix, const char *iface) { + if(!prefix) prefix = ""; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/sys/class/net/%s/ifindex", prefix, iface); + + unsigned long long ifindex = 0; + int ret = read_single_number_file(filename, &ifindex); + if(ret) error("Cannot read '%s'.", filename); + + return (unsigned int)ifindex; +} + +struct iface 
*read_proc_net_dev(const char *prefix) { + if(!prefix) prefix = ""; + + procfile *ff = NULL; + char filename[FILENAME_MAX + 1]; + + snprintfz(filename, FILENAME_MAX, "%s%s", prefix, (*prefix)?"/proc/1/net/dev":"/proc/net/dev"); + ff = procfile_open(filename, " \t,:|", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) { + error("Cannot open file '%s'", filename); + return NULL; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) { + error("Cannot read file '%s'", filename); + return NULL; + } + + size_t lines = procfile_lines(ff), l; + struct iface *root = NULL; + for(l = 2; l < lines ;l++) { + if (unlikely(procfile_linewords(ff, l) < 1)) continue; + + struct iface *t = callocz(1, sizeof(struct iface)); + t->device = strdupz(procfile_lineword(ff, l, 0)); + t->hash = simple_hash(t->device); + t->ifindex = read_iface_ifindex(prefix, t->device); + t->iflink = read_iface_iflink(prefix, t->device); + t->next = root; + root = t; + } + + procfile_close(ff); + + return root; +} + +void free_iface(struct iface *iface) { + freez((void *)iface->device); + freez(iface); +} + +void free_host_ifaces(struct iface *iface) { + while(iface) { + struct iface *t = iface->next; + free_iface(iface); + iface = t; + } +} + +int iface_is_eligible(struct iface *iface) { + if(iface->iflink != iface->ifindex) + return 1; + + return 0; +} + +int eligible_ifaces(struct iface *root) { + int eligible = 0; + + struct iface *t; + for(t = root; t ; t = t->next) + if(iface_is_eligible(t)) + eligible++; + + return eligible; +} + +static void continue_as_child(void) { + pid_t child = fork(); + int status; + pid_t ret; + + if (child < 0) + error("fork() failed"); + + /* Only the child returns */ + if (child == 0) + return; + + for (;;) { + ret = waitpid(child, &status, WUNTRACED); + if ((ret == child) && (WIFSTOPPED(status))) { + /* The child suspended so suspend us as well */ + kill(getpid(), SIGSTOP); + kill(child, SIGCONT); + } else { + break; + } + } + + /* Return the child's exit code if possible */ + 
if (WIFEXITED(status)) { + exit(WEXITSTATUS(status)); + } else if (WIFSIGNALED(status)) { + kill(getpid(), WTERMSIG(status)); + } + + exit(EXIT_FAILURE); +} + +int proc_pid_fd(const char *prefix, const char *ns, pid_t pid) { + if(!prefix) prefix = ""; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/%s", prefix, (int)pid, ns); + int fd = open(filename, O_RDONLY); + + if(fd == -1) + error("Cannot open proc_pid_fd() file '%s'", filename); + + return fd; +} + +static struct ns { + int nstype; + int fd; + int status; + const char *name; + const char *path; +} all_ns[] = { + // { .nstype = CLONE_NEWUSER, .fd = -1, .status = -1, .name = "user", .path = "ns/user" }, + // { .nstype = CLONE_NEWCGROUP, .fd = -1, .status = -1, .name = "cgroup", .path = "ns/cgroup" }, + // { .nstype = CLONE_NEWIPC, .fd = -1, .status = -1, .name = "ipc", .path = "ns/ipc" }, + // { .nstype = CLONE_NEWUTS, .fd = -1, .status = -1, .name = "uts", .path = "ns/uts" }, + { .nstype = CLONE_NEWNET, .fd = -1, .status = -1, .name = "network", .path = "ns/net" }, + { .nstype = CLONE_NEWPID, .fd = -1, .status = -1, .name = "pid", .path = "ns/pid" }, + { .nstype = CLONE_NEWNS, .fd = -1, .status = -1, .name = "mount", .path = "ns/mnt" }, + + // terminator + { .nstype = 0, .fd = -1, .status = -1, .name = NULL, .path = NULL } +}; + +int switch_namespace(const char *prefix, pid_t pid) { + if(!prefix) prefix = ""; + +#ifdef HAVE_SETNS + + int i; + for(i = 0; all_ns[i].name ; i++) + all_ns[i].fd = proc_pid_fd(prefix, all_ns[i].path, pid); + + int root_fd = proc_pid_fd(prefix, "root", pid); + int cwd_fd = proc_pid_fd(prefix, "cwd", pid); + + setgroups(0, NULL); + + // 2 passes - found it at nsenter source code + // this is related CLONE_NEWUSER functionality + + // This code cannot switch user namespace (it can all the other namespaces) + // Fortunately, we don't need to switch user namespaces. 
+ + int pass, errors = 0; + for(pass = 0; pass < 2 ;pass++) { + for(i = 0; all_ns[i].name ; i++) { + if (all_ns[i].fd != -1 && all_ns[i].status == -1) { + if(setns(all_ns[i].fd, all_ns[i].nstype) == -1) { + if(pass == 1) { + all_ns[i].status = 0; + error("Cannot switch to %s namespace of pid %d", all_ns[i].name, (int) pid); + errors++; + } + } + else + all_ns[i].status = 1; + } + } + } + + setgroups(0, NULL); + + if(root_fd != -1) { + if(fchdir(root_fd) < 0) + error("Cannot fchdir() to pid %d root directory", (int)pid); + + if(chroot(".") < 0) + error("Cannot chroot() to pid %d root directory", (int)pid); + + close(root_fd); + } + + if(cwd_fd != -1) { + if(fchdir(cwd_fd) < 0) + error("Cannot fchdir() to pid %d current working directory", (int)pid); + + close(cwd_fd); + } + + int do_fork = 0; + for(i = 0; all_ns[i].name ; i++) + if(all_ns[i].fd != -1) { + + // CLONE_NEWPID requires a fork() to become effective + if(all_ns[i].nstype == CLONE_NEWPID && all_ns[i].status) + do_fork = 1; + + close(all_ns[i].fd); + } + + if(do_fork) + continue_as_child(); + + return 0; + +#else + + errno = ENOSYS; + error("setns() is missing on this system."); + return 1; + +#endif +} + +pid_t read_pid_from_cgroup_file(const char *filename) { + int fd = open(filename, procfile_open_flags); + if(fd == -1) { + error("Cannot open pid_from_cgroup() file '%s'.", filename); + return 0; + } + + FILE *fp = fdopen(fd, "r"); + if(!fp) { + error("Cannot upgrade fd to fp for file '%s'.", filename); + return 0; + } + + char buffer[100 + 1]; + pid_t pid = 0; + char *s; + while((s = fgets(buffer, 100, fp))) { + buffer[100] = '\0'; + pid = atoi(s); + if(pid > 0) break; + } + + fclose(fp); + return pid; +} + +pid_t read_pid_from_cgroup_files(const char *path) { + char filename[FILENAME_MAX + 1]; + + snprintfz(filename, FILENAME_MAX, "%s/cgroup.procs", path); + pid_t pid = read_pid_from_cgroup_file(filename); + if(pid > 0) return pid; + + snprintfz(filename, FILENAME_MAX, "%s/tasks", path); + return 
read_pid_from_cgroup_file(filename); +} + +pid_t read_pid_from_cgroup(const char *path) { + pid_t pid = read_pid_from_cgroup_files(path); + if (pid > 0) return pid; + + DIR *dir = opendir(path); + if (!dir) { + error("cannot read directory '%s'", path); + return 0; + } + + struct dirent *de = NULL; + while ((de = readdir(dir))) { + if (de->d_type == DT_DIR + && ( + (de->d_name[0] == '.' && de->d_name[1] == '\0') + || (de->d_name[0] == '.' && de->d_name[1] == '.' && de->d_name[2] == '\0') + )) + continue; + + if (de->d_type == DT_DIR) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/%s", path, de->d_name); + pid = read_pid_from_cgroup(filename); + if(pid > 0) break; + } + } + closedir(dir); + return pid; +} + +// ---------------------------------------------------------------------------- +// send the result to netdata + +struct found_device { + const char *host_device; + const char *guest_device; + + uint32_t host_device_hash; + + struct found_device *next; +} *detected_devices = NULL; + +void add_device(const char *host, const char *guest) { + uint32_t hash = simple_hash(host); + + if(guest && (!*guest || strcmp(host, guest) == 0)) + guest = NULL; + + struct found_device *f; + for(f = detected_devices; f ; f = f->next) { + if(f->host_device_hash == hash && strcmp(host, f->host_device) == 0) { + + if(guest && !f->guest_device) + f->guest_device = strdupz(guest); + + return; + } + } + + f = mallocz(sizeof(struct found_device)); + f->host_device = strdupz(host); + f->host_device_hash = hash; + f->guest_device = (guest)?strdupz(guest):NULL; + f->next = detected_devices; + detected_devices = f; +} + +int send_devices(void) { + int found = 0; + + struct found_device *f; + for(f = detected_devices; f ; f = f->next) { + found++; + printf("%s %s\n", f->host_device, (f->guest_device)?f->guest_device:f->host_device); + } + + return found; +} + +// ---------------------------------------------------------------------------- +// this function should 
be called only **ONCE** +// also it has to be the **LAST** to be called +// since it switches namespaces, so after this call, everything is different! + +void detect_veth_interfaces(pid_t pid) { + struct iface *host = NULL, *cgroup = NULL, *h, *c; + + host = read_proc_net_dev(netdata_configured_host_prefix); + if(!host) { + errno = 0; + error("cannot read host interface list."); + goto cleanup; + } + + if(!eligible_ifaces(host)) { + errno = 0; + error("there are no double-linked host interfaces available."); + goto cleanup; + } + + if(switch_namespace(netdata_configured_host_prefix, pid)) { + errno = 0; + error("cannot switch to the namespace of pid %u", (unsigned int) pid); + goto cleanup; + } + + cgroup = read_proc_net_dev(NULL); + if(!cgroup) { + errno = 0; + error("cannot read cgroup interface list."); + goto cleanup; + } + + if(!eligible_ifaces(cgroup)) { + errno = 0; + error("there are not double-linked cgroup interfaces available."); + goto cleanup; + } + + for(h = host; h ; h = h->next) { + if(iface_is_eligible(h)) { + for (c = cgroup; c; c = c->next) { + if(iface_is_eligible(c) && h->ifindex == c->iflink && h->iflink == c->ifindex) { + add_device(h->device, c->device); + } + } + } + } + +cleanup: + free_host_ifaces(cgroup); + free_host_ifaces(host); +} + +// ---------------------------------------------------------------------------- +// call the external helper + +#define CGROUP_NETWORK_INTERFACE_MAX_LINE 2048 +void call_the_helper(pid_t pid, const char *cgroup) { + if(setresuid(0, 0, 0) == -1) + error("setresuid(0, 0, 0) failed."); + + char command[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; + if(cgroup) + snprintfz(command, CGROUP_NETWORK_INTERFACE_MAX_LINE, "exec " PLUGINS_DIR "/cgroup-network-helper.sh --cgroup '%s'", cgroup); + else + snprintfz(command, CGROUP_NETWORK_INTERFACE_MAX_LINE, "exec " PLUGINS_DIR "/cgroup-network-helper.sh --pid %d", pid); + + info("running: %s", command); + + pid_t cgroup_pid; + FILE *fp = mypopene(command, &cgroup_pid, 
environment); + if(fp) { + char buffer[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; + char *s; + while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, fp))) { + trim(s); + + if(*s && *s != '\n') { + char *t = s; + while(*t && *t != ' ') t++; + if(*t == ' ') { + *t = '\0'; + t++; + } + + if(!*s || !*t) continue; + add_device(s, t); + } + } + + mypclose(fp, cgroup_pid); + } + else + error("cannot execute cgroup-network helper script: %s", command); +} + +int is_valid_path_symbol(char c) { + switch(c) { + case '/': // path separators + case '\\': // needed for virsh domains \x2d1\x2dname + case ' ': // space + case '-': // hyphen + case '_': // underscore + case '.': // dot + case ',': // comma + return 1; + + default: + return 0; + } +} + +// we will pass this path a shell script running as root +// so, we need to make sure the path will be valid +// and will not include anything that could allow +// the caller use shell expansion for gaining escalated +// privileges. +int verify_path(const char *path) { + struct stat sb; + + char c; + const char *s = path; + while((c = *s++)) { + if(!( isalnum(c) || is_valid_path_symbol(c) )) { + error("invalid character in path '%s'", path); + return -1; + } + } + + if(strstr(path, "\\") && !strstr(path, "\\x")) { + error("invalid escape sequence in path '%s'", path); + return 1; + } + + if(strstr(path, "/../")) { + error("invalid parent path sequence detected in '%s'", path); + return 1; + } + + if(path[0] != '/') { + error("only absolute path names are supported - invalid path '%s'", path); + return -1; + } + + if (stat(path, &sb) == -1) { + error("cannot stat() path '%s'", path); + return -1; + } + + if((sb.st_mode & S_IFMT) != S_IFDIR) { + error("path '%s' is not a directory", path); + return -1; + } + + return 0; +} + +/* +char *fix_path_variable(void) { + const char *path = getenv("PATH"); + if(!path || !*path) return 0; + + char *p = strdupz(path); + char *safe_path = callocz(1, strlen(p) + strlen("PATH=") + 1); + 
strcpy(safe_path, "PATH="); + + int added = 0; + char *ptr = p; + while(ptr && *ptr) { + char *s = strsep(&ptr, ":"); + if(s && *s) { + if(verify_path(s) == -1) { + error("the PATH variable includes an invalid path '%s' - removed it.", s); + } + else { + info("the PATH variable includes a valid path '%s'.", s); + if(added) strcat(safe_path, ":"); + strcat(safe_path, s); + added++; + } + } + } + + info("unsafe PATH: '%s'.", path); + info(" safe PATH: '%s'.", safe_path); + + freez(p); + return safe_path; +} +*/ + +// ---------------------------------------------------------------------------- +// main + +void usage(void) { + fprintf(stderr, "%s [ -p PID | --pid PID | --cgroup /path/to/cgroup ]\n", program_name); + exit(1); +} + +int main(int argc, char **argv) { + pid_t pid = 0; + + program_name = argv[0]; + program_version = VERSION; + error_log_syslog = 0; + + // since cgroup-network runs as root, prevent it from opening symbolic links + procfile_open_flags = O_RDONLY|O_NOFOLLOW; + + // ------------------------------------------------------------------------ + // make sure NETDATA_HOST_PREFIX is safe + + netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX"); + if(verify_netdata_host_prefix() == -1) exit(1); + + if(netdata_configured_host_prefix[0] != '\0' && verify_path(netdata_configured_host_prefix) == -1) + fatal("invalid NETDATA_HOST_PREFIX '%s'", netdata_configured_host_prefix); + + // ------------------------------------------------------------------------ + // build a safe environment for our script + + // the first environment variable is a fixed PATH= + snprintfz(environment_variable2, sizeof(environment_variable2) - 1, "NETDATA_HOST_PREFIX=%s", netdata_configured_host_prefix); + + // ------------------------------------------------------------------------ + + if(argc == 2 && (!strcmp(argv[1], "version") || !strcmp(argv[1], "-version") || !strcmp(argv[1], "--version") || !strcmp(argv[1], "-v") || !strcmp(argv[1], "-V"))) { + fprintf(stderr, 
"cgroup-network %s\n", VERSION); + exit(0); + } + + if(argc != 3) + usage(); + + if(!strcmp(argv[1], "-p") || !strcmp(argv[1], "--pid")) { + pid = atoi(argv[2]); + + if(pid <= 0) { + errno = 0; + error("Invalid pid %d given", (int) pid); + return 2; + } + + call_the_helper(pid, NULL); + } + else if(!strcmp(argv[1], "--cgroup")) { + char *cgroup = argv[2]; + if(verify_path(cgroup) == -1) + fatal("cgroup '%s' does not exist or is not valid.", cgroup); + + pid = read_pid_from_cgroup(cgroup); + call_the_helper(pid, cgroup); + + if(pid <= 0 && !detected_devices) { + errno = 0; + error("Cannot find a cgroup PID from cgroup '%s'", cgroup); + } + } + else + usage(); + + if(pid > 0) + detect_veth_interfaces(pid); + + int found = send_devices(); + if(found <= 0) return 1; + return 0; +} diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c new file mode 100644 index 0000000000..9c0fd7f43f --- /dev/null +++ b/collectors/cgroups.plugin/sys_fs_cgroup.c @@ -0,0 +1,2771 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "sys_fs_cgroup.h" + +#define PLUGIN_CGROUPS_NAME "cgroups.plugin" +#define PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME "systemd" +#define PLUGIN_CGROUPS_MODULE_CGROUPS_NAME "/sys/fs/cgroup" + +// ---------------------------------------------------------------------------- +// cgroup globals + +static long system_page_size = 4096; // system will be queried via sysconf() in configuration() + +static int cgroup_enable_cpuacct_stat = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_memory = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_detailed_memory = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_memory_failcnt = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_swap = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_blkio_io = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_blkio_ops = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_blkio_throttle_io = 
CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_blkio_throttle_ops = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_blkio_merged_ops = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_AUTO; + +static int cgroup_enable_systemd_services = CONFIG_BOOLEAN_YES; +static int cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO; +static int cgroup_used_memory_without_cache = CONFIG_BOOLEAN_YES; + +static int cgroup_search_in_devices = 1; + +static int cgroup_enable_new_cgroups_detected_at_runtime = 1; +static int cgroup_check_for_new_every = 10; +static int cgroup_update_every = 1; + +static int cgroup_recheck_zero_blkio_every_iterations = 10; +static int cgroup_recheck_zero_mem_failcnt_every_iterations = 10; +static int cgroup_recheck_zero_mem_detailed_every_iterations = 10; + +static char *cgroup_cpuacct_base = NULL; +static char *cgroup_blkio_base = NULL; +static char *cgroup_memory_base = NULL; +static char *cgroup_devices_base = NULL; + +static int cgroup_root_count = 0; +static int cgroup_root_max = 1000; +static int cgroup_max_depth = 0; + +static SIMPLE_PATTERN *enabled_cgroup_patterns = NULL; +static SIMPLE_PATTERN *enabled_cgroup_paths = NULL; +static SIMPLE_PATTERN *enabled_cgroup_renames = NULL; +static SIMPLE_PATTERN *systemd_services_cgroups = NULL; + +static char *cgroups_rename_script = NULL; +static char *cgroups_network_interface_script = NULL; + +static int cgroups_check = 0; + +static uint32_t Read_hash = 0; +static uint32_t Write_hash = 0; +static uint32_t user_hash = 0; +static uint32_t system_hash = 0; +
// Loads every "plugin:cgroups" setting into the file-scope globals above:
// keyword hashes, collection frequencies, per-metric enable switches,
// the base path of each cgroup controller, limits, the cgroup matching
// patterns and the helper script paths.
//
// The configuration options are queried in a fixed order; keep that order
// when adding new options.
void read_cgroup_plugin_configuration() {
    system_page_size = sysconf(_SC_PAGESIZE);

    // hashes of the keywords matched by cgroup_read_blkio() and
    // cgroup_read_cpuacct_stat()
    Read_hash = simple_hash("Read");
    Write_hash = simple_hash("Write");
    user_hash = simple_hash("user");
    system_hash = simple_hash("system");

    // data collection frequency - never below the host's own update frequency
    cgroup_update_every = (int)config_get_number("plugin:cgroups", "update every", localhost->rrd_update_every);
    if(cgroup_update_every < localhost->rrd_update_every) {
        cgroup_update_every = localhost->rrd_update_every;
    }

    // discovery frequency - never below the data collection frequency
    long long default_check_every = (long long)cgroup_check_for_new_every * (long long)cgroup_update_every;
    cgroup_check_for_new_every = (int)config_get_number("plugin:cgroups", "check for new cgroups every", default_check_every);
    if(cgroup_check_for_new_every < cgroup_update_every) {
        cgroup_check_for_new_every = cgroup_update_every;
    }

    // cpu
    cgroup_enable_cpuacct_stat = config_get_boolean_ondemand("plugin:cgroups", "enable cpuacct stat (total CPU)", cgroup_enable_cpuacct_stat);
    cgroup_enable_cpuacct_usage = config_get_boolean_ondemand("plugin:cgroups", "enable cpuacct usage (per core CPU)", cgroup_enable_cpuacct_usage);

    // memory
    cgroup_enable_memory = config_get_boolean_ondemand("plugin:cgroups", "enable memory (used mem including cache)", cgroup_enable_memory);
    cgroup_enable_detailed_memory = config_get_boolean_ondemand("plugin:cgroups", "enable detailed memory", cgroup_enable_detailed_memory);
    cgroup_enable_memory_failcnt = config_get_boolean_ondemand("plugin:cgroups", "enable memory limits fail count", cgroup_enable_memory_failcnt);
    cgroup_enable_swap = config_get_boolean_ondemand("plugin:cgroups", "enable swap memory", cgroup_enable_swap);

    // block I/O
    cgroup_enable_blkio_io = config_get_boolean_ondemand("plugin:cgroups", "enable blkio bandwidth", cgroup_enable_blkio_io);
    cgroup_enable_blkio_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio operations", cgroup_enable_blkio_ops);
    cgroup_enable_blkio_throttle_io = config_get_boolean_ondemand("plugin:cgroups", "enable blkio throttle bandwidth", cgroup_enable_blkio_throttle_io);
    cgroup_enable_blkio_throttle_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio throttle operations", cgroup_enable_blkio_throttle_ops);
    cgroup_enable_blkio_queued_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio queued operations", cgroup_enable_blkio_queued_ops);
    cgroup_enable_blkio_merged_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio merged operations", cgroup_enable_blkio_merged_ops);

    // how many iterations to wait before re-reading files that had only zeros
    cgroup_recheck_zero_blkio_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero blkio every iterations", cgroup_recheck_zero_blkio_every_iterations);
    cgroup_recheck_zero_mem_failcnt_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero memory failcnt every iterations", cgroup_recheck_zero_mem_failcnt_every_iterations);
    cgroup_recheck_zero_mem_detailed_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero detailed memory every iterations", cgroup_recheck_zero_mem_detailed_every_iterations);

    // systemd services
    cgroup_enable_systemd_services = config_get_boolean("plugin:cgroups", "enable systemd services", cgroup_enable_systemd_services);
    cgroup_enable_systemd_services_detailed_memory = config_get_boolean("plugin:cgroups", "enable systemd services detailed memory", cgroup_enable_systemd_services_detailed_memory);
    cgroup_used_memory_without_cache = config_get_boolean("plugin:cgroups", "report used memory without cache", cgroup_used_memory_without_cache);

    // ------------------------------------------------------------------------
    // locate the mount point of each controller; when the mount table has no
    // matching entry, log it and fall back to the conventional path

    char path[FILENAME_MAX + 1];
    char *mount_point;
    struct mountinfo *mounts = mountinfo_read(0);
    struct mountinfo *mount;

    mount = mountinfo_find_by_filesystem_super_option(mounts, "cgroup", "cpuacct");
    if(!mount)
        mount = mountinfo_find_by_filesystem_mount_source(mounts, "cgroup", "cpuacct");

    if(mount)
        mount_point = mount->mount_point;
    else {
        error("CGROUP: cannot find cpuacct mountinfo. Assuming default: /sys/fs/cgroup/cpuacct");
        mount_point = "/sys/fs/cgroup/cpuacct";
    }
    snprintfz(path, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, mount_point);
    cgroup_cpuacct_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/cpuacct", path);

    mount = mountinfo_find_by_filesystem_super_option(mounts, "cgroup", "blkio");
    if(!mount)
        mount = mountinfo_find_by_filesystem_mount_source(mounts, "cgroup", "blkio");

    if(mount)
        mount_point = mount->mount_point;
    else {
        error("CGROUP: cannot find blkio mountinfo. Assuming default: /sys/fs/cgroup/blkio");
        mount_point = "/sys/fs/cgroup/blkio";
    }
    snprintfz(path, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, mount_point);
    cgroup_blkio_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/blkio", path);

    mount = mountinfo_find_by_filesystem_super_option(mounts, "cgroup", "memory");
    if(!mount)
        mount = mountinfo_find_by_filesystem_mount_source(mounts, "cgroup", "memory");

    if(mount)
        mount_point = mount->mount_point;
    else {
        error("CGROUP: cannot find memory mountinfo. Assuming default: /sys/fs/cgroup/memory");
        mount_point = "/sys/fs/cgroup/memory";
    }
    snprintfz(path, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, mount_point);
    cgroup_memory_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/memory", path);

    mount = mountinfo_find_by_filesystem_super_option(mounts, "cgroup", "devices");
    if(!mount)
        mount = mountinfo_find_by_filesystem_mount_source(mounts, "cgroup", "devices");

    if(mount)
        mount_point = mount->mount_point;
    else {
        error("CGROUP: cannot find devices mountinfo. Assuming default: /sys/fs/cgroup/devices");
        mount_point = "/sys/fs/cgroup/devices";
    }
    snprintfz(path, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, mount_point);
    cgroup_devices_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/devices", path);

    // ------------------------------------------------------------------------
    // limits

    cgroup_root_max = (int)config_get_number("plugin:cgroups", "max cgroups to allow", cgroup_root_max);
    cgroup_max_depth = (int)config_get_number("plugin:cgroups", "max cgroups depth to monitor", cgroup_max_depth);

    cgroup_enable_new_cgroups_detected_at_runtime = config_get_boolean("plugin:cgroups", "enable new cgroups detected at run time", cgroup_enable_new_cgroups_detected_at_runtime);

    // ------------------------------------------------------------------------
    // cgroups enabled by default

    char *pattern_list;

    pattern_list = config_get("plugin:cgroups", "enable by default cgroups matching",
            // ----------------------------------------------------------------

            " !*/init.scope "                      // ignore init.scope
            " !/system.slice/run-*.scope "         // ignore system.slice/run-XXXX.scope
            " *.scope "                            // we need all other *.scope for sure

            // ----------------------------------------------------------------

            " /machine.slice/*.service "           // #3367 systemd-nspawn

            // ----------------------------------------------------------------

            " !*/vcpu* "                           // libvirtd adds these sub-cgroups
            " !*/emulator "                        // libvirtd adds these sub-cgroups
            " !*.mount "
            " !*.partition "
            " !*.service "
            " !*.socket "
            " !*.slice "
            " !*.swap "
            " !*.user "
            " !/ "
            " !/docker "
            " !/libvirt "
            " !/lxc "
            " !/lxc/*/* "                          // #1397 #2649
            " !/machine "
            " !/qemu "
            " !/system "
            " !/systemd "
            " !/user "
            " * "                                  // enable anything else
    );
    enabled_cgroup_patterns = simple_pattern_create(pattern_list, NULL, SIMPLE_PATTERN_EXACT);

    // ------------------------------------------------------------------------
    // sub-paths searched for cgroups

    pattern_list = config_get("plugin:cgroups", "search for cgroups in subpaths matching",
            " !*/init.scope "                      // ignore init.scope
            " !*-qemu "                            // #345
            " !*.libvirt-qemu "                    // #3010
            " !/init.scope "
            " !/system "
            " !/systemd "
            " !/user "
            " !/user.slice "
            " !/lxc/*/* "                          // #2161 #2649
            " * "
    );
    enabled_cgroup_paths = simple_pattern_create(pattern_list, NULL, SIMPLE_PATTERN_EXACT);

    // ------------------------------------------------------------------------
    // helper scripts

    snprintfz(path, FILENAME_MAX, "%s/cgroup-name.sh", netdata_configured_plugins_dir);
    cgroups_rename_script = config_get("plugin:cgroups", "script to get cgroup names", path);

    snprintfz(path, FILENAME_MAX, "%s/cgroup-network", netdata_configured_plugins_dir);
    cgroups_network_interface_script = config_get("plugin:cgroups", "script to get cgroup network interfaces", path);

    // ------------------------------------------------------------------------
    // cgroups for which the rename script is executed

    pattern_list = config_get("plugin:cgroups", "run script to rename cgroups matching",
            " !/ "
            " !*.mount "
            " !*.socket "
            " !*.partition "
            " /machine.slice/*.service "           // #3367 systemd-nspawn
            " !*.service "
            " !*.slice "
            " !*.swap "
            " !*.user "
            " !init.scope "
            " !*.scope/vcpu* "                     // libvirtd adds these sub-cgroups
            " !*.scope/emulator "                  // libvirtd adds these sub-cgroups
            " *.scope "
            " *docker* "
            " *lxc* "
            " *qemu* "
            " *kubepods* "                         // #3396 kubernetes
            " *.libvirt-qemu "                     // #3010
            " * "
    );
    enabled_cgroup_renames = simple_pattern_create(pattern_list, NULL, SIMPLE_PATTERN_EXACT);

    // ------------------------------------------------------------------------
    // cgroups treated as systemd services (only queried when they are enabled)

    if(cgroup_enable_systemd_services) {
        pattern_list = config_get("plugin:cgroups", "cgroups to match as systemd services",
                " !/system.slice/*/*.service "
                " /system.slice/*.service "
        );
        systemd_services_cgroups = simple_pattern_create(pattern_list, NULL, SIMPLE_PATTERN_EXACT);
    }

    mountinfo_free_all(mounts);
}

// ----------------------------------------------------------------------------
// cgroup objects

// "Read" and "Write" totals accumulated from one blkio file of a cgroup
// by cgroup_read_blkio().
struct blkio {
    int updated;                // 1 after a successful parse, 0 when the file could not be read
    int enabled;                // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
    int delay_counter;          // iterations left to skip while in AUTO mode

    char *filename;

    unsigned long long Read;
    unsigned long long Write;
/*
    unsigned long long Sync;
    unsigned long long Async;
    unsigned long long Total;
*/
};

+// https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt +struct memory { + ARL_BASE *arl_base; + ARL_ENTRY *arl_dirty; + ARL_ENTRY *arl_swap; + + int updated_detailed; + int updated_usage_in_bytes; + int updated_msw_usage_in_bytes; + int 
updated_failcnt; + + int enabled_detailed; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + int enabled_usage_in_bytes; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + int enabled_msw_usage_in_bytes; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + int enabled_failcnt; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + + int delay_counter_detailed; + int delay_counter_failcnt; + + char *filename_detailed; + char *filename_usage_in_bytes; + char *filename_msw_usage_in_bytes; + char *filename_failcnt; + + int detailed_has_dirty; + int detailed_has_swap; + + // detailed metrics + unsigned long long cache; + unsigned long long rss; + unsigned long long rss_huge; + unsigned long long mapped_file; + unsigned long long writeback; + unsigned long long dirty; + unsigned long long swap; + unsigned long long pgpgin; + unsigned long long pgpgout; + unsigned long long pgfault; + unsigned long long pgmajfault; +/* + unsigned long long inactive_anon; + unsigned long long active_anon; + unsigned long long inactive_file; + unsigned long long active_file; + unsigned long long unevictable; + unsigned long long hierarchical_memory_limit; + unsigned long long total_cache; + unsigned long long total_rss; + unsigned long long total_rss_huge; + unsigned long long total_mapped_file; + unsigned long long total_writeback; + unsigned long long total_dirty; + unsigned long long total_swap; + unsigned long long total_pgpgin; + unsigned long long total_pgpgout; + unsigned long long total_pgfault; + unsigned long long total_pgmajfault; + unsigned long long total_inactive_anon; + unsigned long long total_active_anon; + unsigned long long total_inactive_file; + unsigned long long total_active_file; + unsigned long long total_unevictable; +*/ + + // single file metrics + unsigned long long usage_in_bytes; + unsigned long long msw_usage_in_bytes; + unsigned long long failcnt; +}; + +// https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt +// Total CPU time of one cgroup: the "user" and "system" rows that cgroup_read_cpuacct_stat() parses from `filename`. +// `updated` is set to 1 after a successful read and to 0 when the file could not be opened or parsed. +struct cpuacct_stat { + int updated; + int enabled; // 
CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + + char *filename; + + unsigned long long user; + unsigned long long system; +}; + +// https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt +// Per core CPU counters of one cgroup, parsed by cgroup_read_cpuacct_usage() from the first line of `filename`. +// `cpus` is the number of entries in `cpu_percpu`; the array is reallocated whenever the number of values found changes. +struct cpuacct_usage { + int updated; + int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO + + char *filename; + + unsigned int cpus; + unsigned long long *cpu_percpu; +}; + +// Node of the singly linked list anchored at struct cgroup.interfaces. +// NOTE(review): presumably pairs a network device on the host with its name inside the container - confirm against the code that fills the list. +struct cgroup_network_interface { + const char *host_device; + const char *container_device; + struct cgroup_network_interface *next; +}; + +// NOTE(review): presumably bit flags stored in struct cgroup.options - confirm at the points of use. +#define CGROUP_OPTIONS_DISABLED_DUPLICATE 0x00000001 +#define CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE 0x00000002 + +struct cgroup { + uint32_t options; + + char available; // found in the filesystem + char enabled; // enabled in the config + + char *id; + uint32_t hash; + + char *chart_id; + uint32_t hash_chart; + + char *chart_title; + + struct cpuacct_stat cpuacct_stat; + struct cpuacct_usage cpuacct_usage; + + struct memory memory; + + struct blkio io_service_bytes; // bytes + struct blkio io_serviced; // operations + + struct blkio throttle_io_service_bytes; // bytes + struct blkio throttle_io_serviced; // operations + + struct blkio io_merged; // operations + struct blkio io_queued; // operations + + struct cgroup_network_interface *interfaces; + + // per cgroup charts + RRDSET *st_cpu; + RRDSET *st_cpu_per_core; + RRDSET *st_mem; + RRDSET *st_writeback; + RRDSET *st_mem_activity; + RRDSET *st_pgfaults; + RRDSET *st_mem_usage; + RRDSET *st_mem_failcnt; + RRDSET *st_io; + RRDSET *st_serviced_ops; + RRDSET *st_throttle_io; + RRDSET *st_throttle_serviced_ops; + RRDSET *st_queued_ops; + RRDSET *st_merged_ops; + + // services + RRDDIM *rd_cpu; + RRDDIM *rd_mem_usage; + RRDDIM *rd_mem_failcnt; + RRDDIM *rd_swap_usage; + + RRDDIM *rd_mem_detailed_cache; + RRDDIM *rd_mem_detailed_rss; + RRDDIM *rd_mem_detailed_mapped; + RRDDIM *rd_mem_detailed_writeback; + RRDDIM *rd_mem_detailed_pgpgin; + RRDDIM *rd_mem_detailed_pgpgout; + RRDDIM *rd_mem_detailed_pgfault; + RRDDIM 
*rd_mem_detailed_pgmajfault; + + RRDDIM *rd_io_service_bytes_read; + RRDDIM *rd_io_serviced_read; + RRDDIM *rd_throttle_io_read; + RRDDIM *rd_throttle_io_serviced_read; + RRDDIM *rd_io_queued_read; + RRDDIM *rd_io_merged_read; + + RRDDIM *rd_io_service_bytes_write; + RRDDIM *rd_io_serviced_write; + RRDDIM *rd_throttle_io_write; + RRDDIM *rd_throttle_io_serviced_write; + RRDDIM *rd_io_queued_write; + RRDDIM *rd_io_merged_write; + + struct cgroup *next; + +} *cgroup_root = NULL; + +// ---------------------------------------------------------------------------- +// read values from /sys +
// Parses the "user" and "system" rows of cp->filename into cp->user and
// cp->system.
//
// Does nothing when cp->filename is not set. When the file cannot be opened
// or read, cp->updated is cleared and cgroups_check is raised. On success
// cp->updated is set, and a metric in AUTO mode is switched to YES as soon
// as a non-zero value is seen.
static inline void cgroup_read_cpuacct_stat(struct cpuacct_stat *cp) {
    static procfile *ff = NULL;

    if(unlikely(!cp->filename))
        return;

    // a failed reopen leaves ff NULL, so both failures share one exit path
    ff = procfile_reopen(ff, cp->filename, NULL, PROCFILE_FLAG_DEFAULT);
    if(likely(ff))
        ff = procfile_readall(ff);

    if(unlikely(!ff)) {
        cp->updated = 0;
        cgroups_check = 1;
        return;
    }

    unsigned long rows = procfile_lines(ff);
    if(unlikely(rows < 1)) {
        error("CGROUP: file '%s' should have 1+ lines.", cp->filename);
        cp->updated = 0;
        return;
    }

    unsigned long row;
    for(row = 0; row < rows; row++) {
        char *key = procfile_lineword(ff, row, 0);
        uint32_t key_hash = simple_hash(key);

        if(unlikely(key_hash == user_hash && !strcmp(key, "user"))) {
            cp->user = str2ull(procfile_lineword(ff, row, 1));
            continue;
        }

        if(unlikely(key_hash == system_hash && !strcmp(key, "system")))
            cp->system = str2ull(procfile_lineword(ff, row, 1));
    }

    cp->updated = 1;

    if(unlikely(cp->enabled == CONFIG_BOOLEAN_AUTO)) {
        if(cp->user || cp->system)
            cp->enabled = CONFIG_BOOLEAN_YES;
    }
}

// Parses the first line of ca->filename, one counter per word, into
// ca->cpu_percpu, resizing the array when the number of counters changes.
//
// Does nothing when ca->filename is not set. When the file cannot be opened
// or read, ca->updated is cleared and cgroups_check is raised. On success
// ca->updated is set, and a metric in AUTO mode is switched to YES as soon
// as the sum of all counters is non-zero.
static inline void cgroup_read_cpuacct_usage(struct cpuacct_usage *ca) {
    static procfile *ff = NULL;

    if(unlikely(!ca->filename))
        return;

    // a failed reopen leaves ff NULL, so both failures share one exit path
    ff = procfile_reopen(ff, ca->filename, NULL, PROCFILE_FLAG_DEFAULT);
    if(likely(ff))
        ff = procfile_readall(ff);

    if(unlikely(!ff)) {
        ca->updated = 0;
        cgroups_check = 1;
        return;
    }

    if(unlikely(procfile_lines(ff) < 1)) {
        error("CGROUP: file '%s' should have 1+ lines but has %zu.", ca->filename, procfile_lines(ff));
        ca->updated = 0;
        return;
    }

    unsigned long words = procfile_linewords(ff, 0);
    if(unlikely(words == 0)) {
        ca->updated = 0;
        return;
    }

    // we may have 1 more CPU reported: drop the empty words at the end
    for( ; words > 0 ; words--) {
        char *last = procfile_lineword(ff, 0, words - 1);
        if(*last) break;
    }

    if(unlikely(words != ca->cpus)) {
        freez(ca->cpu_percpu);
        ca->cpu_percpu = mallocz(sizeof(unsigned long long) * words);
        ca->cpus = (unsigned int)words;
    }

    unsigned long long sum = 0;
    unsigned long cpu;
    for(cpu = 0; cpu < ca->cpus; cpu++) {
        unsigned long long value = str2ull(procfile_lineword(ff, 0, cpu));
        ca->cpu_percpu[cpu] = value;
        sum += value;
    }

    ca->updated = 1;

    if(unlikely(ca->enabled == CONFIG_BOOLEAN_AUTO)) {
        if(sum)
            ca->enabled = CONFIG_BOOLEAN_YES;
    }
}

// Sums the "Read" and "Write" rows of io->filename into io->Read and
// io->Write. Rows with any other operation name are ignored.
//
// While the metric is in AUTO mode and io->delay_counter is positive, the
// call only decrements the counter. Does nothing when io->filename is not
// set. When the file cannot be opened or read, io->updated is cleared and
// cgroups_check is raised. On success io->updated is set; in AUTO mode the
// metric is switched to YES when a total is non-zero, otherwise the next
// cgroup_recheck_zero_blkio_every_iterations calls are skipped.
static inline void cgroup_read_blkio(struct blkio *io) {
    static procfile *ff = NULL;

    if(unlikely(io->enabled == CONFIG_BOOLEAN_AUTO && io->delay_counter > 0)) {
        io->delay_counter--;
        return;
    }

    if(unlikely(!io->filename))
        return;

    // a failed reopen leaves ff NULL, so both failures share one exit path
    ff = procfile_reopen(ff, io->filename, NULL, PROCFILE_FLAG_DEFAULT);
    if(likely(ff))
        ff = procfile_readall(ff);

    if(unlikely(!ff)) {
        io->updated = 0;
        cgroups_check = 1;
        return;
    }

    unsigned long rows = procfile_lines(ff);
    if(unlikely(rows < 1)) {
        error("CGROUP: file '%s' should have 1+ lines.", io->filename);
        io->updated = 0;
        return;
    }

    unsigned long long read_total = 0;
    unsigned long long write_total = 0;

    unsigned long row;
    for(row = 0; row < rows; row++) {
        char *operation = procfile_lineword(ff, row, 1);
        uint32_t operation_hash = simple_hash(operation);

        if(unlikely(operation_hash == Read_hash && !strcmp(operation, "Read")))
            read_total += str2ull(procfile_lineword(ff, row, 2));

        else if(unlikely(operation_hash == Write_hash && !strcmp(operation, "Write")))
            write_total += str2ull(procfile_lineword(ff, row, 2));
    }

    io->Read = read_total;
    io->Write = write_total;
    io->updated = 1;

    if(unlikely(io->enabled == CONFIG_BOOLEAN_AUTO)) {
        if(unlikely(io->Read || io->Write))
            io->enabled = CONFIG_BOOLEAN_YES;
        else
            io->delay_counter = cgroup_recheck_zero_blkio_every_iterations;
    }
}

+static inline void cgroup_read_memory(struct memory *mem) { + static procfile *ff = NULL; + + // read detailed ram usage + if(likely(mem->filename_detailed)) { + if(unlikely(mem->enabled_detailed == CONFIG_BOOLEAN_AUTO && mem->delay_counter_detailed > 0)) { + mem->delay_counter_detailed--; + goto memory_next; + } + + ff = procfile_reopen(ff, mem->filename_detailed, NULL, PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) { + mem->updated_detailed = 0; + cgroups_check = 1; + goto memory_next; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) { + mem->updated_detailed = 0; + cgroups_check = 1; + goto memory_next; + } + + unsigned long i, lines = procfile_lines(ff); + + if(unlikely(lines < 1)) { + error("CGROUP: file '%s' should have 1+ lines.", mem->filename_detailed); + mem->updated_detailed = 0; + goto memory_next; + } + + if(unlikely(!mem->arl_base)) { + mem->arl_base = arl_create("cgroup/memory", NULL, 60); + + arl_expect(mem->arl_base, "cache", &mem->cache); + arl_expect(mem->arl_base, "rss", &mem->rss); + arl_expect(mem->arl_base, "rss_huge", &mem->rss_huge); + arl_expect(mem->arl_base, "mapped_file", &mem->mapped_file); + arl_expect(mem->arl_base, "writeback", &mem->writeback); + mem->arl_dirty = arl_expect(mem->arl_base, "dirty", &mem->dirty); + mem->arl_swap = arl_expect(mem->arl_base, "swap", &mem->swap); + arl_expect(mem->arl_base, "pgpgin", &mem->pgpgin); + 
arl_expect(mem->arl_base, "pgpgout", &mem->pgpgout); + arl_expect(mem->arl_base, "pgfault", &mem->pgfault); + arl_expect(mem->arl_base, "pgmajfault", &mem->pgmajfault); + } + + arl_begin(mem->arl_base); + + for(i = 0; i < lines ; i++) { + if(arl_check(mem->arl_base, + procfile_lineword(ff, i, 0), + procfile_lineword(ff, i, 1))) break; + } + + if(unlikely(mem->arl_dirty->flags & ARL_ENTRY_FLAG_FOUND)) + mem->detailed_has_dirty = 1; + + if(unlikely(mem->arl_swap->flags & ARL_ENTRY_FLAG_FOUND)) + mem->detailed_has_swap = 1; + + // fprintf(stderr, "READ: '%s', cache: %llu, rss: %llu, rss_huge: %llu, mapped_file: %llu, writeback: %llu, dirty: %llu, swap: %llu, pgpgin: %llu, pgpgout: %llu, pgfault: %llu, pgmajfault: %llu, inactive_anon: %llu, active_anon: %llu, inactive_file: %llu, active_file: %llu, unevictable: %llu, hierarchical_memory_limit: %llu, total_cache: %llu, total_rss: %llu, total_rss_huge: %llu, total_mapped_file: %llu, total_writeback: %llu, total_dirty: %llu, total_swap: %llu, total_pgpgin: %llu, total_pgpgout: %llu, total_pgfault: %llu, total_pgmajfault: %llu, total_inactive_anon: %llu, total_active_anon: %llu, total_inactive_file: %llu, total_active_file: %llu, total_unevictable: %llu\n", mem->filename, mem->cache, mem->rss, mem->rss_huge, mem->mapped_file, mem->writeback, mem->dirty, mem->swap, mem->pgpgin, mem->pgpgout, mem->pgfault, mem->pgmajfault, mem->inactive_anon, mem->active_anon, mem->inactive_file, mem->active_file, mem->unevictable, mem->hierarchical_memory_limit, mem->total_cache, mem->total_rss, mem->total_rss_huge, mem->total_mapped_file, mem->total_writeback, mem->total_dirty, mem->total_swap, mem->total_pgpgin, mem->total_pgpgout, mem->total_pgfault, mem->total_pgmajfault, mem->total_inactive_anon, mem->total_active_anon, mem->total_inactive_file, mem->total_active_file, mem->total_unevictable); + + mem->updated_detailed = 1; + + if(unlikely(mem->enabled_detailed == CONFIG_BOOLEAN_AUTO)) { + if(mem->cache || mem->dirty || mem->rss || 
mem->rss_huge || mem->mapped_file || mem->writeback || mem->swap || mem->pgpgin || mem->pgpgout || mem->pgfault || mem->pgmajfault) + mem->enabled_detailed = CONFIG_BOOLEAN_YES; + else + mem->delay_counter_detailed = cgroup_recheck_zero_mem_detailed_every_iterations; + } + } + +memory_next: + + // read usage_in_bytes + if(likely(mem->filename_usage_i