summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Ilya Mashchenko <ilya@netdata.cloud> 2023-01-16 15:02:39 +0200
committer: GitHub <noreply@github.com> 2023-01-16 15:02:39 +0200
commit: a14a21f90f7f38ff2b71eb3bc0b442d240c09fd6 (patch)
tree: 18c796b007e858fc200a03a46caf1d4c9e305105
parent: 7d0ca0b83e96fdebed5e66b73362ece4fa0ad62e (diff)
fix(packaging): fix cpu/memory metrics when running inside LXC container as systemd service (#14255)
Fixes https://github.com/netdata/netdata/issues/14238
-rw-r--r-- collectors/proc.plugin/plugin_proc.c 37
-rw-r--r-- collectors/proc.plugin/plugin_proc.h 1
-rw-r--r-- collectors/proc.plugin/proc_meminfo.c 8
-rwxr-xr-x daemon/system-info.sh 5
-rw-r--r-- system/netdata.service.in 4
5 files changed, 51 insertions, 4 deletions
diff --git a/collectors/proc.plugin/plugin_proc.c b/collectors/proc.plugin/plugin_proc.c
index 220f9cb3db..ab974d4977 100644
--- a/collectors/proc.plugin/plugin_proc.c
+++ b/collectors/proc.plugin/plugin_proc.c
@@ -98,6 +98,41 @@ static void proc_main_cleanup(void *ptr)
worker_unregister();
}
+bool inside_lxc_container = false;
+
+// Returns true when /proc/self/mounts lists an "lxcfs" source mounted at a
+// path that begins with "/proc"; returns false otherwise, or when the mount
+// table cannot be opened or read.
+static bool is_lxcfs_proc_mounted(void) {
+    // The path is a constant, so it is passed straight to procfile_open().
+    procfile *ff = procfile_open("/proc/self/mounts", " \t", PROCFILE_FLAG_DEFAULT);
+    if (unlikely(!ff))
+        return false;
+
+    ff = procfile_readall(ff);
+    if (unlikely(!ff))
+        return false;
+
+    bool found = false;
+    size_t lines = procfile_lines(ff);
+
+    for (size_t l = 0; l < lines && !found; l++) {
+        // Word 0 is compared as the mount source, word 1 as the mount point.
+        if (procfile_linewords(ff, l) < 2)
+            continue;
+
+        const char *source = procfile_lineword(ff, l, 0);
+        const char *target = procfile_lineword(ff, l, 1);
+
+        // Prefix match, so "/proc/cpuinfo", "/proc/meminfo", etc. all count.
+        found = !strcmp(source, "lxcfs") && !strncmp(target, "/proc", 5);
+    }
+
+    // After a successful read the procfile is closed exactly once, here.
+    procfile_close(ff);
+    return found;
+}
+
void *proc_main(void *ptr)
{
worker_register("PROC");
@@ -128,6 +163,8 @@ void *proc_main(void *ptr)
heartbeat_t hb;
heartbeat_init(&hb);
+ inside_lxc_container = is_lxcfs_proc_mounted();
+
while (service_running(SERVICE_COLLECTORS)) {
worker_is_idle();
usec_t hb_dt = heartbeat_next(&hb, step);
diff --git a/collectors/proc.plugin/plugin_proc.h b/collectors/proc.plugin/plugin_proc.h
index d67ccd6e59..b4fd7b83d4 100644
--- a/collectors/proc.plugin/plugin_proc.h
+++ b/collectors/proc.plugin/plugin_proc.h
@@ -48,6 +48,7 @@ int get_numa_node_count(void);
// metrics that need to be shared among data collectors
extern unsigned long long zfs_arcstats_shrinkable_cache_size_bytes;
+extern bool inside_lxc_container;
// netdev renames
void netdev_rename_device_add(
diff --git a/collectors/proc.plugin/proc_meminfo.c b/collectors/proc.plugin/proc_meminfo.c
index 2f390c6535..6988c70e01 100644
--- a/collectors/proc.plugin/proc_meminfo.c
+++ b/collectors/proc.plugin/proc_meminfo.c
@@ -158,9 +158,11 @@ int do_proc_meminfo(int update_every, usec_t dt) {
unsigned long long MemCached = Cached + SReclaimable - Shmem;
unsigned long long MemUsed = MemTotal - MemFree - MemCached - Buffers;
// The Linux kernel doesn't report ZFS ARC usage as cache memory (the ARC is included in the total used system memory)
- MemCached += (zfs_arcstats_shrinkable_cache_size_bytes / 1024);
- MemUsed -= (zfs_arcstats_shrinkable_cache_size_bytes / 1024);
- MemAvailable += (zfs_arcstats_shrinkable_cache_size_bytes / 1024);
+ if (!inside_lxc_container) {
+ MemCached += (zfs_arcstats_shrinkable_cache_size_bytes / 1024);
+ MemUsed -= (zfs_arcstats_shrinkable_cache_size_bytes / 1024);
+ MemAvailable += (zfs_arcstats_shrinkable_cache_size_bytes / 1024);
+ }
if(do_ram) {
{
diff --git a/daemon/system-info.sh b/daemon/system-info.sh
index 68cdc48122..1e334a3d13 100755
--- a/daemon/system-info.sh
+++ b/daemon/system-info.sh
@@ -217,6 +217,9 @@ if [ -n "${lscpu}" ] && lscpu > /dev/null 2>&1; then
LCPU_COUNT="$(echo "${lscpu_output}" | grep "^CPU(s):" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"
CPU_VENDOR="$(echo "${lscpu_output}" | grep "^Vendor ID:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"
CPU_MODEL="$(echo "${lscpu_output}" | grep "^Model name:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"
+ if grep -q "^lxcfs /proc" /proc/self/mounts 2>/dev/null && count=$(grep -c ^processor /proc/cpuinfo 2>/dev/null); then
+ LCPU_COUNT="$count"
+ fi
possible_cpu_freq="$(echo "${lscpu_output}" | grep -F "CPU max MHz:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | grep -o '^[0-9]*')"
if [ -z "$possible_cpu_freq" ]; then
possible_cpu_freq="$(echo "${lscpu_output}" | grep -F "CPU MHz:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | grep -o '^[0-9]*')"
@@ -437,7 +440,7 @@ CLOUD_INSTANCE_TYPE="unknown"
CLOUD_INSTANCE_REGION="unknown"
if [ "${VIRTUALIZATION}" != "none" ] && command -v curl > /dev/null 2>&1; then
- # Returned HTTP status codes: GCP is 200, AWS is 200, DO is 404.
+ # Returned HTTP status codes: GCP is 200, AWS is 200, DO is 404.
curl --fail -s -m 1 --noproxy "*" http://169.254.169.254 >/dev/null 2>&1
ret=$?
# anything but operation timeout.
diff --git a/system/netdata.service.in b/system/netdata.service.in
index 7af84e63c8..25d95b2b81 100644
--- a/system/netdata.service.in
+++ b/system/netdata.service.in
@@ -71,6 +71,10 @@ ProtectControlGroups=on
ReadWriteDirectories=/run/netdata
# This is needed to make email-based alert deliver work if Postfix is the email provider on the system.
ReadWriteDirectories=-/var/spool/postfix/maildrop
+# LXCFS directories (https://github.com/lxc/lxcfs#lxcfs)
+# If we don't set them explicitly, systemd mounts procfs from the host. See https://github.com/netdata/netdata/issues/14238.
+BindReadOnlyPaths=-/proc/cpuinfo -/proc/diskstats -/proc/loadavg -/proc/meminfo
+BindReadOnlyPaths=-/proc/stat -/proc/swaps -/proc/uptime -/proc/slabinfo
[Install]
WantedBy=multi-user.target