summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorthiagoftsm <thiagoftsm@gmail.com>2023-04-11 12:01:07 +0000
committerGitHub <noreply@github.com>2023-04-11 12:01:07 +0000
commit77a0551f92c3a7b921fc276a5c39de4badd2cecd (patch)
treecf4261ae8fb765ded2256ad2e882993a41fe84e9
parente2aa89a9bbae30e6438eaeb6731222f3021437e7 (diff)
eBPF bug fixes (#14869)
-rw-r--r--collectors/ebpf.plugin/ebpf.c3
-rw-r--r--collectors/ebpf.plugin/ebpf_hardirq.c179
-rw-r--r--collectors/ebpf.plugin/ebpf_hardirq.h12
-rw-r--r--collectors/ebpf.plugin/ebpf_oomkill.c2
-rw-r--r--libnetdata/ebpf/ebpf.c12
-rwxr-xr-xnetdata-installer.sh5
-rw-r--r--packaging/ebpf-co-re.checksums2
-rw-r--r--packaging/ebpf-co-re.version2
-rw-r--r--packaging/ebpf.checksums6
-rw-r--r--packaging/ebpf.version2
10 files changed, 172 insertions, 53 deletions
diff --git a/collectors/ebpf.plugin/ebpf.c b/collectors/ebpf.plugin/ebpf.c
index 44e072b24b..3468bb1485 100644
--- a/collectors/ebpf.plugin/ebpf.c
+++ b/collectors/ebpf.plugin/ebpf.c
@@ -1820,8 +1820,9 @@ void set_global_variables()
ebpf_configured_log_dir = LOG_DIR;
ebpf_nprocs = (int)sysconf(_SC_NPROCESSORS_ONLN);
- if (ebpf_nprocs > NETDATA_MAX_PROCESSOR) {
+ if (ebpf_nprocs < 0) {
ebpf_nprocs = NETDATA_MAX_PROCESSOR;
+ error("Cannot identify number of process, using default value %d", ebpf_nprocs);
}
isrh = get_redhat_release();
diff --git a/collectors/ebpf.plugin/ebpf_hardirq.c b/collectors/ebpf.plugin/ebpf_hardirq.c
index 1b1ea8ab4c..ff98704dc1 100644
--- a/collectors/ebpf.plugin/ebpf_hardirq.c
+++ b/collectors/ebpf.plugin/ebpf_hardirq.c
@@ -129,11 +129,54 @@ static hardirq_static_val_t hardirq_static_vals[] = {
// thread will write to netdata agent.
static avl_tree_lock hardirq_pub;
-// tmp store for dynamic hard IRQ values we get from a per-CPU eBPF map.
-static hardirq_ebpf_val_t *hardirq_ebpf_vals = NULL;
+/*****************************************************************
+ *
+ * ARAL SECTION
+ *
+ *****************************************************************/
+
+// ARAL vectors used to speed up processing
+ARAL *ebpf_aral_hardirq = NULL;
-// tmp store for static hard IRQ values we get from a per-CPU eBPF map.
-static hardirq_ebpf_static_val_t *hardirq_ebpf_static_vals = NULL;
+/**
+ * eBPF hardirq Aral init
+ *
+ * Initiallize array allocator that will be used when integration with apps is enabled.
+ */
+static inline void ebpf_hardirq_aral_init()
+{
+ ebpf_aral_hardirq = ebpf_allocate_pid_aral(NETDATA_EBPF_HARDIRQ_ARAL_NAME, sizeof(hardirq_val_t));
+}
+
+/**
+ * eBPF hardirq get
+ *
+ * Get a hardirq_val_t entry to be used with a specific IRQ.
+ *
+ * @return it returns the address on success.
+ */
+hardirq_val_t *ebpf_hardirq_get(void)
+{
+ hardirq_val_t *target = aral_mallocz(ebpf_aral_hardirq);
+ memset(target, 0, sizeof(hardirq_val_t));
+ return target;
+}
+
+/**
+ * eBPF hardirq release
+ *
+ * @param stat Release a target after usage.
+ */
+void ebpf_hardirq_release(hardirq_val_t *stat)
+{
+ aral_freez(ebpf_aral_hardirq, stat);
+}
+
+/*****************************************************************
+ *
+ * EXIT FUNCTIONS
+ *
+ *****************************************************************/
/**
* Hardirq Free
@@ -151,9 +194,6 @@ static void ebpf_hardirq_free(ebpf_module_t *em)
for (int i = 0; hardirq_tracepoints[i].class != NULL; i++) {
ebpf_disable_tracepoint(&hardirq_tracepoints[i]);
}
- freez(hardirq_ebpf_vals);
- freez(hardirq_ebpf_static_vals);
-
pthread_mutex_lock(&ebpf_exit_cleanup);
em->thread->enabled = NETDATA_THREAD_EBPF_STOPPED;
pthread_mutex_unlock(&ebpf_exit_cleanup);
@@ -200,8 +240,82 @@ static int hardirq_val_cmp(void *a, void *b)
}
}
-static void hardirq_read_latency_map(int mapfd)
+/**
+ * Parse interrupts
+ *
+ * Parse /proc/interrupts to get names used in metrics
+ *
+ * @param irq_name vector to store data.
+ * @param irq irq value
+ *
+ * @return It returns 0 on success and -1 otherwise
+ */
+static int hardirq_parse_interrupts(char *irq_name, int irq)
{
+ static procfile *ff = NULL;
+ static int cpus = -1;
+ if(unlikely(!ff)) {
+ char filename[FILENAME_MAX + 1];
+ snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/interrupts");
+ ff = procfile_open(filename, " \t:", PROCFILE_FLAG_DEFAULT);
+ }
+ if(unlikely(!ff))
+ return -1;
+
+ ff = procfile_readall(ff);
+ if(unlikely(!ff))
+ return -1; // we return 0, so that we will retry to open it next time
+
+ size_t words = procfile_linewords(ff, 0);
+ if(unlikely(cpus == -1)) {
+ uint32_t w;
+ cpus = 0;
+ for(w = 0; w < words ; w++) {
+ if(likely(strncmp(procfile_lineword(ff, 0, w), "CPU", 3) == 0))
+ cpus++;
+ }
+ }
+
+ size_t lines = procfile_lines(ff), l;
+ if(unlikely(!lines)) {
+ collector_error("Cannot read /proc/interrupts, zero lines reported.");
+ return -1;
+ }
+
+ for(l = 1; l < lines ;l++) {
+ words = procfile_linewords(ff, l);
+ if(unlikely(!words)) continue;
+ const char *id = procfile_lineword(ff, l, 0);
+ if (!isdigit(id[0]))
+ continue;
+
+ int cmp = str2i(id);
+ if (cmp != irq)
+ continue;
+
+ if(unlikely((uint32_t)(cpus + 2) < words)) {
+ const char *name = procfile_lineword(ff, l, words - 1);
+ // On some motherboards IRQ can have the same name, so we append IRQ id to differentiate.
+ snprintfz(irq_name, NETDATA_HARDIRQ_NAME_LEN - 1, "%d_%s", irq, name);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Read Latency MAP
+ *
+ * Read data from kernel ring to user ring.
+ *
+ * @param mapfd hash map id.
+ *
+ * @return it returns 0 on success and -1 otherwise
+ */
+static int hardirq_read_latency_map(int mapfd)
+{
+ hardirq_ebpf_static_val_t hardirq_ebpf_vals[ebpf_nprocs + 1];
+
hardirq_ebpf_key_t key = {};
hardirq_ebpf_key_t next_key = {};
hardirq_val_t search_v = {};
@@ -234,7 +348,7 @@ static void hardirq_read_latency_map(int mapfd)
if (unlikely(v == NULL)) {
// latency/name can only be added reliably at a later time.
// when they're added, only then will we AVL insert.
- v = callocz(1, sizeof(hardirq_val_t));
+ v = ebpf_hardirq_get();
v->irq = key.irq;
v->dim_exists = false;
@@ -246,22 +360,10 @@ static void hardirq_read_latency_map(int mapfd)
// 2. the name is unfortunately *not* available on all CPU maps - only
// a single map contains the name, so we must find it. we only need
// to copy it though if the IRQ is new for us.
- bool name_saved = false;
uint64_t total_latency = 0;
int i;
- int end = (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs;
- for (i = 0; i < end; i++) {
+ for (i = 0; i < ebpf_nprocs; i++) {
total_latency += hardirq_ebpf_vals[i].latency/1000;
-
- // copy name for new IRQs.
- if (v_is_new && !name_saved && hardirq_ebpf_vals[i].name[0] != '\0') {
- strncpyz(
- v->name,
- hardirq_ebpf_vals[i].name,
- NETDATA_HARDIRQ_NAME_LEN
- );
- name_saved = true;
- }
}
// can now safely publish latency for existing IRQs.
@@ -269,6 +371,11 @@ static void hardirq_read_latency_map(int mapfd)
// can now safely publish new IRQ.
if (v_is_new) {
+ if (hardirq_parse_interrupts(v->name, v->irq)) {
+ ebpf_hardirq_release(v);
+ return -1;
+ }
+
avl_t *check = avl_insert_lock(&hardirq_pub, (avl_t *)v);
if (check != (avl_t *)v) {
error("Internal error, cannot insert the AVL tree.");
@@ -277,10 +384,14 @@ static void hardirq_read_latency_map(int mapfd)
key = next_key;
}
+
+ return 0;
}
static void hardirq_read_latency_static_map(int mapfd)
{
+ hardirq_ebpf_static_val_t hardirq_ebpf_static_vals[ebpf_nprocs + 1];
+
uint32_t i;
for (i = 0; i < HARDIRQ_EBPF_STATIC_END; i++) {
uint32_t map_i = hardirq_static_vals[i].idx;
@@ -302,11 +413,17 @@ static void hardirq_read_latency_static_map(int mapfd)
/**
* Read eBPF maps for hard IRQ.
+ *
+ * @return When it is not possible to parse /proc, it returns -1, on success it returns 0;
*/
-static void hardirq_reader()
+static int hardirq_reader()
{
- hardirq_read_latency_map(hardirq_maps[HARDIRQ_MAP_LATENCY].map_fd);
+ if (hardirq_read_latency_map(hardirq_maps[HARDIRQ_MAP_LATENCY].map_fd))
+ return -1;
+
hardirq_read_latency_static_map(hardirq_maps[HARDIRQ_MAP_LATENCY_STATIC].map_fd);
+
+ return 0;
}
static void hardirq_create_charts(int update_every)
@@ -375,16 +492,8 @@ static inline void hardirq_write_static_dims()
*/
static void hardirq_collector(ebpf_module_t *em)
{
- hardirq_ebpf_vals = callocz(
- (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs,
- sizeof(hardirq_ebpf_val_t)
- );
- hardirq_ebpf_static_vals = callocz(
- (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs,
- sizeof(hardirq_ebpf_static_val_t)
- );
-
avl_init_lock(&hardirq_pub, hardirq_val_cmp);
+ ebpf_hardirq_aral_init();
// create chart and static dims.
pthread_mutex_lock(&lock);
@@ -407,7 +516,9 @@ static void hardirq_collector(ebpf_module_t *em)
continue;
counter = 0;
- hardirq_reader();
+ if (hardirq_reader())
+ break;
+
pthread_mutex_lock(&lock);
// write dims now for all hitherto discovered IRQs.
diff --git a/collectors/ebpf.plugin/ebpf_hardirq.h b/collectors/ebpf.plugin/ebpf_hardirq.h
index fe38b1bb12..52dea1e560 100644
--- a/collectors/ebpf.plugin/ebpf_hardirq.h
+++ b/collectors/ebpf.plugin/ebpf_hardirq.h
@@ -3,6 +3,9 @@
#ifndef NETDATA_EBPF_HARDIRQ_H
#define NETDATA_EBPF_HARDIRQ_H 1
+#include <stdint.h>
+#include "libnetdata/avl/avl.h"
+
/*****************************************************************
* copied from kernel-collectors repo, with modifications needed
* for inclusion here.
@@ -15,12 +18,6 @@ typedef struct hardirq_ebpf_key {
int irq;
} hardirq_ebpf_key_t;
-typedef struct hardirq_ebpf_val {
- uint64_t latency;
- uint64_t ts;
- char name[NETDATA_HARDIRQ_NAME_LEN];
-} hardirq_ebpf_val_t;
-
enum hardirq_ebpf_static {
HARDIRQ_EBPF_STATIC_APIC_THERMAL,
HARDIRQ_EBPF_STATIC_APIC_THRESHOLD,
@@ -46,6 +43,9 @@ typedef struct hardirq_ebpf_static_val {
* below this is eBPF plugin-specific code.
*****************************************************************/
+// ARAL Name
+#define NETDATA_EBPF_HARDIRQ_ARAL_NAME "ebpf_harddirq"
+
#define NETDATA_EBPF_MODULE_NAME_HARDIRQ "hardirq"
#define NETDATA_HARDIRQ_CONFIG_FILE "hardirq.conf"
diff --git a/collectors/ebpf.plugin/ebpf_oomkill.c b/collectors/ebpf.plugin/ebpf_oomkill.c
index d5f3481d20..1687a8d5d8 100644
--- a/collectors/ebpf.plugin/ebpf_oomkill.c
+++ b/collectors/ebpf.plugin/ebpf_oomkill.c
@@ -299,7 +299,7 @@ static void oomkill_collector(ebpf_module_t *em)
int counter = update_every - 1;
while (!ebpf_exit_plugin) {
(void)heartbeat_next(&hb, USEC_PER_SEC);
- if (!ebpf_exit_plugin || ++counter != update_every)
+ if (ebpf_exit_plugin || ++counter != update_every)
continue;
counter = 0;
diff --git a/libnetdata/ebpf/ebpf.c b/libnetdata/ebpf/ebpf.c
index ee3ce3e003..c47ad29943 100644
--- a/libnetdata/ebpf/ebpf.c
+++ b/libnetdata/ebpf/ebpf.c
@@ -366,20 +366,22 @@ static uint32_t ebpf_select_index(uint32_t kernels, int is_rhf, uint32_t kver)
* V - The kernel version in string format.
*
* @param out the vector where the name will be stored
- * @param path
* @param len the size of the out vector.
+ * @param path where the binaries are stored
* @param kver the kernel version
* @param name the eBPF program name.
* @param is_return is return or entry ?
*/
-static void ebpf_mount_name(char *out, size_t len, char *path, uint32_t kver, const char *name, int is_return)
+static void ebpf_mount_name(char *out, size_t len, char *path, uint32_t kver, const char *name,
+ int is_return, int is_rhf)
{
char *version = ebpf_select_kernel_name(kver);
- snprintfz(out, len, "%s/ebpf.d/%cnetdata_ebpf_%s.%s.o",
+ snprintfz(out, len, "%s/ebpf.d/%cnetdata_ebpf_%s.%s%s.o",
path,
(is_return) ? 'r' : 'p',
name,
- version);
+ version,
+ (is_rhf != -1) ? ".rhf" : "");
}
//----------------------------------------------------------------------------------------------------------------------
@@ -781,7 +783,7 @@ struct bpf_link **ebpf_load_program(char *plugins_dir, ebpf_module_t *em, int kv
uint32_t idx = ebpf_select_index(em->kernels, is_rhf, kver);
- ebpf_mount_name(lpath, 4095, plugins_dir, idx, em->thread_name, em->mode);
+ ebpf_mount_name(lpath, 4095, plugins_dir, idx, em->thread_name, em->mode, is_rhf);
// When this function is called ebpf.plugin is using legacy code, so we should reset the variable
em->load &= ~ NETDATA_EBPF_LOAD_METHODS;
diff --git a/netdata-installer.sh b/netdata-installer.sh
index 6da397e987..c145c2800f 100755
--- a/netdata-installer.sh
+++ b/netdata-installer.sh
@@ -1564,6 +1564,11 @@ remove_old_ebpf() {
rm -f "${NETDATA_PREFIX}/usr/libexec/netdata/plugins.d/pnetdata_ebpf"*.?.*.o
fi
+ # Remove old eBPF programs that did not have "rhf" suffix
+ if [ ! -f "${NETDATA_PREFIX}/usr/libexec/netdata/plugins.d/ebpf.d/pnetdata_ebpf_process.3.10.rhf.o" ]; then
+ rm -f "${NETDATA_PREFIX}/usr/libexec/netdata/plugins.d/ebpf.d/"*.o
+ fi
+
# Remove old reject list from previous directory
if [ -f "${NETDATA_PREFIX}/usr/lib/netdata/conf.d/ebpf_kernel_reject_list.txt" ]; then
echo >&2 "Removing old ebpf_kernel_reject_list.txt."
diff --git a/packaging/ebpf-co-re.checksums b/packaging/ebpf-co-re.checksums
index 7f08d7327d..813e421cc7 100644
--- a/packaging/ebpf-co-re.checksums
+++ b/packaging/ebpf-co-re.checksums
@@ -1 +1 @@
-d1864cd736d236aa3738152d86096529830822a26405a62fe164779949bb3658 netdata-ebpf-co-re-glibc-v1.1.0.tar.xz
+a50e649635cc2fe86c21a08334ee73451f08591ebbda8b5d0012c3b8fad2cc1e netdata-ebpf-co-re-glibc-v1.1.2.tar.xz
diff --git a/packaging/ebpf-co-re.version b/packaging/ebpf-co-re.version
index 795460fcec..0f1acbd565 100644
--- a/packaging/ebpf-co-re.version
+++ b/packaging/ebpf-co-re.version
@@ -1 +1 @@
-v1.1.0
+v1.1.2
diff --git a/packaging/ebpf.checksums b/packaging/ebpf.checksums
index e74349115f..0d0be4eabb 100644
--- a/packaging/ebpf.checksums
+++ b/packaging/ebpf.checksums
@@ -1,3 +1,3 @@
-7f28bb61b1e9fdac59e5f8f041502c54f319048c1cf4adaa96ace3360f55a80e ./netdata-kernel-collector-glibc-v1.1.0.tar.xz
-5d927deadac9a4a5bc8a5be386aec2ea4f9b8335e60eadf375b11e7656404270 ./netdata-kernel-collector-musl-v1.1.0.tar.xz
-0d8825b77b8ba20e10b6e24f15c1d65a43f1c47dced93798839adc789f1427d3 ./netdata-kernel-collector-static-v1.1.0.tar.xz
+597a20895bbedcf87528b08fa9057426bd3c7638aa1ffac94f8987a90634513d ./netdata-kernel-collector-glibc-v1.1.2.tar.xz
+25db2232b75bdb7fc6e10db870c3a3290f52ecfcdcf546d0e51947f2a4c17ccf ./netdata-kernel-collector-musl-v1.1.2.tar.xz
+1d60425f5e8c6e30b3be86028dfc62c16022d8fe561e4c21c84cf6e8b998cd7d ./netdata-kernel-collector-static-v1.1.2.tar.xz
diff --git a/packaging/ebpf.version b/packaging/ebpf.version
index 795460fcec..0f1acbd565 100644
--- a/packaging/ebpf.version
+++ b/packaging/ebpf.version
@@ -1 +1 @@
-v1.1.0
+v1.1.2