diff options
author | Uman Shahzad <uman@mslm.io> | 2021-08-18 18:46:25 +0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-08-18 18:46:25 +0500 |
commit | bc3d8c99066fd80b8f99f10cba8b87ed7404c025 (patch) | |
tree | cde3e9790230c532bacdde76ad7eaaa7387c0846 /collectors/ebpf.plugin | |
parent | 28bea5ea903df78fd0212460b8ff9a1303784144 (diff) | |
eBPF Hard IRQ latency (#11410)
Diffstat (limited to 'collectors/ebpf.plugin')
-rw-r--r-- | collectors/ebpf.plugin/Makefile.am | 1 |
-rw-r--r-- | collectors/ebpf.plugin/README.md | 3 |
-rw-r--r-- | collectors/ebpf.plugin/ebpf.c | 115 |
-rw-r--r-- | collectors/ebpf.plugin/ebpf.d.conf | 2 |
-rw-r--r-- | collectors/ebpf.plugin/ebpf.d/hardirq.conf | 8 |
-rw-r--r-- | collectors/ebpf.plugin/ebpf.h | 13 |
-rw-r--r-- | collectors/ebpf.plugin/ebpf_apps.h | 1 |
-rw-r--r-- | collectors/ebpf.plugin/ebpf_hardirq.c | 495 |
-rw-r--r-- | collectors/ebpf.plugin/ebpf_hardirq.h | 74 |
9 files changed, 707 insertions, 5 deletions
diff --git a/collectors/ebpf.plugin/Makefile.am b/collectors/ebpf.plugin/Makefile.am index 2778dce843..5a659f47d3 100644 --- a/collectors/ebpf.plugin/Makefile.am +++ b/collectors/ebpf.plugin/Makefile.am @@ -37,6 +37,7 @@ dist_ebpfconfig_DATA = \ ebpf.d/disk.conf \ ebpf.d/fd.conf \ ebpf.d/filesystem.conf \ + ebpf.d/hardirq.conf \ ebpf.d/mount.conf \ ebpf.d/network.conf \ ebpf.d/process.conf \ diff --git a/collectors/ebpf.plugin/README.md b/collectors/ebpf.plugin/README.md index fc5f2d9aa5..4a42290c42 100644 --- a/collectors/ebpf.plugin/README.md +++ b/collectors/ebpf.plugin/README.md @@ -218,6 +218,8 @@ The eBPF collector enables and runs the following eBPF programs by default: - `vfs`: This eBPF program creates charts that show information about VFS (Virtual File System) functions. - `process`: This eBPF program creates charts that show information about process life. When in `return` mode, it also creates charts showing errors when these operations are executed. +- `hardirq`: This eBPF program creates charts that show information about + time spent servicing individual hardware interrupt requests (hard IRQs). You can also enable the following eBPF programs: - `cachestat`: Netdata's eBPF data collector creates charts about the memory page cache. When the integration with @@ -249,6 +251,7 @@ The following configuration files are available: - `disk.conf`: Configuration for the `disk` thread. - `fd.conf`: Configuration for the `file descriptor` thread. - `filesystem.conf`: Configuration for the `filesystem` thread. +- `hardirq.conf`: Configuration for the `hardirq` thread. - `process.conf`: Configuration for the `process` thread. - `network.conf`: Configuration for the `network viewer` thread. This config file overwrites the global options and also lets you specify which network the eBPF collector monitors. 
diff --git a/collectors/ebpf.plugin/ebpf.c b/collectors/ebpf.plugin/ebpf.c index ab3115f8d7..7e85dc14ca 100644 --- a/collectors/ebpf.plugin/ebpf.c +++ b/collectors/ebpf.plugin/ebpf.c @@ -130,6 +130,11 @@ ebpf_module_t ebpf_modules[] = { .optional = 0, .apps_routine = ebpf_fd_create_apps_charts, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &fd_config, .config_file = NETDATA_FD_CONFIG_FILE}, + { .thread_name = "hardirq", .config_name = "hardirq", .enabled = 0, .start_routine = ebpf_hardirq_thread, + .update_time = 1, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, + .optional = 0, .apps_routine = NULL, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &hardirq_config, + .config_file = NETDATA_HARDIRQ_CONFIG_FILE}, { .thread_name = NULL, .enabled = 0, .start_routine = NULL, .update_time = 1, .global_charts = 0, .apps_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = 0, .names = NULL, @@ -296,8 +301,7 @@ inline void write_end_chart() */ void write_chart_dimension(char *dim, long long value) { - int ret = printf("SET %s = %lld\n", dim, value); - UNUSED(ret); + printf("SET %s = %lld\n", dim, value); } /** @@ -507,7 +511,9 @@ void ebpf_create_chart(char *type, { ebpf_write_chart_cmd(type, id, title, units, family, charttype, context, order, module); - ncd(move, end); + if (ncd) { + ncd(move, end); + } } /** @@ -713,6 +719,8 @@ void ebpf_print_help() "\n" " --filesystem or -i Enable chart related to filesystem run time.\n" "\n" + " --hardirq or -q Enable chart related to hard IRQ latency.\n" + "\n" " --mount or -m Enable charts related to mount monitoring.\n" "\n" " --net or -n Enable network viewer charts.\n" @@ -733,6 +741,87 @@ void ebpf_print_help() /***************************************************************** * + * TRACEPOINT MANAGEMENT FUNCTIONS + * + 
*****************************************************************/ + +/** + * Enable a tracepoint. + * + * @return 0 on success, -1 on error. + */ +int ebpf_enable_tracepoint(ebpf_tracepoint_t *tp) +{ + int test = ebpf_is_tracepoint_enabled(tp->class, tp->event); + + // err? + if (test == -1) { + return -1; + } + // disabled? + else if (test == 0) { + // enable it then. + if (ebpf_enable_tracing_values(tp->class, tp->event)) { + return -1; + } + } + + // enabled now or already was. + tp->enabled = true; + + return 0; +} + +/** + * Disable a tracepoint if it's enabled. + * + * @return 0 on success, -1 on error. + */ +int ebpf_disable_tracepoint(ebpf_tracepoint_t *tp) +{ + int test = ebpf_is_tracepoint_enabled(tp->class, tp->event); + + // err? + if (test == -1) { + return -1; + } + // enabled? + else if (test == 1) { + // disable it then. + if (ebpf_disable_tracing_values(tp->class, tp->event)) { + return -1; + } + } + + // disable now or already was. + tp->enabled = false; + + return 0; +} + +/** + * Enable multiple tracepoints on a list of tracepoints which end when the + * class is NULL. + * + * @return the number of successful enables. 
+ */ +uint32_t ebpf_enable_tracepoints(ebpf_tracepoint_t *tps) +{ + uint32_t cnt = 0; + for (int i = 0; tps[i].class != NULL; i++) { + if (ebpf_enable_tracepoint(&tps[i]) == -1) { + infoerr("failed to enable tracepoint %s:%s", + tps[i].class, tps[i].event); + } + else { + cnt += 1; + } + } + return cnt; +} + +/***************************************************************** + * * AUXILIAR FUNCTIONS USED DURING INITIALIZATION * *****************************************************************/ @@ -1058,6 +1147,13 @@ static void read_collector_values(int *disable_apps) started++; } + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "hardirq", + CONFIG_BOOLEAN_YES); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_HARDIRQ_IDX, *disable_apps); + started++; + } + if (!started){ ebpf_enable_all_charts(*disable_apps); // Read network viewer section @@ -1145,6 +1241,7 @@ static void parse_args(int argc, char **argv) {"disk", no_argument, 0, 'k' }, {"filesystem", no_argument, 0, 'i' }, {"filedescriptor", no_argument, 0, 'e' }, + {"hardirq", no_argument, 0, 'q' }, {"mount", no_argument, 0, 'm' }, {"net", no_argument, 0, 'n' }, {"process", no_argument, 0, 'p' }, @@ -1166,7 +1263,7 @@ static void parse_args(int argc, char **argv) } while (1) { - int c = getopt_long(argc, argv, "hvgacdnprsw", long_options, &option_index); + int c = getopt_long(argc, argv, "hvgacdkieqmnprswf", long_options, &option_index); if (c == -1) break; @@ -1221,6 +1318,14 @@ static void parse_args(int argc, char **argv) #endif break; } + case 'q': { + enabled = 1; + ebpf_enable_chart(EBPF_MODULE_HARDIRQ_IDX, disable_apps); +#ifdef NETDATA_INTERNAL_CHECKS + info("EBPF enabling \"hardirq\" chart, because it was started with the option \"--hardirq\" or \"-q\"."); +#endif + break; + } case 'k': { enabled = 1; ebpf_enable_chart(EBPF_MODULE_DISK_IDX, disable_apps); @@ -1573,6 +1678,8 @@ int main(int argc, char **argv) NULL, NULL, ebpf_modules[EBPF_MODULE_MOUNT_IDX].start_routine}, 
{"EBPF FD" , NULL, NULL, 1, NULL, NULL, ebpf_modules[EBPF_MODULE_FD_IDX].start_routine}, + {"EBPF HARDIRQ" , NULL, NULL, 1, + NULL, NULL, ebpf_modules[EBPF_MODULE_HARDIRQ_IDX].start_routine}, {NULL , NULL, NULL, 0, NULL, NULL, NULL} }; diff --git a/collectors/ebpf.plugin/ebpf.d.conf b/collectors/ebpf.plugin/ebpf.d.conf index 870e8021a1..4062e22c5f 100644 --- a/collectors/ebpf.plugin/ebpf.d.conf +++ b/collectors/ebpf.plugin/ebpf.d.conf @@ -31,6 +31,7 @@ # `fd` : This eBPF program creates charts that show information about file manipulation. # `mount` : Monitor calls for syscalls mount and umount # `filesystem`: Monitor calls for functions used to manipulate specific filesystems +# `hardirq` : Monitor latency of serving hardware interrupt requests (hard IRQs). # `process` : This eBPF program creates charts that show information about process life. # `socket` : This eBPF program creates charts with information about `TCP` and `UDP` functions, including the # bandwidth consumed by each. @@ -44,6 +45,7 @@ disk = no fd = yes filesystem = no + hardirq = yes mount = yes process = yes socket = yes diff --git a/collectors/ebpf.plugin/ebpf.d/hardirq.conf b/collectors/ebpf.plugin/ebpf.d/hardirq.conf new file mode 100644 index 0000000000..976991a086 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/hardirq.conf @@ -0,0 +1,8 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. 
+# +[global] + ebpf load mode = entry + update every = 1 diff --git a/collectors/ebpf.plugin/ebpf.h b/collectors/ebpf.plugin/ebpf.h index 9d3270e374..0dd565e14c 100644 --- a/collectors/ebpf.plugin/ebpf.h +++ b/collectors/ebpf.plugin/ebpf.h @@ -84,9 +84,16 @@ enum ebpf_module_indexes { EBPF_MODULE_FILESYSTEM_IDX, EBPF_MODULE_DISK_IDX, EBPF_MODULE_MOUNT_IDX, - EBPF_MODULE_FD_IDX + EBPF_MODULE_FD_IDX, + EBPF_MODULE_HARDIRQ_IDX }; +typedef struct ebpf_tracepoint { + bool enabled; + char *class; + char *event; +} ebpf_tracepoint_t; + // Copied from musl header #ifndef offsetof #if __GNUC__ > 3 @@ -202,6 +209,10 @@ extern void write_end_chart(); extern void ebpf_cleanup_publish_syscall(netdata_publish_syscall_t *nps); +extern int ebpf_enable_tracepoint(ebpf_tracepoint_t *tp); +extern int ebpf_disable_tracepoint(ebpf_tracepoint_t *tp); +extern uint32_t ebpf_enable_tracepoints(ebpf_tracepoint_t *tps); + #define EBPF_PROGRAMS_SECTION "ebpf programs" #define EBPF_COMMON_DIMENSION_PERCENTAGE "%" diff --git a/collectors/ebpf.plugin/ebpf_apps.h b/collectors/ebpf.plugin/ebpf_apps.h index be54bd98c4..4f4caa2464 100644 --- a/collectors/ebpf.plugin/ebpf_apps.h +++ b/collectors/ebpf.plugin/ebpf_apps.h @@ -28,6 +28,7 @@ #include "ebpf_sync.h" #include "ebpf_swap.h" #include "ebpf_vfs.h" +#include "ebpf_hardirq.h" #define MAX_COMPARE_NAME 100 #define MAX_NAME 100 diff --git a/collectors/ebpf.plugin/ebpf_hardirq.c b/collectors/ebpf.plugin/ebpf_hardirq.c new file mode 100644 index 0000000000..e95091e691 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_hardirq.c @@ -0,0 +1,495 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "ebpf.h" +#include "ebpf_hardirq.h" + +struct config hardirq_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +#define HARDIRQ_MAP_LATENCY 0 +#define HARDIRQ_MAP_LATENCY_STATIC 1 +static 
ebpf_local_maps_t hardirq_maps[] = { + { + .name = "tbl_hardirq", + .internal_input = NETDATA_HARDIRQ_MAX_IRQS, + .user_input = 0, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + }, + { + .name = "tbl_hardirq_static", + .internal_input = HARDIRQ_EBPF_STATIC_END, + .user_input = 0, + .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + }, + /* end */ + { + .name = NULL, + .internal_input = 0, + .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED + } +}; + +static ebpf_data_t hardirq_data; + +#define HARDIRQ_TP_CLASS_IRQ "irq" +#define HARDIRQ_TP_CLASS_IRQ_VECTORS "irq_vectors" +static ebpf_tracepoint_t hardirq_tracepoints[] = { + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ, .event = "irq_handler_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ, .event = "irq_handler_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "thermal_apic_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "thermal_apic_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "threshold_apic_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "threshold_apic_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "error_apic_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "error_apic_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "deferred_error_apic_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "deferred_error_apic_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "spurious_apic_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "spurious_apic_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "call_function_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = 
"call_function_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "call_function_single_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "call_function_single_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "reschedule_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "reschedule_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "local_timer_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "local_timer_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "irq_work_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "irq_work_exit"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "x86_platform_ipi_entry"}, + {.enabled = false, .class = HARDIRQ_TP_CLASS_IRQ_VECTORS, .event = "x86_platform_ipi_exit"}, + /* end */ + {.enabled = false, .class = NULL, .event = NULL} +}; + +static hardirq_static_val_t hardirq_static_vals[] = { + { + .idx = HARDIRQ_EBPF_STATIC_APIC_THERMAL, + .name = "apic_thermal", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_APIC_THRESHOLD, + .name = "apic_threshold", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_APIC_ERROR, + .name = "apic_error", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_APIC_DEFERRED_ERROR, + .name = "apic_deferred_error", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_APIC_SPURIOUS, + .name = "apic_spurious", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_FUNC_CALL, + .name = "func_call", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_FUNC_CALL_SINGLE, + .name = "func_call_single", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_RESCHEDULE, + .name = "reschedule", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_LOCAL_TIMER, + .name = "local_timer", + .latency = 0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_IRQ_WORK, + .name = "irq_work", + .latency = 
0 + }, + { + .idx = HARDIRQ_EBPF_STATIC_X86_PLATFORM_IPI, + .name = "x86_platform_ipi", + .latency = 0 + }, +}; + +static struct bpf_link **probe_links = NULL; +static struct bpf_object *objects = NULL; + +static int read_thread_closed = 1; + +// store for "published" data from the reader thread, which the collector +// thread will write to netdata agent. +static avl_tree_lock hardirq_pub; + +// tmp store for dynamic hard IRQ values we get from a per-CPU eBPF map. +static hardirq_ebpf_val_t *hardirq_ebpf_vals = NULL; + +// tmp store for static hard IRQ values we get from a per-CPU eBPF map. +static hardirq_ebpf_static_val_t *hardirq_ebpf_static_vals = NULL; + +static struct netdata_static_thread hardirq_threads = {"HARDIRQ KERNEL", + NULL, NULL, 1, NULL, + NULL, NULL }; + +/** + * Clean up the main thread. + * + * @param ptr thread data. + */ +static void hardirq_cleanup(void *ptr) +{ + for (int i = 0; hardirq_tracepoints[i].class != NULL; i++) { + ebpf_disable_tracepoint(&hardirq_tracepoints[i]); + } + + ebpf_module_t *em = (ebpf_module_t *)ptr; + if (!em->enabled) { + return; + } + + heartbeat_t hb; + heartbeat_init(&hb); + uint32_t tick = 1 * USEC_PER_MS; + while (!read_thread_closed) { + usec_t dt = heartbeat_next(&hb, tick); + UNUSED(dt); + } + + freez(hardirq_ebpf_vals); + freez(hardirq_ebpf_static_vals); + freez(hardirq_threads.thread); + + if (probe_links) { + struct bpf_program *prog; + size_t i = 0 ; + bpf_object__for_each_program(prog, objects) { + bpf_link__destroy(probe_links[i]); + i++; + } + bpf_object__close(objects); + } +} + +/***************************************************************** + * MAIN LOOP + *****************************************************************/ + +/** + * Compare hard IRQ values. + * + * @param a `hardirq_val_t *`. + * @param b `hardirq_val_t *`. + * + * @return 0 if a==b, 1 if a>b, -1 if a<b. 
+*/ +static int hardirq_val_cmp(void *a, void *b) +{ + hardirq_val_t *ptr1 = a; + hardirq_val_t *ptr2 = b; + + if (ptr1->irq > ptr2->irq) { + return 1; + } + else if (ptr1->irq < ptr2->irq) { + return -1; + } + else { + return 0; + } +} + +static void hardirq_read_latency_map(int mapfd) +{ + hardirq_ebpf_key_t key = {}; + hardirq_ebpf_key_t next_key = {}; + hardirq_val_t search_v = {}; + hardirq_val_t *v = NULL; + + while (bpf_map_get_next_key(mapfd, &key, &next_key) == 0) { + // get val for this key. + int test = bpf_map_lookup_elem(mapfd, &key, hardirq_ebpf_vals); + if (unlikely(test < 0)) { + key = next_key; + continue; + } + + // is this IRQ saved yet? + // + // if not, make a new one, mark it as unsaved for now, and continue; we + // will insert it at the end after all of its values are correctly set, + // so that we can safely publish it to the collector within a single, + // short locked operation. + // + // otherwise simply continue; we will only update the latency, which + // can be republished safely without a lock. + // + // NOTE: lock isn't strictly necessary for this initial search, as only + // this thread does writing, but the AVL is using a read-write lock so + // there is no congestion. + bool v_is_new = false; + search_v.irq = key.irq; + v = (hardirq_val_t *)avl_search_lock(&hardirq_pub, (avl_t *)&search_v); + if (unlikely(v == NULL)) { + // latency/name can only be added reliably at a later time. + // when they're added, only then will we AVL insert. + v = callocz(1, sizeof(hardirq_val_t)); + v->irq = key.irq; + v->dim_exists = false; + + v_is_new = true; + } + + // note two things: + // 1. we must add up latency value for this IRQ across all CPUs. + // 2. the name is unfortunately *not* available on all CPU maps - only + // a single map contains the name, so we must find it. we only need + // to copy it though if the IRQ is new for us. 
+ bool name_saved = false; + uint64_t total_latency = 0; + int i; + int end = (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs; + for (i = 0; i < end; i++) { + total_latency += hardirq_ebpf_vals[i].latency/1000; + + // copy name for new IRQs. + if (v_is_new && !name_saved && hardirq_ebpf_vals[i].name[0] != '\0') { + strncpyz( + v->name, + hardirq_ebpf_vals[i].name, + NETDATA_HARDIRQ_NAME_LEN + ); + name_saved = true; + } + } + + // can now safely publish latency for existing IRQs. + v->latency = total_latency; + + // can now safely publish new IRQ. + if (v_is_new) { + avl_t *check = avl_insert_lock(&hardirq_pub, (avl_t *)v); + if (check != (avl_t *)v) { + error("Internal error, cannot insert the AVL tree."); + } + } + + key = next_key; + } +} + +static void hardirq_read_latency_static_map(int mapfd) +{ + uint32_t i; + for (i = 0; i < HARDIRQ_EBPF_STATIC_END; i++) { + uint32_t map_i = hardirq_static_vals[i].idx; + int test = bpf_map_lookup_elem(mapfd, &map_i, hardirq_ebpf_static_vals); + if (unlikely(test < 0)) { + continue; + } + + uint64_t total_latency = 0; + int cpu_i; + int end = (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs; + for (cpu_i = 0; cpu_i < end; cpu_i++) { + total_latency += hardirq_ebpf_static_vals[cpu_i].latency/1000; + } + + hardirq_static_vals[i].latency = total_latency; + } +} + +/** + * Read eBPF maps for hard IRQ. 
+ */ +static void *hardirq_reader(void *ptr) +{ + read_thread_closed = 0; + + heartbeat_t hb; + heartbeat_init(&hb); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + + usec_t step = NETDATA_HARDIRQ_SLEEP_MS * em->update_time; + while (!close_ebpf_plugin) { + usec_t dt = heartbeat_next(&hb, step); + UNUSED(dt); + + hardirq_read_latency_map(hardirq_maps[HARDIRQ_MAP_LATENCY].map_fd); + hardirq_read_latency_static_map(hardirq_maps[HARDIRQ_MAP_LATENCY_STATIC].map_fd); + } + + read_thread_closed = 1; + return NULL; +} + +static void hardirq_create_charts() +{ + ebpf_create_chart( + NETDATA_EBPF_SYSTEM_GROUP, + "hardirq_latency", + "Hardware IRQ latency", + "milliseconds", + "interrupts", + NULL, + NETDATA_EBPF_CHART_TYPE_STACKED, + NETDATA_CHART_PRIO_HARDIRQ_LATENCY, + NULL, NULL, 0, + NETDATA_EBPF_MODULE_NAME_HARDIRQ + ); + + fflush(stdout); +} + +static void hardirq_create_static_dims() +{ + uint32_t i; + for (i = 0; i < HARDIRQ_EBPF_STATIC_END; i++) { + ebpf_write_global_dimension( + hardirq_static_vals[i].name, hardirq_static_vals[i].name, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX] + ); + } +} + +// callback for avl tree traversal on `hardirq_pub`. +static int hardirq_write_dims(void *entry, void *data) +{ + UNUSED(data); + + hardirq_val_t *v = entry; + + // IRQs get dynamically added in, so add the dimension if we haven't yet. + if (!v->dim_exists) { + ebpf_write_global_dimension( + v->name, v->name, + ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX] + ); + v->dim_exists = true; + } + + write_chart_dimension(v->name, v->latency); + + return 1; +} + +static inline void hardirq_write_static_dims() +{ + uint32_t i; + for (i = 0; i < HARDIRQ_EBPF_STATIC_END; i++) { + write_chart_dimension( + hardirq_static_vals[i].name, + hardirq_static_vals[i].latency + ); + } +} + +/** +* Main loop for this collector. +*/ +static void hardirq_collector(ebpf_module_t *em) +{ + hardirq_ebpf_vals = callocz( + (running_on_kernel < NETDATA_KERNEL_V4_15) ? 
1 : ebpf_nprocs, + sizeof(hardirq_ebpf_val_t) + ); + hardirq_ebpf_static_vals = callocz( + (running_on_kernel < NETDATA_KERNEL_V4_15) ? 1 : ebpf_nprocs, + sizeof(hardirq_ebpf_static_val_t) + ); + + avl_init_lock(&hardirq_pub, hardirq_val_cmp); + + // create reader thread. + hardirq_threads.thread = mallocz(sizeof(netdata_thread_t)); + hardirq_threads.start_routine = hardirq_reader; + netdata_thread_create( + hardirq_threads.thread, + hardirq_threads.name, + NETDATA_THREAD_OPTION_JOINABLE, + hardirq_reader, + em + ); + + // create chart and static dims. + pthread_mutex_lock(&lock); + hardirq_create_charts(); + hardirq_create_static_dims(); + pthread_mutex_unlock(&lock); + + // loop and read from published data until ebpf plugin is closed. + while (!close_ebpf_plugin) { + pthread_mutex_lock(&collect_data_mutex); + pthread_cond_wait(&collect_data_cond_var, &collect_data_mutex); + pthread_mutex_lock(&lock); + + // write dims now for all hitherto discovered IRQs. + write_begin_chart(NETDATA_EBPF_SYSTEM_GROUP, "hardirq_latency"); + avl_traverse_lock(&hardirq_pub, hardirq_write_dims, NULL); + hardirq_write_static_dims(); + write_end_chart(); + + pthread_mutex_unlock(&lock); + pthread_mutex_unlock(&collect_data_mutex); + } +} + +/***************************************************************** + * EBPF HARDIRQ THREAD + *****************************************************************/ + +/** + * Hard IRQ latency thread. + * + * @param ptr a `ebpf_module_t *`. + * @return always NULL. 
+ */ +void *ebpf_hardirq_thread(void *ptr) +{ + netdata_thread_cleanup_push(hardirq_cleanup, ptr); + + ebpf_module_t *em = (ebpf_module_t *)ptr; + em->maps = hardirq_maps; + + fill_ebpf_data(&hardirq_data); + + if (!em->enabled) { + goto endhardirq; + } + + if (ebpf_update_kernel(&hardirq_data)) { + goto endhardirq; + } + + if (ebpf_enable_tracepoints(hardirq_tracepoints) == 0) { + em->enabled = CONFIG_BOOLEAN_NO; + goto endhardirq; + } + + probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects, hardirq_data.map_fd); + if (!probe_links) { + goto endhardirq; + } + + hardirq_collector(em); + +endhardirq: + netdata_thread_cleanup_pop(1); + + return NULL; +} diff --git a/collectors/ebpf.plugin/ebpf_hardirq.h b/collectors/ebpf.plugin/ebpf_hardirq.h new file mode 100644 index 0000000000..d67c337247 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_hardirq.h @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_HARDIRQ_H +#define NETDATA_EBPF_HARDIRQ_H 1 + +/***************************************************************** + * copied from kernel-collectors repo, with modifications needed + * for inclusion here. 
+ *****************************************************************/ + +#define NETDATA_HARDIRQ_NAME_LEN 32 +#define NETDATA_HARDIRQ_MAX_IRQS 1024L + +typedef struct hardirq_ebpf_key { + int irq; +} hardirq_ebpf_key_t; + +typedef struct hardirq_ebpf_val { + uint64_t latency; + uint64_t ts; + char name[NETDATA_HARDIRQ_NAME_LEN]; +} hardirq_ebpf_val_t; + +enum hardirq_ebpf_static { + HARDIRQ_EBPF_STATIC_APIC_THERMAL, + HARDIRQ_EBPF_STATIC_APIC_THRESHOLD, + HARDIRQ_EBPF_STATIC_APIC_ERROR, + HARDIRQ_EBPF_STATIC_APIC_DEFERRED_ERROR, + HARDIRQ_EBPF_STATIC_APIC_SPURIOUS, + HARDIRQ_EBPF_STATIC_FUNC_CALL, + HARDIRQ_EBPF_STATIC_FUNC_CALL_SINGLE, + HARDIRQ_EBPF_STATIC_RESCHEDULE, + HARDIRQ_EBPF_STATIC_LOCAL_TIMER, + HARDIRQ_EBPF_STATIC_IRQ_WORK, + HARDIRQ_EBPF_STATIC_X86_PLATFORM_IPI, + + HARDIRQ_EBPF_STATIC_END +}; + +typedef struct hardirq_ebpf_static_val { + uint64_t latency; + uint64_t ts; +} hardirq_ebpf_static_val_t; + +/***************************************************************** + * below this is eBPF plugin-specific code. + *****************************************************************/ + +#define NETDATA_EBPF_MODULE_NAME_HARDIRQ "hardirq" +#define NETDATA_HARDIRQ_SLEEP_MS 650000ULL +#define NETDATA_HARDIRQ_CONFIG_FILE "hardirq.conf" + +typedef struct hardirq_val { + // must be at top for simplified AVL tree usage. + // if it's not at the top, we need to use `containerof` for almost all ops. + avl_t avl; + + int irq; + bool dim_exists; // keep this after `int irq` for alignment byte savings. + uint64_t latency; + char name[NETDATA_HARDIRQ_NAME_LEN]; +} hardirq_val_t; + +typedef struct hardirq_static_val { + enum hardirq_ebpf_static idx; + char *name; + uint64_t latency; +} hardirq_static_val_t; + +extern struct config hardirq_config; +extern void *ebpf_hardirq_thread(void *ptr); +extern void ebpf_hardirq_create_apps_charts(struct ebpf_module *em, void *ptr); + +#endif /* NETDATA_EBPF_HARDIRQ_H */ |