diff options
author | thiagoftsm <thiagoftsm@gmail.com> | 2021-07-02 12:28:50 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-07-02 12:28:50 +0000 |
commit | 37686c8d682dc5235f8c1b790f484242fc194401 (patch) | |
tree | 28fc88465248ffa38608419e6d99cca04071b151 | |
parent | 180ace8264c21ae77635965edd21e369ad45701e (diff) |
Ebpf disk latency (#11276)
Add disk monitoring independent of filesystem.
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | Makefile.am | 2 | ||||
-rw-r--r-- | collectors/all.h | 1 | ||||
-rw-r--r-- | collectors/ebpf.plugin/Makefile.am | 1 | ||||
-rw-r--r-- | collectors/ebpf.plugin/README.md | 2 | ||||
-rw-r--r-- | collectors/ebpf.plugin/ebpf.c | 50 | ||||
-rw-r--r-- | collectors/ebpf.plugin/ebpf.d.conf | 3 | ||||
-rw-r--r-- | collectors/ebpf.plugin/ebpf.d/disk.conf | 13 | ||||
-rw-r--r-- | collectors/ebpf.plugin/ebpf.h | 4 | ||||
-rw-r--r-- | collectors/ebpf.plugin/ebpf_apps.h | 1 | ||||
-rw-r--r-- | collectors/ebpf.plugin/ebpf_disk.c | 837 | ||||
-rw-r--r-- | collectors/ebpf.plugin/ebpf_disk.h | 72 | ||||
-rw-r--r-- | collectors/ebpf.plugin/ebpf_filesystem.c | 63 | ||||
-rw-r--r-- | collectors/ebpf.plugin/ebpf_filesystem.h | 9 | ||||
-rw-r--r-- | libnetdata/ebpf/ebpf.c | 111 | ||||
-rw-r--r-- | libnetdata/ebpf/ebpf.h | 19 | ||||
-rw-r--r-- | packaging/ebpf.checksums | 6 | ||||
-rw-r--r-- | packaging/ebpf.version | 2 | ||||
-rw-r--r-- | web/gui/dashboard_info.js | 4 |
19 files changed, 1145 insertions, 57 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index c5d4bc8f52..2a50ff51d3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -490,6 +490,8 @@ set(EBPF_PROCESS_PLUGIN_FILES collectors/ebpf.plugin/ebpf_cachestat.h collectors/ebpf.plugin/ebpf_dcstat.c collectors/ebpf.plugin/ebpf_dcstat.h + collectors/ebpf.plugin/ebpf_disk.c + collectors/ebpf.plugin/ebpf_disk.h collectors/ebpf.plugin/ebpf_filesystem.c collectors/ebpf.plugin/ebpf_filesystem.h collectors/ebpf.plugin/ebpf_process.c diff --git a/Makefile.am b/Makefile.am index 1c98e30b6a..3c47b62e6d 100644 --- a/Makefile.am +++ b/Makefile.am @@ -289,6 +289,8 @@ EBPF_PLUGIN_FILES = \ collectors/ebpf.plugin/ebpf_cachestat.h \ collectors/ebpf.plugin/ebpf_dcstat.c \ collectors/ebpf.plugin/ebpf_dcstat.h \ + collectors/ebpf.plugin/ebpf_disk.c \ + collectors/ebpf.plugin/ebpf_disk.h \ collectors/ebpf.plugin/ebpf_filesystem.c \ collectors/ebpf.plugin/ebpf_filesystem.h \ collectors/ebpf.plugin/ebpf_process.c \ diff --git a/collectors/all.h b/collectors/all.h index b0e09f5364..41bed0592f 100644 --- a/collectors/all.h +++ b/collectors/all.h @@ -116,6 +116,7 @@ #define NETDATA_CHART_PRIO_DISK_SVCTM 2070 #define NETDATA_CHART_PRIO_DISK_MOPS 2080 #define NETDATA_CHART_PRIO_DISK_IOTIME 2090 +#define NETDATA_CHART_PRIO_DISK_LATENCY 2095 #define NETDATA_CHART_PRIO_BCACHE_CACHE_ALLOC 2120 #define NETDATA_CHART_PRIO_BCACHE_HIT_RATIO 2120 #define NETDATA_CHART_PRIO_BCACHE_RATES 2121 diff --git a/collectors/ebpf.plugin/Makefile.am b/collectors/ebpf.plugin/Makefile.am index 2b73ed2ddc..30552dd006 100644 --- a/collectors/ebpf.plugin/Makefile.am +++ b/collectors/ebpf.plugin/Makefile.am @@ -34,6 +34,7 @@ dist_ebpfconfig_DATA = \ ebpf.d/ebpf_kernel_reject_list.txt \ ebpf.d/cachestat.conf \ ebpf.d/dcstat.conf \ + ebpf.d/disk.conf \ ebpf.d/filesystem.conf \ ebpf.d/network.conf \ ebpf.d/process.conf \ diff --git a/collectors/ebpf.plugin/README.md b/collectors/ebpf.plugin/README.md index 839c6ab721..3ab1a8ea17 100644 --- 
a/collectors/ebpf.plugin/README.md +++ b/collectors/ebpf.plugin/README.md @@ -216,6 +216,7 @@ The eBPF collector enables and runs the following eBPF programs by default: - `dcstat` : This eBPF program creates charts that show information about file access using directory cache. It appends `kprobes` for `lookup_fast()` and `d_lookup()` to identify if files are inside directory cache, outside and files are not found. +- `disk` : This eBPF program creates charts that show information about disk latency independent of filesystem. - `filesystem`: This eBPF program creates charts that show latency information for selected filesystem. - `process`: This eBPF program creates charts that show information about process creation, calls to open files. When in `return` mode, it also creates charts showing errors when these operations are executed. @@ -240,6 +241,7 @@ The following configuration files are available: - `cachestat.conf`: Configuration for the `cachestat` thread. - `dcstat.conf`: Configuration for the `dcstat` thread. +- `disk.conf`: Configuration for the `disk` thread. - `filesystem.conf`: Configuration for the `filesystem` thread. - `process.conf`: Configuration for the `process` thread. - `network.conf`: Configuration for the `network viewer` thread. 
This config file overwrites the global options and diff --git a/collectors/ebpf.plugin/ebpf.c b/collectors/ebpf.plugin/ebpf.c index d419861904..9807cda2a2 100644 --- a/collectors/ebpf.plugin/ebpf.c +++ b/collectors/ebpf.plugin/ebpf.c @@ -115,6 +115,11 @@ ebpf_module_t ebpf_modules[] = { .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &fs_config, .config_file = NETDATA_SYNC_CONFIG_FILE}, + { .thread_name = "disk", .config_name = "disk", .enabled = 0, .start_routine = ebpf_disk_thread, + .update_time = 1, .global_charts = 1, .apps_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, + .optional = 0, .apps_routine = NULL, .maps = NULL, + .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &disk_config, + .config_file = NETDATA_SYNC_CONFIG_FILE}, { .thread_name = NULL, .enabled = 0, .start_routine = NULL, .update_time = 1, .global_charts = 0, .apps_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = 0, .names = NULL, @@ -509,6 +514,31 @@ void ebpf_create_charts_on_apps(char *id, char *title, char *units, char *family } } +/** + * Call the necessary functions to create a name. + * + * @param family family name + * @param name chart name + * @param hist histogram values + * @param dimensions dimension values. + * @param end number of bins that will be sent to Netdata. + * + * @return It returns a variable that maps the charts that did not have zero values. 
+ */ +void write_histogram_chart(char *family, char *name, const netdata_idx_t *hist, char **dimensions, uint32_t end) +{ + write_begin_chart(family, name); + + uint32_t i; + for (i = 0; i < end; i++) { + write_chart_dimension(dimensions[i], (long long) hist[i]); + } + + write_end_chart(); + + fflush(stdout); +} + /***************************************************************** * * FUNCTIONS TO DEFINE OPTIONS @@ -658,6 +688,8 @@ void ebpf_print_help() "\n" " --dcstat or -d Enable charts related to directory cache.\n" "\n" + " --disk or -k Enable charts related to disk monitoring.\n" + "\n" " --filesystem or -i Enable chart related to filesystem run time.\n" "\n" " --net or -n Enable network viewer charts.\n" @@ -982,6 +1014,13 @@ static void read_collector_values(int *disable_apps) started++; } + enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "disk", + CONFIG_BOOLEAN_NO); + if (enabled) { + ebpf_enable_chart(EBPF_MODULE_DISK_IDX, *disable_apps); + started++; + } + if (!started){ ebpf_enable_all_charts(*disable_apps); // Read network viewer section @@ -1066,6 +1105,7 @@ static void parse_args(int argc, char **argv) {"all", no_argument, 0, 'a' }, {"cachestat", no_argument, 0, 'c' }, {"dcstat", no_argument, 0, 'd' }, + {"disk", no_argument, 0, 'k' }, {"filesystem", no_argument, 0, 'i' }, {"net", no_argument, 0, 'n' }, {"process", no_argument, 0, 'p' }, @@ -1142,6 +1182,14 @@ static void parse_args(int argc, char **argv) #endif break; } + case 'k': { + enabled = 1; + ebpf_enable_chart(EBPF_MODULE_DISK_IDX, disable_apps); +#ifdef NETDATA_INTERNAL_CHECKS + info("EBPF enabling \"disk\" chart, because it was started with the option \"--disk\" or \"-k\"."); +#endif + break; + } case 'n': { enabled = 1; ebpf_enable_chart(EBPF_MODULE_SOCKET_IDX, disable_apps); @@ -1464,6 +1512,8 @@ int main(int argc, char **argv) NULL, NULL, ebpf_modules[EBPF_MODULE_VFS_IDX].start_routine}, {"EBPF FILESYSTEM" , NULL, NULL, 1, NULL, NULL, 
ebpf_modules[EBPF_MODULE_FILESYSTEM_IDX].start_routine}, + {"EBPF DISK" , NULL, NULL, 1, + NULL, NULL, ebpf_modules[EBPF_MODULE_DISK_IDX].start_routine}, {NULL , NULL, NULL, 0, NULL, NULL, NULL} }; diff --git a/collectors/ebpf.plugin/ebpf.d.conf b/collectors/ebpf.plugin/ebpf.d.conf index 6bc771e9a2..15512e76fd 100644 --- a/collectors/ebpf.plugin/ebpf.d.conf +++ b/collectors/ebpf.plugin/ebpf.d.conf @@ -26,6 +26,8 @@ # The eBPF collector enables and runs the following eBPF programs by default: # # `cachestat` : Make charts for kernel functions related to page cache. +# `dcstat` : Make charts for kernel functions related to directory cache. +# `disk` : Monitor I/O latencies for disks # `filesystem`: Monitor calls for functions used to manipulate specific filesystems # `process` : This eBPF program creates charts that show information about process creation, and file manipulation. # `socket` : This eBPF program creates charts with information about `TCP` and `UDP` functions, including the @@ -37,6 +39,7 @@ [ebpf programs] cachestat = no dcstat = no + disk = no filesystem = no process = yes socket = yes diff --git a/collectors/ebpf.plugin/ebpf.d/disk.conf b/collectors/ebpf.plugin/ebpf.d/disk.conf new file mode 100644 index 0000000000..6ba18477c4 --- /dev/null +++ b/collectors/ebpf.plugin/ebpf.d/disk.conf @@ -0,0 +1,13 @@ +# The `ebpf load mode` option accepts the following values : +# `entry` : The eBPF collector only monitors calls for the functions, and does not show charts related to errors. +# `return` : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates +# new charts for the return of these functions, such as errors. +# +# The eBPF collector also creates charts for each running application through an integration with the `apps plugin`. +# If you want to disable the integration with `apps.plugin` along with the above charts, change the setting `apps` to +# 'no'. 
+# +[global] + ebpf load mode = entry + update every = 2 + diff --git a/collectors/ebpf.plugin/ebpf.h b/collectors/ebpf.plugin/ebpf.h index 7036385add..9c099815a3 100644 --- a/collectors/ebpf.plugin/ebpf.h +++ b/collectors/ebpf.plugin/ebpf.h @@ -81,7 +81,8 @@ enum ebpf_module_indexes { EBPF_MODULE_DCSTAT_IDX, EBPF_MODULE_SWAP_IDX, EBPF_MODULE_VFS_IDX, - EBPF_MODULE_FILESYSTEM_IDX + EBPF_MODULE_FILESYSTEM_IDX, + EBPF_MODULE_DISK_IDX }; // Copied from musl header @@ -226,6 +227,7 @@ extern collected_number get_value_from_structure(char *basis, size_t offset); extern void ebpf_update_pid_table(ebpf_local_maps_t *pid, ebpf_module_t *em); extern void ebpf_write_chart_obsolete(char *type, char *id, char *title, char *units, char *family, char *charttype, char *context, int order); +extern void write_histogram_chart(char *family, char *name, const netdata_idx_t *hist, char **dimensions, uint32_t end); #define EBPF_MAX_SYNCHRONIZATION_TIME 300 diff --git a/collectors/ebpf.plugin/ebpf_apps.h b/collectors/ebpf.plugin/ebpf_apps.h index 338433d404..4b7c92ddb5 100644 --- a/collectors/ebpf.plugin/ebpf_apps.h +++ b/collectors/ebpf.plugin/ebpf_apps.h @@ -20,6 +20,7 @@ #include "ebpf_process.h" #include "ebpf_dcstat.h" +#include "ebpf_disk.h" #include "ebpf_filesystem.h" #include "ebpf_cachestat.h" #include "ebpf_sync.h" diff --git a/collectors/ebpf.plugin/ebpf_disk.c b/collectors/ebpf.plugin/ebpf_disk.c new file mode 100644 index 0000000000..5f62aeaf7c --- /dev/null +++ b/collectors/ebpf.plugin/ebpf_disk.c @@ -0,0 +1,837 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include <sys/resource.h> +#include <stdlib.h> + +#include "ebpf.h" +#include "ebpf_disk.h" + +struct config disk_config = { .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare }, + .rwlock = AVL_LOCK_INITIALIZER } }; + +static ebpf_local_maps_t disk_maps[] = {{.name = "tbl_disk_iocall", .internal_input = 
NETDATA_DISK_HISTOGRAM_LENGTH, + .user_input = 0, .type = NETDATA_EBPF_MAP_STATIC, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}, + {.name = NULL, .internal_input = 0, .user_input = 0, + .type = NETDATA_EBPF_MAP_CONTROLLER, + .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED}}; +static ebpf_data_t disk_data; + +static avl_tree_lock disk_tree; +netdata_ebpf_disks_t *disk_list = NULL; + +char *tracepoint_block_type = { "block"} ; +char *tracepoint_block_issue = { "block_rq_issue" }; +char *tracepoint_block_rq_complete = { "block_rq_complete" }; + +static struct bpf_link **probe_links = NULL; +static struct bpf_object *objects = NULL; + +static int was_block_issue_enabled = 0; +static int was_block_rq_complete_enabled = 0; + +static char **dimensions = NULL; +static netdata_syscall_stat_t disk_aggregated_data[NETDATA_EBPF_HIST_MAX_BINS]; +static netdata_publish_syscall_t disk_publish_aggregated[NETDATA_EBPF_HIST_MAX_BINS]; + +static int read_thread_closed = 1; + +static netdata_idx_t *disk_hash_values = NULL; +static struct netdata_static_thread disk_threads = {"DISK KERNEL", + NULL, NULL, 1, NULL, + NULL, NULL }; + +ebpf_publish_disk_t *plot_disks = NULL; +pthread_mutex_t plot_mutex; + +/***************************************************************** + * + * FUNCTIONS TO MANIPULATE HARD DISKS + * + *****************************************************************/ + +/** + * Parse start + * + * Parse start address of disk + * + * @param w structure where data is stored + * @param filename variable used to store value + * + * @return It returns 0 on success and -1 otherwise + */ +static inline int ebpf_disk_parse_start(netdata_ebpf_disks_t *w, char *filename) +{ + char content[FILENAME_MAX + 1]; + int fd = open(filename, O_RDONLY, 0); + if (fd < 0) { + return -1; + } + + ssize_t file_length = read(fd, content, 4095); + if (file_length > 0) { + if (file_length > FILENAME_MAX) + file_length = FILENAME_MAX; + + content[file_length] = '\0'; + w->start = strtoul(content, NULL, 10); 
+ } + close(fd); + + return 0; +} + +/** + * Parse uevent + * + * Parse uevent file + * + * @param w structure where data is stored + * @param filename variable used to store value + * + * @return It returns 0 on success and -1 otherwise + */ +static inline int ebpf_parse_uevent(netdata_ebpf_disks_t *w, char *filename) +{ + char content[FILENAME_MAX + 1]; + int fd = open(filename, O_RDONLY, 0); + if (fd < 0) { + return -1; + } + + ssize_t file_length = read(fd, content, FILENAME_MAX); + if (file_length > 0) { + if (file_length > FILENAME_MAX) + file_length = FILENAME_MAX; + + content[file_length] = '\0'; + + char *s = strstr(content, "PARTNAME=EFI"); + if (s) { + w->main->boot_partition = w; + w->flags |= NETDATA_DISK_HAS_EFI; + w->boot_chart = strdupz("disk_bootsector"); + } + } + close(fd); + + return 0; +} + +/** + * Parse Size + * + * @param w structure where data is stored + * @param filename variable used to store value + * + * @return It returns 0 on success and -1 otherwise + */ +static inline int ebpf_parse_size(netdata_ebpf_disks_t *w, char *filename) +{ + char content[FILENAME_MAX + 1]; + int fd = open(filename, O_RDONLY, 0); + if (fd < 0) { + return -1; + } + + ssize_t file_length = read(fd, content, FILENAME_MAX); + if (file_length > 0) { + if (file_length > FILENAME_MAX) + file_length = FILENAME_MAX; + + content[file_length] = '\0'; + w->end = w->start + strtoul(content, NULL, 10) -1; + } + close(fd); + + return 0; +} + +/** + * Read Disk information + * + * Read disk information from /sys/block + * + * @param w structure where data is stored + * @param name disk name + */ +static void ebpf_read_disk_info(netdata_ebpf_disks_t *w, char *name) +{ + static netdata_ebpf_disks_t *main_disk = NULL; + static uint32_t key = 0; + char *path = { "/sys/block" }; + char disk[NETDATA_DISK_NAME_LEN + 1]; + char filename[FILENAME_MAX + 1]; + snprintfz(disk, NETDATA_DISK_NAME_LEN, "%s", name); + size_t length = strlen(disk); + if (!length) { + return; + } + + 
length--; + size_t curr = length; + while (isdigit((int)disk[length])) { + disk[length--] = '\0'; + } + + // We are looking for partition information, if it is a device we will ignore it. + if (curr == length) { + main_disk = w; + key = MKDEV(w->major, w->minor); + w->bootsector_key = key; + return; + } + w->bootsector_key = key; + w->main = main_disk; + + snprintfz(filename, FILENAME_MAX, "%s/%s/%s/uevent", path, disk, name); + if (ebpf_parse_uevent(w, filename)) + return; + + snprintfz(filename, FILENAME_MAX, "%s/%s/%s/start", path, disk, name); + if (ebpf_disk_parse_start(w, filename)) + return; + + snprintfz(filename, FILENAME_MAX, "%s/%s/%s/size", path, disk, name); + ebpf_parse_size(w, filename); +} + +/** + * New encode dev + * + * New encode algorithm extracted from https://elixir.bootlin.com/linux/v5.10.8/source/include/linux/kdev_t.h#L39 + * + * @param major driver major number + * @param minor driver minor number + * + * @return + */ +static inline uint32_t netdata_new_encode_dev(uint32_t major, uint32_t minor) { + return (minor & 0xff) | (major << 8) | ((minor & ~0xff) << 12); +} + +/** + * Compare disks + * + * Compare major and minor values to add disks to tree. + * + * @param a pointer to netdata_ebpf_disks + * @param b pointer to netdata_ebpf_disks + * + * @return It returns 0 case the values are equal, 1 case a is bigger than b and -1 case a is smaller than b. +*/ +static int ebpf_compare_disks(void *a, void *b) +{ + netdata_ebpf_disks_t *ptr1 = a; + netdata_ebpf_disks_t *ptr2 = b; + + if (ptr1->dev > ptr2->dev) + return 1; + if (ptr1->dev < ptr2->dev) + return -1; + + return 0; +} + +/** + * Update listen table + * + * Update link list when it is necessary. 
+ * + * @param name disk name + * @param major major disk identifier + * @param minor minor disk identifier + * @param current_time current timestamp + */ +static void update_disk_table(char *name, int major, int minor, time_t current_time) +{ + netdata_ebpf_disks_t find; + netdata_ebpf_disks_t *w; + size_t length; + + uint32_t dev = netdata_new_encode_dev(major, minor); + find.dev = dev; + netdata_ebpf_disks_t *ret = (netdata_ebpf_disks_t *) avl_search_lock(&disk_tree, (avl_t *)&find); + if (ret) { // Disk is already present + ret->flags |= NETDATA_DISK_IS_HERE; + ret->last_update = current_time; + return; + } + + netdata_ebpf_disks_t *update_next = |