summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorthiagoftsm <thiagoftsm@gmail.com>2021-03-24 12:00:04 +0000
committerGitHub <noreply@github.com>2021-03-24 12:00:04 +0000
commita714066bcc47b4a0608e7d0eb601754054d114d3 (patch)
treec16df96f4f5e4a54ea2cb909ca60e77f43df9ce2
parentc527863c5327123a43047bb61e7fdf5325177cbd (diff)
Ebpf extend sync (#10814)
Extend synchronization thread.
-rw-r--r--collectors/ebpf.plugin/README.md20
-rw-r--r--collectors/ebpf.plugin/ebpf.d/sync.conf9
-rw-r--r--collectors/ebpf.plugin/ebpf_sync.c255
-rw-r--r--collectors/ebpf.plugin/ebpf_sync.h26
-rw-r--r--packaging/ebpf.checksums6
-rw-r--r--packaging/ebpf.version2
-rw-r--r--web/gui/dashboard_info.js14
7 files changed, 282 insertions, 50 deletions
diff --git a/collectors/ebpf.plugin/README.md b/collectors/ebpf.plugin/README.md
index 06a76473a6..405eab8758 100644
--- a/collectors/ebpf.plugin/README.md
+++ b/collectors/ebpf.plugin/README.md
@@ -198,7 +198,7 @@ The eBPF collector enables and runs the following eBPF programs by default:
When in `return` mode, it also creates charts showing errors when these operations are executed.
- `network viewer`: This eBPF program creates charts with information about `TCP` and `UDP` functions, including the
bandwidth consumed by each.
-- `sync`: Montitor calls for syscall sync(2).
+- `sync`: Monitor calls for syscalls sync(2), fsync(2), fdatasync(2), syncfs(2), msync(2), and sync_file_range(2).
## Thread configuration
@@ -218,6 +218,7 @@ The following configuration files are available:
- `process.conf`: Configuration for the `process` thread.
- `network.conf`: Configuration for the `network viewer` thread. This config file overwrites the global options and
also lets you specify which network the eBPF collector monitors.
+- `sync.conf`: Configuration for the `sync` thread.
### Network configuration
@@ -261,7 +262,7 @@ The dimensions for the traffic charts are created using the destination IPs of t
changed setting `resolve hostname ips = yes` and restarting Netdata, after this Netdata will create dimensions using
the `hostnames` every time that is possible to resolve IPs to their hostnames.
-### `[service name]`
+#### `[service name]`
Netdata uses the list of services in `/etc/services` to plot network connection charts. If this file does not contain the
name for a particular service you use in your infrastructure, you will need to add it to the `[service name]` section.
@@ -274,6 +275,21 @@ service in network connection charts, and thus see the name of the service inste
19999 = Netdata
```
+### Sync configuration
+
+The sync configuration has specific options to disable monitoring for syscalls; by default, all syscalls are
+monitored.
+
+```conf
+[syscalls]
+ sync = yes
+ msync = yes
+ fsync = yes
+ fdatasync = yes
+ syncfs = yes
+ sync_file_range = yes
+```
+
## Troubleshooting
If the eBPF collector does not work, you can troubleshoot it by running the `ebpf.plugin` command and investigating its
diff --git a/collectors/ebpf.plugin/ebpf.d/sync.conf b/collectors/ebpf.plugin/ebpf.d/sync.conf
index 78277cf560..de28f33941 100644
--- a/collectors/ebpf.plugin/ebpf.d/sync.conf
+++ b/collectors/ebpf.plugin/ebpf.d/sync.conf
@@ -12,3 +12,12 @@
ebpf load mode = entry
apps = yes
update every = 2
+
+# List of monitored syscalls
+[syscalls]
+ sync = yes
+ msync = yes
+ fsync = yes
+ fdatasync = yes
+ syncfs = yes
+ sync_file_range = yes
diff --git a/collectors/ebpf.plugin/ebpf_sync.c b/collectors/ebpf.plugin/ebpf_sync.c
index 58ac7e46e6..f0db1cc4ac 100644
--- a/collectors/ebpf.plugin/ebpf_sync.c
+++ b/collectors/ebpf.plugin/ebpf_sync.c
@@ -5,17 +5,14 @@
static ebpf_data_t sync_data;
-static struct bpf_link **probe_links = NULL;
-static struct bpf_object *objects = NULL;
-
-static char *sync_counter_dimension_name[NETDATA_SYNC_END] = { "sync" };
-static netdata_syscall_stat_t sync_counter_aggregated_data;
-static netdata_publish_syscall_t sync_counter_publish_aggregated;
+static char *sync_counter_dimension_name[NETDATA_SYNC_IDX_END] = { "sync", "syncfs", "msync", "fsync", "fdatasync",
+ "sync_file_range" };
+static netdata_syscall_stat_t sync_counter_aggregated_data[NETDATA_SYNC_IDX_END];
+static netdata_publish_syscall_t sync_counter_publish_aggregated[NETDATA_SYNC_IDX_END];
static int read_thread_closed = 1;
-static int *map_fd = NULL;
-static netdata_idx_t sync_hash_values = 0;
+static netdata_idx_t sync_hash_values[NETDATA_SYNC_IDX_END];
struct netdata_static_thread sync_threads = {"SYNC KERNEL", NULL, NULL, 1,
NULL, NULL, NULL};
@@ -26,6 +23,60 @@ struct config sync_config = { .first_section = NULL,
.index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare },
.rwlock = AVL_LOCK_INITIALIZER } };
+ebpf_sync_syscalls_t local_syscalls[] = {
+ {.syscall = "sync", .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL},
+ {.syscall = "syncfs", .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL},
+ {.syscall = "msync", .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL},
+ {.syscall = "fsync", .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL},
+ {.syscall = "fdatasync", .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL},
+ {.syscall = "sync_file_range", .enabled = CONFIG_BOOLEAN_YES, .objects = NULL, .probe_links = NULL},
+ {.syscall = NULL, .enabled = CONFIG_BOOLEAN_NO, .objects = NULL, .probe_links = NULL}
+};
+
+/*****************************************************************
+ *
+ * INITIALIZE THREAD
+ *
+ *****************************************************************/
+
+/*
+ * Initialize Syscalls
+ *
+ * Load the eBPF programs to monitor syscalls
+ *
+ * @return 0 on success and -1 otherwise.
+ */
+static int ebpf_sync_initialize_syscall(ebpf_module_t *em)
+{
+ int i;
+ const char *saved_name = em->thread_name;
+ for (i = 0; local_syscalls[i].syscall; i++) {
+ ebpf_sync_syscalls_t *w = &local_syscalls[i];
+ if (!w->probe_links && w->enabled) {
+ fill_ebpf_data(&w->kernel_info);
+ if (ebpf_update_kernel(&w->kernel_info)) {
+ em->thread_name = saved_name;
+ error("Cannot update the kernel for eBPF module %s", w->syscall);
+ return -1;
+ }
+
+ em->thread_name = w->syscall;
+ w->probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &w->objects, w->kernel_info.map_fd);
+ if (!w->probe_links) {
+ em->thread_name = saved_name;
+ return -1;
+ }
+ }
+ }
+ em->thread_name = saved_name;
+
+ memset(sync_counter_aggregated_data, 0 , NETDATA_SYNC_IDX_END * sizeof(netdata_syscall_stat_t));
+ memset(sync_counter_publish_aggregated, 0 , NETDATA_SYNC_IDX_END * sizeof(netdata_publish_syscall_t));
+ memset(sync_hash_values, 0 , NETDATA_SYNC_IDX_END * sizeof(netdata_idx_t));
+
+ return 0;
+}
+
/*****************************************************************
*
* DATA THREAD
@@ -39,12 +90,16 @@ struct config sync_config = { .first_section = NULL,
*/
static void read_global_table()
{
- uint32_t idx = NETDATA_SYNC_CALL;
netdata_idx_t stored;
- int fd = map_fd[NETDATA_SYNC_GLOBLAL_TABLE];
-
- if (!bpf_map_lookup_elem(fd, &idx, &stored)) {
- sync_hash_values = stored;
+ uint32_t idx = NETDATA_SYNC_CALL;
+ int i;
+ for (i = 0; local_syscalls[i].syscall; i++) {
+ if (local_syscalls[i].enabled) {
+ int fd = local_syscalls[i].kernel_info.map_fd[NETDATA_SYNC_GLOBLAL_TABLE];
+ if (!bpf_map_lookup_elem(fd, &idx, &stored)) {
+ sync_hash_values[i] = stored;
+ }
+ }
}
}
@@ -78,14 +133,57 @@ void *ebpf_sync_read_hash(void *ptr)
}
/**
- * Send global
+ * Create Sync charts
+ *
+ * Create charts and dimensions according user input.
+ *
+ * @param id chart id
+ * @param idx the first index with data.
+ * @param end the last index with data.
+ */
+static void ebpf_send_sync_chart(char *id,
+ int idx,
+ int end)
+{
+ write_begin_chart(NETDATA_EBPF_MEMORY_GROUP, id);
+
+ netdata_publish_syscall_t *move = &sync_counter_publish_aggregated[idx];
+
+ while (move && idx <= end) {
+ if (local_syscalls[idx].enabled)
+ write_chart_dimension(move->name, sync_hash_values[idx]);
+
+ move = move->next;
+ idx++;
+ }
+
+ write_end_chart();
+}
+
+/**
+ * Send data
*
* Send global charts to Netdata
*/
-static void sync_send_global()
+static void sync_send_data()
{
- ebpf_one_dimension_write_charts(NETDATA_EBPF_MEMORY_GROUP, NETDATA_EBPF_SYNC_CHART,
- sync_counter_publish_aggregated.dimension, sync_hash_values);
+ if (local_syscalls[NETDATA_SYNC_FSYNC_IDX].enabled || local_syscalls[NETDATA_SYNC_FDATASYNC_IDX].enabled) {
+ ebpf_send_sync_chart(NETDATA_EBPF_FILE_SYNC_CHART, NETDATA_SYNC_FSYNC_IDX, NETDATA_SYNC_FDATASYNC_IDX);
+ }
+
+ if (local_syscalls[NETDATA_SYNC_MSYNC_IDX].enabled)
+ ebpf_one_dimension_write_charts(NETDATA_EBPF_MEMORY_GROUP, NETDATA_EBPF_MSYNC_CHART,
+ sync_counter_publish_aggregated[NETDATA_SYNC_MSYNC_IDX].dimension,
+ sync_hash_values[NETDATA_SYNC_MSYNC_IDX]);
+
+ if (local_syscalls[NETDATA_SYNC_SYNC_IDX].enabled || local_syscalls[NETDATA_SYNC_SYNCFS_IDX].enabled) {
+ ebpf_send_sync_chart(NETDATA_EBPF_SYNC_CHART, NETDATA_SYNC_SYNC_IDX, NETDATA_SYNC_SYNCFS_IDX);
+ }
+
+ if (local_syscalls[NETDATA_SYNC_SYNC_FILE_RANGE_IDX].enabled)
+ ebpf_one_dimension_write_charts(NETDATA_EBPF_MEMORY_GROUP, NETDATA_EBPF_FILE_SEGMENT_CHART,
+ sync_counter_publish_aggregated[NETDATA_SYNC_SYNC_FILE_RANGE_IDX].dimension,
+ sync_hash_values[NETDATA_SYNC_SYNC_FILE_RANGE_IDX]);
}
/**
@@ -96,8 +194,6 @@ static void sync_collector(ebpf_module_t *em)
sync_threads.thread = mallocz(sizeof(netdata_thread_t));
sync_threads.start_routine = ebpf_sync_read_hash;
- map_fd = sync_data.map_fd;
-
netdata_thread_create(sync_threads.thread, sync_threads.name, NETDATA_THREAD_OPTION_JOINABLE,
ebpf_sync_read_hash, em);
@@ -107,7 +203,7 @@ static void sync_collector(ebpf_module_t *em)
pthread_mutex_lock(&lock);
- sync_send_global();
+ sync_send_data();
pthread_mutex_unlock(&lock);
pthread_mutex_unlock(&collect_data_mutex);
@@ -122,6 +218,30 @@ static void sync_collector(ebpf_module_t *em)
*****************************************************************/
/**
+ * Cleanup Objects
+ *
+ * Cleanup loaded objects when thread was initialized.
+ */
+void ebpf_sync_cleanup_objects()
+{
+ int i;
+ for (i = 0; local_syscalls[i].syscall; i++) {
+ ebpf_sync_syscalls_t *w = &local_syscalls[i];
+ if (w->probe_links) {
+ freez(w->kernel_info.map_fd);
+
+ struct bpf_program *prog;
+ size_t j = 0 ;
+ bpf_object__for_each_program(prog, w->objects) {
+ bpf_link__destroy(w->probe_links[j]);
+ j++;
+ }
+ bpf_object__close(w->objects);
+ }
+ }
+}
+
+/**
* Clean up the main thread.
*
* @param ptr thread data.
@@ -140,15 +260,8 @@ static void ebpf_sync_cleanup(void *ptr)
UNUSED(dt);
}
+ ebpf_sync_cleanup_objects();
freez(sync_threads.thread);
-
- struct bpf_program *prog;
- size_t i = 0 ;
- bpf_object__for_each_program(prog, objects) {
- bpf_link__destroy(probe_links[i]);
- i++;
- }
- bpf_object__close(objects);
}
/*****************************************************************
@@ -158,17 +271,76 @@ static void ebpf_sync_cleanup(void *ptr)
*****************************************************************/
/**
+ * Create Sync charts
+ *
+ * Create charts and dimensions according user input.
+ *
+ * @param id chart id
+ * @param title chart title
+ * @param order order number of the specified chart
+ * @param idx the first index with data.
+ * @param end the last index with data.
+ */
+static void ebpf_create_sync_chart(char *id,
+ char *title,
+ int order,
+ int idx,
+ int end)
+{
+ ebpf_write_chart_cmd(NETDATA_EBPF_MEMORY_GROUP, id, title, EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_EBPF_SYNC_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, NULL, order);
+
+ netdata_publish_syscall_t *move = &sync_counter_publish_aggregated[idx];
+
+ while (move && idx <= end) {
+ if (local_syscalls[idx].enabled)
+ ebpf_write_global_dimension(move->name, move->dimension, move->algorithm);
+
+ move = move->next;
+ idx++;
+ }
+}
+
+/**
* Create global charts
*
* Call ebpf_create_chart to create the charts for the collector.
*/
static void ebpf_create_sync_charts()
{
- ebpf_create_chart(NETDATA_EBPF_MEMORY_GROUP, NETDATA_EBPF_SYNC_CHART,
- "Monitor calls for <a href=\"https://linux.die.net/man/2/sync\">sync(2)</a> syscall.",
- EBPF_COMMON_DIMENSION_CALL, NETDATA_EBPF_SYNC_SUBMENU, NULL,
- NETDATA_EBPF_CHART_TYPE_LINE, 21300,
- ebpf_create_global_dimension, &sync_counter_publish_aggregated, 1);
+ if (local_syscalls[NETDATA_SYNC_FSYNC_IDX].enabled || local_syscalls[NETDATA_SYNC_FDATASYNC_IDX].enabled)
+ ebpf_create_sync_chart(NETDATA_EBPF_FILE_SYNC_CHART,
+ "Monitor calls for <code>fsync(2)</code> and <code>fdatasync(2)</code>.", 21300,
+ NETDATA_SYNC_FSYNC_IDX, NETDATA_SYNC_FDATASYNC_IDX);
+
+ if (local_syscalls[NETDATA_SYNC_MSYNC_IDX].enabled)
+ ebpf_create_sync_chart(NETDATA_EBPF_MSYNC_CHART,
+ "Monitor calls for <code>msync(2)</code>.", 21301,
+ NETDATA_SYNC_MSYNC_IDX, NETDATA_SYNC_MSYNC_IDX);
+
+ if (local_syscalls[NETDATA_SYNC_SYNC_IDX].enabled || local_syscalls[NETDATA_SYNC_SYNCFS_IDX].enabled)
+ ebpf_create_sync_chart(NETDATA_EBPF_SYNC_CHART,
+ "Monitor calls for <code>sync(2)</code> and <code>syncfs(2)</code>.", 21302,
+ NETDATA_SYNC_SYNC_IDX, NETDATA_SYNC_SYNCFS_IDX);
+
+ if (local_syscalls[NETDATA_SYNC_SYNC_FILE_RANGE_IDX].enabled)
+ ebpf_create_sync_chart(NETDATA_EBPF_FILE_SEGMENT_CHART,
+ "Monitor calls for <code>sync_file_range(2)</code>.", 21303,
+ NETDATA_SYNC_SYNC_FILE_RANGE_IDX, NETDATA_SYNC_SYNC_FILE_RANGE_IDX);
+}
+
+/**
+ * Parse Syscalls
+ *
+ * Parse syscall options available inside ebpf.d/sync.conf
+ */
+static void ebpf_sync_parse_syscalls()
+{
+ int i;
+ for (i = 0; local_syscalls[i].syscall; i++) {
+ local_syscalls[i].enabled = appconfig_get_boolean(&sync_config, NETDATA_SYNC_CONFIG_NAME,
+ local_syscalls[i].syscall, CONFIG_BOOLEAN_YES);
+ }
}
/**
@@ -188,25 +360,22 @@ void *ebpf_sync_thread(void *ptr)
fill_ebpf_data(&sync_data);
ebpf_update_module(em, &sync_config, NETDATA_SYNC_CONFIG_FILE);
+ ebpf_sync_parse_syscalls();
if (!em->enabled)
goto endsync;
- if (ebpf_update_kernel(&sync_data)) {
- pthread_mutex_unlock(&lock);
- goto endsync;
- }
-
- probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects, sync_data.map_fd);
- if (!probe_links) {
+ if (ebpf_sync_initialize_syscall(em)) {
pthread_mutex_unlock(&lock);
goto endsync;
}
- int algorithm = NETDATA_EBPF_INCREMENTAL_IDX;
- ebpf_global_labels(&sync_counter_aggregated_data, &sync_counter_publish_aggregated,
+ int algorithms[NETDATA_SYNC_IDX_END] = { NETDATA_EBPF_INCREMENTAL_IDX, NETDATA_EBPF_INCREMENTAL_IDX,
+ NETDATA_EBPF_INCREMENTAL_IDX, NETDATA_EBPF_INCREMENTAL_IDX,
+ NETDATA_EBPF_INCREMENTAL_IDX, NETDATA_EBPF_INCREMENTAL_IDX };
+ ebpf_global_labels(sync_counter_aggregated_data, sync_counter_publish_aggregated,
sync_counter_dimension_name, sync_counter_dimension_name,
- &algorithm, NETDATA_SYNC_END);
+ algorithms, NETDATA_SYNC_IDX_END);
pthread_mutex_lock(&lock);
ebpf_create_sync_charts();
diff --git a/collectors/ebpf.plugin/ebpf_sync.h b/collectors/ebpf.plugin/ebpf_sync.h
index bec7a428a8..458318218b 100644
--- a/collectors/ebpf.plugin/ebpf_sync.h
+++ b/collectors/ebpf.plugin/ebpf_sync.h
@@ -5,12 +5,38 @@
// charts
#define NETDATA_EBPF_SYNC_CHART "sync"
+#define NETDATA_EBPF_MSYNC_CHART "memory_map"
+#define NETDATA_EBPF_FILE_SYNC_CHART "file_sync"
+#define NETDATA_EBPF_FILE_SEGMENT_CHART "file_segment"
#define NETDATA_EBPF_SYNC_SUBMENU "synchronization (eBPF)"
#define NETDATA_EBPF_SYNC_SLEEP_MS 800000ULL
// configuration file
#define NETDATA_SYNC_CONFIG_FILE "sync.conf"
+#define NETDATA_SYNC_CONFIG_NAME "syscalls"
+
+enum sync_syscalls_index {
+ NETDATA_SYNC_SYNC_IDX,
+ NETDATA_SYNC_SYNCFS_IDX,
+ NETDATA_SYNC_MSYNC_IDX,
+ NETDATA_SYNC_FSYNC_IDX,
+ NETDATA_SYNC_FDATASYNC_IDX,
+ NETDATA_SYNC_SYNC_FILE_RANGE_IDX,
+
+ NETDATA_SYNC_IDX_END
+};
+
+typedef struct ebpf_sync_syscalls {
+ char *syscall;
+ int enabled;
+ uint32_t flags;
+
+ struct bpf_object *objects;
+ struct bpf_link **probe_links;
+
+ ebpf_data_t kernel_info;
+} ebpf_sync_syscalls_t;
enum netdata_sync_charts {
NETDATA_SYNC_CALL,
diff --git a/packaging/ebpf.checksums b/packaging/ebpf.checksums
index de89202a0e..d8fdbb699e 100644
--- a/packaging/ebpf.checksums
+++ b/packaging/ebpf.checksums
@@ -1,3 +1,3 @@
-fd93c6cda92c1eda6bedb384dd79095bb1ba7e87493153ec455efa9cb6365b48 netdata-kernel-collector-glibc-v0.6.0.tar.xz
-a19e776eb714f2ed08eb4e43bfd9798a06fad8f1b15421e18b22a36583f41a6f netdata-kernel-collector-musl-v0.6.0.tar.xz
-582b2c5dec077266c5f993f3a735cc46f09448cd6b24e6397daafde97616b113 netdata-kernel-collector-static-v0.6.0.tar.xz
+380e31fe143e7b53bcebaaf03a04d143ae82e13318b264461ebb5d3ac9026ae5 netdata-kernel-collector-glibc-v0.6.1.tar.xz
+5a196ab8a00d307a4f6a5c213178bd62e5720173f433afc6e77dfa911fb6ca56 netdata-kernel-collector-musl-v0.6.1.tar.xz
+683e6676c1eee0cd4a7da5be953e94052e780de1ca375146a488d62593220c46 netdata-kernel-collector-static-v0.6.1.tar.xz
diff --git a/packaging/ebpf.version b/packaging/ebpf.version
index 60f6343282..14909610e6 100644
--- a/packaging/ebpf.version
+++ b/packaging/ebpf.version
@@ -1 +1 @@
-v0.6.0
+v0.6.1
diff --git a/web/gui/dashboard_info.js b/web/gui/dashboard_info.js
index 6437265cdd..bf76855eb5 100644
--- a/web/gui/dashboard_info.js
+++ b/web/gui/dashboard_info.js
@@ -1035,7 +1035,19 @@ netdataDashboard.context = {
},
'mem.sync': {
- info: 'System calls for <code>sync()</code> which flushes file system buffers to storage devices. These calls can cause performance perturbations, and it can be useful to know if they are happening and how frequently. Based on the eBPF <a href="https://github.com/iovisor/bcc/blob/master/tools/syncsnoop.py" target="_blank">syncsnoop</a> from BCC tools.'
+ info: 'System calls for <a href="https://man7.org/linux/man-pages/man2/sync.2.html" target="_blank">sync() and syncfs()</a> which flush the file system buffers to storage devices. Performance perturbations might be caused by these calls. The <code>sync()</code> calls are based on the eBPF <a href="https://github.com/iovisor/bcc/blob/master/tools/syncsnoop.py" target="_blank">syncsnoop</a> from BCC tools.'
+ },
+
+ 'mem.file_sync': {
+ info: 'System calls for <a href="https://man7.org/linux/man-pages/man2/fsync.2.html" target="_blank">fsync() and fdatasync()</a> transfer all modified page caches for the files on disk devices. These calls block until the device reports that the transfer has been completed.'
+ },
+
+ 'mem.memory_map': {
+ info: 'System calls for <a href="https://man7.org/linux/man-pages/man2/msync.2.html" target="_blank">msync()</a> which flushes changes made to the in-core copy of a file that was mapped.'
+ },
+
+ 'mem.file_segment': {
+ info: 'System calls for <a href="https://man7.org/linux/man-pages/man2/sync_file_range.2.html" target="_blank">sync_file_range()</a> permits fine control when synchronizing the open file referred to by the file descriptor fd with disk. This system call is extremely dangerous and should not be used in portable programs.'
},
// ------------------------------------------------------------------------