summaryrefslogtreecommitdiffstats
path: root/collectors
diff options
context:
space:
mode:
authorthiagoftsm <thiagoftsm@gmail.com>2021-04-15 18:23:55 +0000
committerGitHub <noreply@github.com>2021-04-15 18:23:55 +0000
commitfa12e3bb0f0cc368b97fd59e4b99073ae647d976 (patch)
treee35374faac6f190e3a342a43020400ede78012ad /collectors
parentd416099ffb8ad1d15cf77aa6b4a40e5f3b855ee3 (diff)
Bring flexible adjust for eBPF hash tables (#10962)
Give possibility for users to set hash table size.
Diffstat (limited to 'collectors')
-rw-r--r--collectors/ebpf.plugin/README.md21
-rw-r--r--collectors/ebpf.plugin/ebpf.c35
-rw-r--r--collectors/ebpf.plugin/ebpf.d.conf4
-rw-r--r--collectors/ebpf.plugin/ebpf.d/cachestat.conf2
-rw-r--r--collectors/ebpf.plugin/ebpf.d/network.conf11
-rw-r--r--collectors/ebpf.plugin/ebpf.d/process.conf2
-rw-r--r--collectors/ebpf.plugin/ebpf.h1
-rw-r--r--collectors/ebpf.plugin/ebpf_cachestat.c6
-rw-r--r--collectors/ebpf.plugin/ebpf_process.c6
-rw-r--r--collectors/ebpf.plugin/ebpf_socket.c35
-rw-r--r--collectors/ebpf.plugin/ebpf_socket.h15
11 files changed, 124 insertions, 14 deletions
diff --git a/collectors/ebpf.plugin/README.md b/collectors/ebpf.plugin/README.md
index c264b3f5f6..fb5c8b8d90 100644
--- a/collectors/ebpf.plugin/README.md
+++ b/collectors/ebpf.plugin/README.md
@@ -149,6 +149,7 @@ accepts the following values: ​
new charts for the return of these functions, such as errors. Monitoring function returns can help in debugging
software, such as failing to close file descriptors or creating zombie processes.
- `update every`: Number of seconds used for eBPF to send data for Netdata.
+- `pid table size`: Defines the maximum number of PIDs stored inside the application hash table.
#### Integration with `apps.plugin`
@@ -187,6 +188,11 @@ If you want to _disable_ the integration with `apps.plugin` along with the above
apps = yes
```
+When the integration is enabled, the eBPF collector allocates memory for each running process. The total
+ allocated memory depends directly on the kernel version. When the eBPF plugin runs on kernels newer than `4.15`,
+ it uses per-CPU maps to speed up hash table updates. This also means that data for the same PID is stored
+ once for each processor it runs on.
+
#### `[ebpf programs]`
The eBPF collector enables and runs the following eBPF programs by default:
@@ -347,13 +353,16 @@ mount these filesystems on startup. More information can be found in the [ftrace
## Performance
-Because eBPF monitoring is complex, we are evaluating the performance of this new collector in various real-world
-conditions, across various system loads, and when monitoring complex applications.
+eBPF monitoring is complex and produces a large volume of metrics. We've discovered scenarios where the eBPF plugin
+significantly increases kernel memory usage by several hundred MB.
+
+If your node is experiencing high memory usage and there is no obvious culprit to be found in the `apps.mem` chart,
+consider testing for high kernel memory usage by [disabling eBPF monitoring](#configuration). Next,
+[restart Netdata](/docs/configure/start-stop-restart.md) with `sudo systemctl restart netdata` to see if system
+memory usage (see the `system.ram` chart) has dropped significantly.
-Our [initial testing](https://github.com/netdata/netdata/issues/8195) shows the performance of the eBPF collector is
-nearly identical to our [apps.plugin collector](/collectors/apps.plugin/README.md), despite collecting and displaying
-much more sophisticated metrics. You can now use the eBPF to gather deeper insights without affecting the performance of
-your complex applications at any load.
+Beginning with `v1.31`, kernel memory usage is configurable via the [`pid table size` setting](#ebpf-load-mode)
+in `ebpf.d.conf`.
## SELinux
diff --git a/collectors/ebpf.plugin/ebpf.c b/collectors/ebpf.plugin/ebpf.c
index fd5341bffd..838d213e6e 100644
--- a/collectors/ebpf.plugin/ebpf.c
+++ b/collectors/ebpf.plugin/ebpf.c
@@ -77,19 +77,22 @@ pthread_cond_t collect_data_cond_var;
ebpf_module_t ebpf_modules[] = {
{ .thread_name = "process", .config_name = "process", .enabled = 0, .start_routine = ebpf_process_thread,
.update_time = 1, .global_charts = 1, .apps_charts = 1, .mode = MODE_ENTRY,
- .optional = 0, .apps_routine = ebpf_process_create_apps_charts },
+ .optional = 0, .apps_routine = ebpf_process_create_apps_charts, .maps = NULL,
+ .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE},
{ .thread_name = "socket", .config_name = "socket", .enabled = 0, .start_routine = ebpf_socket_thread,
.update_time = 1, .global_charts = 1, .apps_charts = 1, .mode = MODE_ENTRY,
- .optional = 0, .apps_routine = ebpf_socket_create_apps_charts },
+ .optional = 0, .apps_routine = ebpf_socket_create_apps_charts, .maps = NULL,
+ .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE},
{ .thread_name = "cachestat", .config_name = "cachestat", .enabled = 0, .start_routine = ebpf_cachestat_thread,
- .update_time = 1, .global_charts = 1, .apps_charts = 1, .mode = MODE_ENTRY,
- .optional = 0, .apps_routine = ebpf_cachestat_create_apps_charts },
+ .update_time = 1, .global_charts = 1, .apps_charts = 1, .mode = MODE_ENTRY,
+ .optional = 0, .apps_routine = ebpf_cachestat_create_apps_charts, .maps = NULL,
+ .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE},
{ .thread_name = "sync", .config_name = "sync", .enabled = 0, .start_routine = ebpf_sync_thread,
- .update_time = 1, .global_charts = 1, .apps_charts = 1, .mode = MODE_ENTRY,
- .optional = 0, .apps_routine = NULL },
+ .update_time = 1, .global_charts = 1, .apps_charts = 1, .mode = MODE_ENTRY,
+ .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE },
{ .thread_name = NULL, .enabled = 0, .start_routine = NULL, .update_time = 1,
.global_charts = 0, .apps_charts = 1, .mode = MODE_ENTRY,
- .optional = 0, .apps_routine = NULL },
+ .optional = 0, .apps_routine = NULL, .maps = NULL, .pid_map_size = 0 },
};
// Link with apps.plugin
@@ -774,6 +777,22 @@ static void ebpf_update_interval()
}
/**
+ * Update PID table size
+ *
+ * Update default size with value from user
+ */
+static void ebpf_update_table_size()
+{
+    // Read the option as a wide signed number first, so an invalid value can be detected
+    // before it is narrowed to the 32-bit size stored in each module.
+    long long requested = appconfig_get_number(&collector_config, EBPF_GLOBAL_SECTION,
+                                               EBPF_CFG_PID_SIZE, ND_EBPF_DEFAULT_PID_SIZE);
+
+    // The value comes from a user-edited file. Zero is not a usable table size, and a negative or
+    // too-large number would be truncated into an arbitrary size, so fall back to the default.
+    uint32_t value = ND_EBPF_DEFAULT_PID_SIZE;
+    if (requested > 0 && requested <= (long long)UINT32_MAX)
+        value = (uint32_t)requested;
+
+    // Apply the same PID table size to every module; the list ends at the entry without thread_name.
+    int i;
+    for (i = 0; ebpf_modules[i].thread_name; i++) {
+        ebpf_modules[i].pid_map_size = value;
+    }
+}
+
+
+/**
* Read collector values
*
* @param disable_apps variable to store information related to apps.
@@ -793,6 +812,8 @@ static void read_collector_values(int *disable_apps)
ebpf_update_interval();
+ ebpf_update_table_size();
+
// This is kept to keep compatibility
uint32_t enabled = appconfig_get_boolean(&collector_config, EBPF_GLOBAL_SECTION, "disable apps",
CONFIG_BOOLEAN_NO);
diff --git a/collectors/ebpf.plugin/ebpf.d.conf b/collectors/ebpf.plugin/ebpf.d.conf
index 7191d7416c..4f436790d6 100644
--- a/collectors/ebpf.plugin/ebpf.d.conf
+++ b/collectors/ebpf.plugin/ebpf.d.conf
@@ -11,10 +11,14 @@
# 'no'.
#
# The `update every` option defines the number of seconds used to read data from kernel and send to netdata
+#
+# The `pid table size` defines the maximum number of PIDs stored in the application hash tables.
+#
[global]
ebpf load mode = entry
apps = yes
update every = 1
+ pid table size = 32768
#
# eBPF Programs
diff --git a/collectors/ebpf.plugin/ebpf.d/cachestat.conf b/collectors/ebpf.plugin/ebpf.d/cachestat.conf
index 78277cf560..0c4d991dfd 100644
--- a/collectors/ebpf.plugin/ebpf.d/cachestat.conf
+++ b/collectors/ebpf.plugin/ebpf.d/cachestat.conf
@@ -7,8 +7,10 @@
# If you want to disable the integration with `apps.plugin` along with the above charts, change the setting `apps` to
# 'no'.
#
+# The `pid table size` defines the maximum number of PIDs stored inside the application hash table.
#
[global]
ebpf load mode = entry
apps = yes
update every = 2
+ pid table size = 32768
diff --git a/collectors/ebpf.plugin/ebpf.d/network.conf b/collectors/ebpf.plugin/ebpf.d/network.conf
index b033bc39c0..6bbd49a497 100644
--- a/collectors/ebpf.plugin/ebpf.d/network.conf
+++ b/collectors/ebpf.plugin/ebpf.d/network.conf
@@ -7,11 +7,20 @@
# If you want to disable the integration with `apps.plugin` along with the above charts, change the setting `apps` to
# 'no'.
#
-#
+# The following options change the hash table size:
+# `bandwidth table size`: Maximum number of connections monitored
+# `ipv4 connection table size`: Maximum number of IPV4 connections monitored
+# `ipv6 connection table size`: Maximum number of IPV6 connections monitored
+# `udp connection table size`: Maximum number of UDP connections monitored
+#
[global]
ebpf load mode = entry
apps = yes
update every = 1
+ bandwidth table size = 16384
+ ipv4 connection table size = 16384
+ ipv6 connection table size = 16384
+ udp connection table size = 4096
#
# Network Connection
diff --git a/collectors/ebpf.plugin/ebpf.d/process.conf b/collectors/ebpf.plugin/ebpf.d/process.conf
index 7806dc8443..511da95adc 100644
--- a/collectors/ebpf.plugin/ebpf.d/process.conf
+++ b/collectors/ebpf.plugin/ebpf.d/process.conf
@@ -7,8 +7,10 @@
# If you want to disable the integration with `apps.plugin` along with the above charts, change the setting `apps` to
# 'no'.
#
+# The `pid table size` defines the maximum number of PIDs stored inside the hash table.
#
[global]
ebpf load mode = entry
apps = yes
update every = 1
+ pid table size = 32768
diff --git a/collectors/ebpf.plugin/ebpf.h b/collectors/ebpf.plugin/ebpf.h
index 66a00a9dd8..59cb1f2359 100644
--- a/collectors/ebpf.plugin/ebpf.h
+++ b/collectors/ebpf.plugin/ebpf.h
@@ -215,6 +215,7 @@ extern void ebpf_socket_create_apps_charts(struct ebpf_module *em, void *ptr);
extern void ebpf_cachestat_create_apps_charts(struct ebpf_module *em, void *root);
extern void ebpf_one_dimension_write_charts(char *family, char *chart, char *dim, long long v1);
extern collected_number get_value_from_structure(char *basis, size_t offset);
+extern void ebpf_update_pid_table(ebpf_local_maps_t *pid, ebpf_module_t *em);
#define EBPF_MAX_SYNCHRONIZATION_TIME 300
diff --git a/collectors/ebpf.plugin/ebpf_cachestat.c b/collectors/ebpf.plugin/ebpf_cachestat.c
index 6f5fd26bad..4f8121caa9 100644
--- a/collectors/ebpf.plugin/ebpf_cachestat.c
+++ b/collectors/ebpf.plugin/ebpf_cachestat.c
@@ -24,6 +24,10 @@ struct netdata_static_thread cachestat_threads = {"CACHESTAT KERNEL",
NULL, NULL, 1, NULL,
NULL, NULL};
+// Hash tables of the cachestat program. Entry 0 is handed to ebpf_update_pid_table() by
+// ebpf_cachestat_thread(); the last entry has a NULL name (presumably the end-of-list marker).
+static ebpf_local_maps_t cachestat_maps[] = {
+    { .name = "cstat_pid", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, .user_input = 0 },
+    { .name = NULL, .internal_input = 0, .user_input = 0 }
+};
+
static int *map_fd = NULL;
struct config cachestat_config = { .first_section = NULL,
@@ -608,9 +612,11 @@ void *ebpf_cachestat_thread(void *ptr)
netdata_thread_cleanup_push(ebpf_cachestat_cleanup, ptr);
ebpf_module_t *em = (ebpf_module_t *)ptr;
+ em->maps = cachestat_maps;
fill_ebpf_data(&cachestat_data);
ebpf_update_module(em, &cachestat_config, NETDATA_CACHESTAT_CONFIG_FILE);
+ ebpf_update_pid_table(&cachestat_maps[0], em);
if (!em->enabled)
goto endcachestat;
diff --git a/collectors/ebpf.plugin/ebpf_process.c b/collectors/ebpf.plugin/ebpf_process.c
index 8d5714ef82..9b15c84074 100644
--- a/collectors/ebpf.plugin/ebpf_process.c
+++ b/collectors/ebpf.plugin/ebpf_process.c
@@ -18,6 +18,10 @@ static char *process_id_names[NETDATA_KEY_PUBLISH_PROCESS_END] = { "do_sys_open"
"release_task", "_do_fork", "sys_clone" };
static char *status[] = { "process", "zombie" };
+// Hash tables of the process program. Entry 0 is handed to ebpf_update_pid_table() by
+// ebpf_process_thread(); the last entry has a NULL name (presumably the end-of-list marker).
+static ebpf_local_maps_t process_maps[] = {
+    { .name = "tbl_pid_stats", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, .user_input = 0 },
+    { .name = NULL, .internal_input = 0, .user_input = 0 }
+};
+
static netdata_idx_t *process_hash_values = NULL;
static netdata_syscall_stat_t process_aggregated_data[NETDATA_KEY_PUBLISH_PROCESS_END];
static netdata_publish_syscall_t process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_END];
@@ -1029,6 +1033,7 @@ void *ebpf_process_thread(void *ptr)
netdata_thread_cleanup_push(ebpf_process_cleanup, ptr);
ebpf_module_t *em = (ebpf_module_t *)ptr;
+ em->maps = process_maps;
process_enabled = em->enabled;
fill_ebpf_data(&process_data);
@@ -1041,6 +1046,7 @@ void *ebpf_process_thread(void *ptr)
}
ebpf_update_module(em, &process_config, NETDATA_PROCESS_CONFIG_FILE);
+ ebpf_update_pid_table(&process_maps[0], em);
set_local_pointers();
probe_links = ebpf_load_program(ebpf_plugin_dir, em, kernel_string, &objects, process_data.map_fd);
diff --git a/collectors/ebpf.plugin/ebpf_socket.c b/collectors/ebpf.plugin/ebpf_socket.c
index 075426b52b..cbb4dded08 100644
--- a/collectors/ebpf.plugin/ebpf_socket.c
+++ b/collectors/ebpf.plugin/ebpf_socket.c
@@ -16,6 +16,20 @@ static char *socket_dimension_names[NETDATA_MAX_SOCKET_VECTOR] = { "sent", "rece
static char *socket_id_names[NETDATA_MAX_SOCKET_VECTOR] = { "tcp_sendmsg", "tcp_cleanup_rbuf", "tcp_close",
"udp_sendmsg", "udp_recvmsg", "tcp_retransmit_skb" };
+// Hash tables of the socket program. Each entry sits at the position named by its
+// `enum ebpf_socket_table_list` value, because parse_table_size_options() indexes the array with
+// those enumerators. The last entry has a NULL name (presumably the end-of-list marker).
+static ebpf_local_maps_t socket_maps[] = {
+    [NETDATA_SOCKET_TABLE_BANDWIDTH] = { .name = "tbl_bandwidth",
+                                         .internal_input = NETDATA_COMPILED_CONNECTIONS_ALLOWED,
+                                         .user_input = NETDATA_MAXIMUM_CONNECTIONS_ALLOWED },
+    [NETDATA_SOCKET_TABLE_IPV4] = { .name = "tbl_conn_ipv4",
+                                    .internal_input = NETDATA_COMPILED_CONNECTIONS_ALLOWED,
+                                    .user_input = NETDATA_MAXIMUM_CONNECTIONS_ALLOWED },
+    [NETDATA_SOCKET_TABLE_IPV6] = { .name = "tbl_conn_ipv6",
+                                    .internal_input = NETDATA_COMPILED_CONNECTIONS_ALLOWED,
+                                    .user_input = NETDATA_MAXIMUM_CONNECTIONS_ALLOWED },
+    [NETDATA_SOCKET_TABLE_UDP] = { .name = "tbl_nv_udp_conn_stats",
+                                   .internal_input = NETDATA_COMPILED_UDP_CONNECTIONS_ALLOWED,
+                                   .user_input = NETDATA_MAXIMUM_UDP_CONNECTIONS_ALLOWED },
+    { .name = NULL, .internal_input = 0, .user_input = 0 }
+};
+
static netdata_idx_t *socket_hash_values = NULL;
static netdata_syscall_stat_t socket_aggregated_data[NETDATA_MAX_SOCKET_VECTOR];
static netdata_publish_syscall_t socket_publish_aggregated[NETDATA_MAX_SOCKET_VECTOR];
@@ -2807,6 +2821,25 @@ void parse_service_name_section(struct config *cfg)
}
}
+/**
+ * Checked table size
+ *
+ * Narrow a number read from the configuration file to a 32-bit table size.
+ *
+ * @param requested the number read from the configuration file.
+ * @param fallback  the size used when `requested` is not a usable size.
+ *
+ * @return It returns `requested` when it is positive and fits in 32 bits, otherwise `fallback`.
+ */
+static uint32_t ebpf_socket_checked_table_size(long long requested, uint32_t fallback)
+{
+    // Zero is not a usable table size, and a negative or too-large number would be truncated
+    // into an arbitrary size by the cast.
+    if (requested <= 0 || requested > (long long)UINT32_MAX)
+        return fallback;
+
+    return (uint32_t)requested;
+}
+
+/**
+ * Parse table size options
+ *
+ * Read the size requested for each socket hash table from section EBPF_GLOBAL_SECTION and store it
+ * as `user_input` of the matching `socket_maps` entry. An option that is missing or holds an
+ * unusable number keeps the default size of its table.
+ *
+ * @param cfg the configuration structure.
+ */
+void parse_table_size_options(struct config *cfg)
+{
+    socket_maps[NETDATA_SOCKET_TABLE_BANDWIDTH].user_input = ebpf_socket_checked_table_size(
+        appconfig_get_number(cfg, EBPF_GLOBAL_SECTION, EBPF_CONFIG_BANDWIDTH_SIZE,
+                             NETDATA_MAXIMUM_CONNECTIONS_ALLOWED),
+        NETDATA_MAXIMUM_CONNECTIONS_ALLOWED);
+
+    socket_maps[NETDATA_SOCKET_TABLE_IPV4].user_input = ebpf_socket_checked_table_size(
+        appconfig_get_number(cfg, EBPF_GLOBAL_SECTION, EBPF_CONFIG_IPV4_SIZE,
+                             NETDATA_MAXIMUM_CONNECTIONS_ALLOWED),
+        NETDATA_MAXIMUM_CONNECTIONS_ALLOWED);
+
+    socket_maps[NETDATA_SOCKET_TABLE_IPV6].user_input = ebpf_socket_checked_table_size(
+        appconfig_get_number(cfg, EBPF_GLOBAL_SECTION, EBPF_CONFIG_IPV6_SIZE,
+                             NETDATA_MAXIMUM_CONNECTIONS_ALLOWED),
+        NETDATA_MAXIMUM_CONNECTIONS_ALLOWED);
+
+    socket_maps[NETDATA_SOCKET_TABLE_UDP].user_input = ebpf_socket_checked_table_size(
+        appconfig_get_number(cfg, EBPF_GLOBAL_SECTION, EBPF_CONFIG_UDP_SIZE,
+                             NETDATA_MAXIMUM_UDP_CONNECTIONS_ALLOWED),
+        NETDATA_MAXIMUM_UDP_CONNECTIONS_ALLOWED);
+}
+
/**
* Socket thread
*
@@ -2826,11 +2859,13 @@ void *ebpf_socket_thread(void *ptr)
avl_init_lock(&outbound_vectors.tree, compare_sockets);
ebpf_module_t *em = (ebpf_module_t *)ptr;
+ em->maps = socket_maps;
fill_ebpf_data(&socket_data);
ebpf_update_module(em, &socket_config, NETDATA_NETWORK_CONFIG_FILE);
parse_network_viewer_section(&socket_config);
parse_service_name_section(&socket_config);
+ parse_table_size_options(&socket_config);
if (!em->enabled)
goto endsocket;
diff --git a/collectors/ebpf.plugin/ebpf_socket.h b/collectors/ebpf.plugin/ebpf_socket.h
index 832a69b076..8dd422507f 100644
--- a/collectors/ebpf.plugin/ebpf_socket.h
+++ b/collectors/ebpf.plugin/ebpf_socket.h
@@ -24,8 +24,19 @@
#define EBPF_CONFIG_RESOLVE_SERVICE "resolve service names"
#define EBPF_CONFIG_PORTS "ports"
#define EBPF_CONFIG_HOSTNAMES "hostnames"
+#define EBPF_CONFIG_BANDWIDTH_SIZE "bandwidth table size"
+#define EBPF_CONFIG_IPV4_SIZE "ipv4 connection table size"
+#define EBPF_CONFIG_IPV6_SIZE "ipv6 connection table size"
+#define EBPF_CONFIG_UDP_SIZE "udp connection table size"
#define EBPF_MAXIMUM_DIMENSIONS "maximum dimensions"
+enum ebpf_socket_table_list { // Indexes into socket_maps[] (ebpf_socket.c); keep the order equal to that array
+ NETDATA_SOCKET_TABLE_BANDWIDTH, // entry "tbl_bandwidth", sized by "bandwidth table size"
+ NETDATA_SOCKET_TABLE_IPV4, // entry "tbl_conn_ipv4", sized by "ipv4 connection table size"
+ NETDATA_SOCKET_TABLE_IPV6, // entry "tbl_conn_ipv6", sized by "ipv6 connection table size"
+ NETDATA_SOCKET_TABLE_UDP // entry "tbl_nv_udp_conn_stats", sized by "udp connection table size"
+};
+
enum ebpf_socket_publish_index {
NETDATA_IDX_TCP_SENDMSG,
NETDATA_IDX_TCP_CLEANUP_RBUF,
@@ -94,6 +105,10 @@ typedef enum ebpf_socket_idx {
// Port range
#define NETDATA_MINIMUM_PORT_VALUE 1
#define NETDATA_MAXIMUM_PORT_VALUE 65535
+#define NETDATA_COMPILED_CONNECTIONS_ALLOWED 65535U // `internal_input` of the bandwidth, IPv4 and IPv6 entries of socket_maps[]; presumably the size built into the eBPF program - TODO confirm
+#define NETDATA_MAXIMUM_CONNECTIONS_ALLOWED 16384U // despite the name, the DEFAULT `user_input` of those entries and of their "... table size" options
+#define NETDATA_COMPILED_UDP_CONNECTIONS_ALLOWED 8192U // `internal_input` of the UDP entry of socket_maps[]; presumably the size built into the eBPF program - TODO confirm
+#define NETDATA_MAXIMUM_UDP_CONNECTIONS_ALLOWED 4096U // despite the name, the DEFAULT `user_input` of the UDP entry and of "udp connection table size"
#define NETDATA_MINIMUM_IPV4_CIDR 0
#define NETDATA_MAXIMUM_IPV4_CIDR 32