summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-08-13 10:07:23 -0700
committerDavid S. Miller <davem@davemloft.net>2018-08-13 10:07:23 -0700
commitc1617fb4c5eea2ad38b5ffb937a732dbb137117f (patch)
tree7331bdc1fd66799533d2339816c688ded60e0475 /tools
parent961d9735357ec59be3e3e2c37c0ca6494f04461b (diff)
parent2ce3206b9eb3943de09f3bf4ec9134568420d8b9 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says: ==================== pull-request: bpf-next 2018-08-13 The following pull-request contains BPF updates for your *net-next* tree. The main changes are: 1) Add driver XDP support for veth. This can be used in conjunction with redirect of another XDP program e.g. sitting on NIC so the xdp_frame can be forwarded to the peer veth directly without modification, from Toshiaki. 2) Add a new BPF map type REUSEPORT_SOCKARRAY and prog type SK_REUSEPORT in order to provide more control and visibility on where a SO_REUSEPORT sk should be located, and the latter enables to directly select a sk from the bpf map. This also enables map-in-map for application migration use cases, from Martin. 3) Add a new BPF helper bpf_skb_ancestor_cgroup_id() that returns the id of cgroup v2 that is the ancestor of the cgroup associated with the skb at the ancestor_level, from Andrey. 4) Implement BPF fs map pretty-print support based on BTF data for regular hash table and LRU map, from Yonghong. 5) Decouple the ability to attach BTF for a map from the key and value pretty-printer in BPF fs, and enable further support of BTF for maps for percpu and LPM trie, from Daniel. 6) Implement a better BPF sample of using XDP's CPU redirect feature for load balancing SKB processing to remote CPU. The sample implements the same XDP load balancing as Suricata does which is symmetric hash based on IP and L4 protocol, from Jesper. 7) Revert adding NULL pointer check with WARN_ON_ONCE() in __xdp_return()'s critical path as it is ensured that the allocator is present, from Björn. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'tools')
-rw-r--r--tools/include/uapi/linux/bpf.h56
-rw-r--r--tools/lib/bpf/bpf.c1
-rw-r--r--tools/lib/bpf/bpf.h1
-rw-r--r--tools/lib/bpf/libbpf.c1
-rw-r--r--tools/testing/selftests/bpf/Makefile11
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h8
-rw-r--r--tools/testing/selftests/bpf/bpf_util.h4
-rw-r--r--tools/testing/selftests/bpf/test_align.c5
-rw-r--r--tools/testing/selftests/bpf/test_btf.c92
-rw-r--r--tools/testing/selftests/bpf/test_maps.c262
-rw-r--r--tools/testing/selftests/bpf/test_select_reuseport.c688
-rw-r--r--tools/testing/selftests/bpf/test_select_reuseport_common.h36
-rw-r--r--tools/testing/selftests/bpf/test_select_reuseport_kern.c180
-rwxr-xr-xtools/testing/selftests/bpf/test_skb_cgroup_id.sh62
-rw-r--r--tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c47
-rw-r--r--tools/testing/selftests/bpf/test_skb_cgroup_id_user.c187
-rw-r--r--tools/testing/selftests/bpf/test_sock.c5
-rw-r--r--tools/testing/selftests/bpf/test_sock_addr.c5
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c5
19 files changed, 1615 insertions, 41 deletions
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index dd5758dc35d3..66917a4eba27 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -126,6 +126,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_XSKMAP,
BPF_MAP_TYPE_SOCKHASH,
BPF_MAP_TYPE_CGROUP_STORAGE,
+ BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
};
enum bpf_prog_type {
@@ -150,6 +151,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
BPF_PROG_TYPE_LWT_SEG6LOCAL,
BPF_PROG_TYPE_LIRC_MODE2,
+ BPF_PROG_TYPE_SK_REUSEPORT,
};
enum bpf_attach_type {
@@ -2091,6 +2093,24 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
+ * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
+ * Description
+ * Return id of cgroup v2 that is ancestor of cgroup associated
+ * with the *skb* at the *ancestor_level*. The root cgroup is at
+ * *ancestor_level* zero and each step down the hierarchy
+ * increments the level. If *ancestor_level* == level of cgroup
+ * associated with *skb*, then return value will be same as that
+ * of **bpf_skb_cgroup_id**\ ().
+ *
+ * The helper is useful to implement policies based on cgroups
+ * that are upper in hierarchy than immediate cgroup associated
+ * with *skb*.
+ *
+ * The format of returned id and helper limitations are same as in
+ * **bpf_skb_cgroup_id**\ ().
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
* u64 bpf_get_current_cgroup_id(void)
* Return
* A 64-bit integer containing the current cgroup id based
@@ -2113,6 +2133,14 @@ union bpf_attr {
* the shared data.
* Return
* Pointer to the local storage area.
+ *
+ * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * Select a SO_REUSEPORT sk from a BPF_MAP_TYPE_REUSEPORT_ARRAY map
+ * It checks the selected sk is matching the incoming
+ * request in the skb.
+ * Return
+ * 0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2196,7 +2224,9 @@ union bpf_attr {
FN(rc_keydown), \
FN(skb_cgroup_id), \
FN(get_current_cgroup_id), \
- FN(get_local_storage),
+ FN(get_local_storage), \
+ FN(sk_select_reuseport), \
+ FN(skb_ancestor_cgroup_id),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -2413,6 +2443,30 @@ struct sk_msg_md {
__u32 local_port; /* stored in host byte order */
};
+struct sk_reuseport_md {
+ /*
+ * Start of directly accessible data. It begins from
+ * the tcp/udp header.
+ */
+ void *data;
+ void *data_end; /* End of directly accessible data */
+ /*
+ * Total length of packet (starting from the tcp/udp header).
+ * Note that the directly accessible bytes (data_end - data)
+ * could be less than this "len". Those bytes could be
+ * indirectly read by a helper "bpf_skb_load_bytes()".
+ */
+ __u32 len;
+ /*
+ * Eth protocol in the mac header (network byte order). e.g.
+ * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD)
+ */
+ __u32 eth_protocol;
+ __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
+ __u32 bind_inany; /* Is sock bound to an INANY address? */
+ __u32 hash; /* A hash of the packet 4 tuples */
+};
+
#define BPF_TAG_SIZE 8
struct bpf_prog_info {
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 9ddc89dae962..60aa4ca8b2c5 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -92,6 +92,7 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
attr.btf_key_type_id = create_attr->btf_key_type_id;
attr.btf_value_type_id = create_attr->btf_value_type_id;
attr.map_ifindex = create_attr->map_ifindex;
+ attr.inner_map_fd = create_attr->inner_map_fd;
return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 0639a30a457d..6f38164b2618 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -39,6 +39,7 @@ struct bpf_create_map_attr {
__u32 btf_key_type_id;
__u32 btf_value_type_id;
__u32 map_ifindex;
+ __u32 inner_map_fd;
};
int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 40211b51427a..2abd0f112627 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1501,6 +1501,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
case BPF_PROG_TYPE_SK_MSG:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
case BPF_PROG_TYPE_LIRC_MODE2:
+ case BPF_PROG_TYPE_SK_REUSEPORT:
return false;
case BPF_PROG_TYPE_UNSPEC:
case BPF_PROG_TYPE_KPROBE:
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 17a7a5818ee1..fff7fb1285fc 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -23,7 +23,7 @@ $(TEST_CUSTOM_PROGS): $(OUTPUT)/%: %.c
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
- test_socket_cookie test_cgroup_storage
+ test_socket_cookie test_cgroup_storage test_select_reuseport
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
@@ -34,7 +34,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
- get_cgroup_id_kern.o socket_cookie_prog.o
+ get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
+ test_skb_cgroup_id_kern.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
@@ -45,10 +46,11 @@ TEST_PROGS := test_kmod.sh \
test_sock_addr.sh \
test_tunnel.sh \
test_lwt_seg6local.sh \
- test_lirc_mode2.sh
+ test_lirc_mode2.sh \
+ test_skb_cgroup_id.sh
# Compile but not part of 'make run_tests'
-TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr
+TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user
include ../lib.mk
@@ -59,6 +61,7 @@ $(TEST_GEN_PROGS): $(BPFOBJ)
$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
+$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
$(OUTPUT)/test_sock: cgroup_helpers.c
$(OUTPUT)/test_sock_addr: cgroup_helpers.c
$(OUTPUT)/test_socket_cookie: cgroup_helpers.c
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 9ba1c72d7cf5..e4be7730222d 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -111,6 +111,8 @@ static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state,
int size, int flags) =
(void *) BPF_FUNC_skb_get_xfrm_state;
+static int (*bpf_sk_select_reuseport)(void *ctx, void *map, void *key, __u32 flags) =
+ (void *) BPF_FUNC_sk_select_reuseport;
static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
(void *) BPF_FUNC_get_stack;
static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
@@ -137,6 +139,10 @@ static unsigned long long (*bpf_get_current_cgroup_id)(void) =
(void *) BPF_FUNC_get_current_cgroup_id;
static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
(void *) BPF_FUNC_get_local_storage;
+static unsigned long long (*bpf_skb_cgroup_id)(void *ctx) =
+ (void *) BPF_FUNC_skb_cgroup_id;
+static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) =
+ (void *) BPF_FUNC_skb_ancestor_cgroup_id;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
@@ -173,6 +179,8 @@ struct bpf_map_def {
static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) =
(void *) BPF_FUNC_skb_load_bytes;
+static int (*bpf_skb_load_bytes_relative)(void *ctx, int off, void *to, int len, __u32 start_header) =
+ (void *) BPF_FUNC_skb_load_bytes_relative;
static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
(void *) BPF_FUNC_skb_store_bytes;
static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) =
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index d0811b3d6a6f..315a44fa32af 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -44,4 +44,8 @@ static inline unsigned int bpf_num_possible_cpus(void)
name[bpf_num_possible_cpus()]
#define bpf_percpu(name, cpu) name[(cpu)].v
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
#endif /* __BPF_UTIL__ */
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
index 6b1b302310fe..5f377ec53f2f 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -18,10 +18,7 @@
#include "../../../include/linux/filter.h"
#include "bpf_rlimit.h"
-
-#ifndef ARRAY_SIZE
-# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
+#include "bpf_util.h"
#define MAX_INSNS 512
#define MAX_MATCHES 16
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index ffdd27737c9e..6b5cfeb7a9cc 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -19,6 +19,7 @@
#include <bpf/btf.h>
#include "bpf_rlimit.h"
+#include "bpf_util.h"
static uint32_t pass_cnt;
static uint32_t error_cnt;
@@ -93,10 +94,6 @@ static int __base_pr(const char *format, ...)
#define MAX_NR_RAW_TYPES 1024
#define BTF_LOG_BUF_SIZE 65535
-#ifndef ARRAY_SIZE
-# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-
static struct args {
unsigned int raw_test_num;
unsigned int file_test_num;
@@ -131,6 +128,8 @@ struct btf_raw_test {
__u32 max_entries;
bool btf_load_err;
bool map_create_err;
+ bool ordered_map;
+ bool lossless_map;
int hdr_len_delta;
int type_off_delta;
int str_off_delta;
@@ -2093,8 +2092,7 @@ struct pprint_mapv {
} aenum;
};
-static struct btf_raw_test pprint_test = {
- .descr = "BTF pretty print test #1",
+static struct btf_raw_test pprint_test_template = {
.raw_types = {
/* unsighed char */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1),
@@ -2146,8 +2144,6 @@ static struct btf_raw_test pprint_test = {
},
.str_sec = "\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum",
.str_sec_size = sizeof("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum"),
- .map_type = BPF_MAP_TYPE_ARRAY,
- .map_name = "pprint_test",
.key_size = sizeof(unsigned int),
.value_size = sizeof(struct pprint_mapv),
.key_type_id = 3, /* unsigned int */
@@ -2155,6 +2151,40 @@ static struct btf_raw_test pprint_test = {
.max_entries = 128 * 1024,
};
+static struct btf_pprint_test_meta {
+ const char *descr;
+ enum bpf_map_type map_type;
+ const char *map_name;
+ bool ordered_map;
+ bool lossless_map;
+} pprint_tests_meta[] = {
+{
+ .descr = "BTF pretty print array",
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "pprint_test_array",
+ .ordered_map = true,
+ .lossless_map = true,
+},
+
+{
+ .descr = "BTF pretty print hash",
+ .map_type = BPF_MAP_TYPE_HASH,
+ .map_name = "pprint_test_hash",
+ .ordered_map = false,
+ .lossless_map = true,
+},
+
+{
+ .descr = "BTF pretty print lru hash",
+ .map_type = BPF_MAP_TYPE_LRU_HASH,
+ .map_name = "pprint_test_lru_hash",
+ .ordered_map = false,
+ .lossless_map = false,
+},
+
+};
+
+
static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i)
{
v->ui32 = i;
@@ -2166,10 +2196,12 @@ static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i)
v->aenum = i & 0x03;
}
-static int test_pprint(void)
+static int do_test_pprint(void)
{
- const struct btf_raw_test *test = &pprint_test;
+ const struct btf_raw_test *test = &pprint_test_template;
struct bpf_create_map_attr create_attr = {};
+ unsigned int key, nr_read_elems;
+ bool ordered_map, lossless_map;
int map_fd = -1, btf_fd = -1;
struct pprint_mapv mapv = {};
unsigned int raw_btf_size;
@@ -2178,7 +2210,6 @@ static int test_pprint(void)
char pin_path[255];
size_t line_len = 0;
char *line = NULL;
- unsigned int key;
uint8_t *raw_btf;
ssize_t nread;
int err, ret;
@@ -2251,14 +2282,18 @@ static int test_pprint(void)
goto done;
}
- key = 0;
+ nr_read_elems = 0;
+ ordered_map = test->ordered_map;
+ lossless_map = test->lossless_map;
do {
ssize_t nexpected_line;
+ unsigned int next_key;
- set_pprint_mapv(&mapv, key);
+ next_key = ordered_map ? nr_read_elems : atoi(line);
+ set_pprint_mapv(&mapv, next_key);
nexpected_line = snprintf(expected_line, sizeof(expected_line),
"%u: {%u,0,%d,0x%x,0x%x,0x%x,{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s}\n",
- key,
+ next_key,
mapv.ui32, mapv.si32,
mapv.unused_bits2a, mapv.bits28, mapv.unused_bits2b,
mapv.ui64,
@@ -2281,11 +2316,12 @@ static int test_pprint(void)
}
nread = getline(&line, &line_len, pin_file);
- } while (++key < test->max_entries && nread > 0);
+ } while (++nr_read_elems < test->max_entries && nread > 0);
- if (CHECK(key < test->max_entries,
- "Unexpected EOF. key:%u test->max_entries:%u",
- key, test->max_entries)) {
+ if (lossless_map &&
+ CHECK(nr_read_elems < test->max_entries,
+ "Unexpected EOF. nr_read_elems:%u test->max_entries:%u",
+ nr_read_elems, test->max_entries)) {
err = -1;
goto done;
}
@@ -2314,6 +2350,24 @@ done:
return err;
}
+static int test_pprint(void)
+{
+ unsigned int i;
+ int err = 0;
+
+ for (i = 0; i < ARRAY_SIZE(pprint_tests_meta); i++) {
+ pprint_test_template.descr = pprint_tests_meta[i].descr;
+ pprint_test_template.map_type = pprint_tests_meta[i].map_type;
+ pprint_test_template.map_name = pprint_tests_meta[i].map_name;
+ pprint_test_template.ordered_map = pprint_tests_meta[i].ordered_map;
+ pprint_test_template.lossless_map = pprint_tests_meta[i].lossless_map;
+
+ err |= count_result(do_test_pprint());
+ }
+
+ return err;
+}
+
static void usage(const char *cmd)
{
fprintf(stderr, "Usage: %s [-l] [[-r test_num (1 - %zu)] | [-g test_num (1 - %zu)] | [-f test_num (1 - %zu)] | [-p]]\n",
@@ -2409,7 +2463,7 @@ int main(int argc, char **argv)
err |= test_file();
if (args.pprint_test)
- err |= count_result(test_pprint());
+ err |= test_pprint();
if (args.raw_test || args.get_info_test || args.file_test ||
args.pprint_test)
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 6c253343a6f9..4b7c74f5faa7 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -17,7 +17,8 @@
#include <stdlib.h>
#include <sys/wait.h>
-
+#include <sys/socket.h>
+#include <netinet/in.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>
@@ -26,8 +27,21 @@
#include "bpf_util.h"
#include "bpf_rlimit.h"
+#ifndef ENOTSUPP
+#define ENOTSUPP 524
+#endif
+
static int map_flags;
+#define CHECK(condition, tag, format...) ({ \
+ int __ret = !!(condition); \
+ if (__ret) { \
+ printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \
+ printf(format); \
+ exit(-1); \
+ } \
+})
+
static void test_hashmap(int task, void *data)
{
long long key, next_key, first_key, value;
@@ -1150,6 +1164,250 @@ static void test_map_wronly(void)
assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
}
+static void prepare_reuseport_grp(int type, int map_fd,
+ __s64 *fds64, __u64 *sk_cookies,
+ unsigned int n)
+{
+ socklen_t optlen, addrlen;
+ struct sockaddr_in6 s6;
+ const __u32 index0 = 0;
+ const int optval = 1;
+ unsigned int i;
+ u64 sk_cookie;
+ __s64 fd64;
+ int err;
+
+ s6.sin6_family = AF_INET6;
+ s6.sin6_addr = in6addr_any;
+ s6.sin6_port = 0;
+ addrlen = sizeof(s6);
+ optlen = sizeof(sk_cookie);
+
+ for (i = 0; i < n; i++) {
+ fd64 = socket(AF_INET6, type, 0);
+ CHECK(fd64 == -1, "socket()",
+ "sock_type:%d fd64:%lld errno:%d\n",
+ type, fd64, errno);
+
+ err = setsockopt(fd64, SOL_SOCKET, SO_REUSEPORT,
+ &optval, sizeof(optval));
+ CHECK(err == -1, "setsockopt(SO_REUSEEPORT)",
+ "err:%d errno:%d\n", err, errno);
+
+ /* reuseport_array does not allow unbound sk */
+ err = bpf_map_update_elem(map_fd, &index0, &fd64,
+ BPF_ANY);
+ CHECK(err != -1 || errno != EINVAL,
+ "reuseport array update unbound sk",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+
+ err = bind(fd64, (struct sockaddr *)&s6, sizeof(s6));
+ CHECK(err == -1, "bind()",
+ "sock_type:%d err:%d errno:%d\n", type, err, errno);
+
+ if (i == 0) {
+ err = getsockname(fd64, (struct sockaddr *)&s6,
+ &addrlen);
+ CHECK(err == -1, "getsockname()",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+ }
+
+ err = getsockopt(fd64, SOL_SOCKET, SO_COOKIE, &sk_cookie,
+ &optlen);
+ CHECK(err == -1, "getsockopt(SO_COOKIE)",
+ "sock_type:%d err:%d errno:%d\n", type, err, errno);
+
+ if (type == SOCK_STREAM) {
+ /*
+ * reuseport_array does not allow
+ * non-listening tcp sk.
+ */
+ err = bpf_map_update_elem(map_fd, &index0, &fd64,
+ BPF_ANY);
+ CHECK(err != -1 || errno != EINVAL,
+ "reuseport array update non-listening sk",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+ err = listen(fd64, 0);
+ CHECK(err == -1, "listen()",
+ "sock_type:%d, err:%d errno:%d\n",
+ type, err, errno);
+ }
+
+ fds64[i] = fd64;
+ sk_cookies[i] = sk_cookie;
+ }
+}
+
+static void test_reuseport_array(void)
+{
+#define REUSEPORT_FD_IDX(err, last) ({ (err) ? last : !last; })
+
+ const __u32 array_size = 4, index0 = 0, index3 = 3;
+ int types[2] = { SOCK_STREAM, SOCK_DGRAM }, type;
+ __u64 grpa_cookies[2], sk_cookie, map_cookie;
+ __s64 grpa_fds64[2] = { -1, -1 }, fd64 = -1;
+ const __u32 bad_index = array_size;
+ int map_fd, err, t, f;
+ __u32 fds_idx = 0;
+ int fd;
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+ sizeof(__u32), sizeof(__u64), array_size, 0);
+ CHECK(map_fd == -1, "reuseport array create",
+ "map_fd:%d, errno:%d\n", map_fd, errno);
+
+ /* Test lookup/update/delete with invalid index */
+ err = bpf_map_delete_elem(map_fd, &bad_index);
+ CHECK(err != -1 || errno != E2BIG, "reuseport array del >=max_entries",
+ "err:%d errno:%d\n", err, errno);
+
+ err = bpf_map_update_elem(map_fd, &bad_index, &fd64, BPF_ANY);
+ CHECK(err != -1 || errno != E2BIG,
+ "reuseport array update >=max_entries",
+ "err:%d errno:%d\n", err, errno);
+
+ err = bpf_map_lookup_elem(map_fd, &bad_index, &map_cookie);
+ CHECK(err != -1 || errno != ENOENT,
+ "reuseport array update >=max_entries",
+ "err:%d errno:%d\n", err, errno);
+
+ /* Test lookup/delete non existence elem */
+ err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
+ CHECK(err != -1 || errno != ENOENT,
+ "reuseport array lookup not-exist elem",
+ "err:%d errno:%d\n", err, errno);
+ err = bpf_map_delete_elem(map_fd, &index3);
+ CHECK(err != -1 || errno != ENOENT,
+ "reuseport array del not-exist elem",
+ "err:%d errno:%d\n", err, errno);
+
+ for (t = 0; t < ARRAY_SIZE(types); t++) {
+ type = types[t];
+
+ prepare_reuseport_grp(type, map_fd, grpa_fds64,
+ grpa_cookies, ARRAY_SIZE(grpa_fds64));
+
+ /* Test BPF_* update flags */
+ /* BPF_EXIST failure case */
+ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+ BPF_EXIST);
+ CHECK(err != -1 || errno != ENOENT,
+ "reuseport array update empty elem BPF_EXIST",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+ fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
+
+ /* BPF_NOEXIST success case */
+ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+ BPF_NOEXIST);
+ CHECK(err == -1,
+ "reuseport array update empty elem BPF_NOEXIST",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+ fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
+
+ /* BPF_EXIST success case. */
+ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+ BPF_EXIST);
+ CHECK(err == -1,
+ "reuseport array update same elem BPF_EXIST",
+ "sock_type:%d err:%d errno:%d\n", type, err, errno);
+ fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
+
+ /* BPF_NOEXIST failure case */
+ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+ BPF_NOEXIST);
+ CHECK(err != -1 || errno != EEXIST,
+ "reuseport array update non-empty elem BPF_NOEXIST",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+ fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
+
+ /* BPF_ANY case (always succeed) */
+ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+ BPF_ANY);
+ CHECK(err == -1,
+ "reuseport array update same sk with BPF_ANY",
+ "sock_type:%d err:%d errno:%d\n", type, err, errno);
+
+ fd64 = grpa_fds64[fds_idx];
+ sk_cookie = grpa_cookies[fds_idx];
+
+ /* The same sk cannot be added to reuseport_array twice */
+ err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_ANY);
+ CHECK(err != -1 || errno != EBUSY,
+ "reuseport array update same sk with same index",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+
+ err = bpf_map_update_elem(map_fd, &index0, &fd64, BPF_ANY);
+ CHECK(err != -1 || errno != EBUSY,
+ "reuseport array update same sk with different index",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+
+ /* Test delete elem */
+ err = bpf_map_delete_elem(map_fd, &index3);
+ CHECK(err == -1, "reuseport array delete sk",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+
+ /* Add it back with BPF_NOEXIST */
+ err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
+ CHECK(err == -1,
+ "reuseport array re-add with BPF_NOEXIST after del",
+ "sock_type:%d err:%d errno:%d\n", type, err, errno);
+
+ /* Test cookie */
+ err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
+ CHECK(err == -1 || sk_cookie != map_cookie,
+ "reuseport array lookup re-added sk",
+ "sock_type:%d err:%d errno:%d sk_cookie:0x%llx map_cookie:0x%llxn",
+ type, err, errno, sk_cookie, map_cookie);
+
+ /* Test elem removed by close() */
+ for (f = 0; f < ARRAY_SIZE(grpa_fds64); f++)
+ close(grpa_fds64[f]);
+ err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
+ CHECK(err != -1 || errno != ENOENT,
+ "reuseport array lookup after close()",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+ }
+
+ /* Test SOCK_RAW */
+ fd64 = socket(AF_INET6, SOCK_RAW, IPPROTO_UDP);
+ CHECK(fd64 == -1, "socket(SOCK_RAW)", "err:%d errno:%d\n",
+ err, errno);
+ err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
+ CHECK(err != -1 || errno != ENOTSUPP, "reuseport array update SOCK_RAW",
+ "err:%d errno:%d\n", err, errno);
+ close(fd64);
+
+ /* Close the 64 bit value map */
+ close(map_fd);
+
+ /* Test 32 bit fd */
+ map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+ sizeof(__u32), sizeof(__u32), array_size, 0);
+ CHECK(map_fd == -1, "reuseport array create",
+ "map_fd:%d, errno:%d\n", map_fd, errno);
+ prepare_reuseport_grp(SOCK_STREAM, map_fd, &fd64, &sk_cookie, 1);
+ fd = fd64;
+ err = bpf_map_update_elem(map_fd, &index3, &fd, BPF_NOEXIST);
+ CHECK(err == -1, "reuseport array update 32 bit fd",
+ "err:%d errno:%d\n", err, errno);
+ err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
+ CHECK(err != -1 || errno != ENOSPC,
+ "reuseport array lookup 32 bit fd",
+ "err:%d errno:%d\n", err, errno);
+ close(fd);
+ close(map_fd);
+}
+
static void run_all_tests(void)
{
test_hashmap(0, NULL);
@@ -1170,6 +1428,8 @@ static void run_all_tests(void)
test_map_rdonly();
test_map_wronly();
+
+ test_reuseport_array();
}
int main(void)
diff --git a/tools/testing/selftests/bpf/test_select_reuseport.c b/tools/testing/selftests/bpf/test_select_reuseport.c
new file mode 100644
index 000000000000..75646d9b34aa
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_select_reuseport.c
@@ -0,0 +1,688 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <linux/err.h>
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#include <sys/types.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "test_select_reuseport_common.h"
+
+#define MIN_TCPHDR_LEN 20
+#define UDPHDR_LEN 8
+
+#define TCP_SYNCOOKIE_SYSCTL "/proc/sys/net/ipv4/tcp_syncookies"
+#define TCP_FO_SYSCTL "/proc/sys/net/ipv4/tcp_fastopen"
+#define REUSEPORT_ARRAY_SIZE 32
+
+static int result_map, tmp_index_ovr_map, linum_map, data_check_map;
+static enum result expected_results[NR_RESULTS];
+static int sk_fds[REUSEPORT_ARRAY_SIZE];
+static int reuseport_array, outer_map;
+static int select_by_skb_data_prog;
+static int saved_tcp_syncookie;
+static struct bpf_object *obj;
+static int saved_tcp_fo;
+static __u32 index_zero;
+static int epfd;
+
+static union sa46 {
+ struct sockaddr_in6 v6;
+ struct sockaddr_in v4;
+ sa_family_t family;
+} srv_sa;
+
+#define CHECK(condition, tag, format...) ({ \
+ int __ret = !!(condition); \
+ if (__ret) { \
+ printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \
+ printf(format); \
+ exit(-1); \
+ } \
+})
+
+static void create_maps(void)
+{
+ struct bpf_create_map_attr attr = {};
+
+ /* Creating reuseport_array */
+ attr.name = "reuseport_array";
+ attr.map_type = BPF_MAP_TYPE_REUSEPORT_SOCKARRAY;
+ attr.key_size = sizeof(__u32);
+ attr.value_size = sizeof(__u32);
+ attr.max_entries = REUSEPORT_ARRAY_SIZE;
+
+ reuseport_array = bpf_create_map_xattr(&attr);
+ CHECK(reuseport_array == -1, "creating reuseport_array",
+ "reuseport_array:%d errno:%d\n", reuseport_array, errno);
+
+ /* Creating outer_map */
+ attr.name = "outer_map";
+ attr.map_type = BPF_MAP_TYPE_ARRAY_OF_MAPS;
+ attr.key_size = sizeof(__u32);
+ attr.value_size = sizeof(__u32);
+ attr.max_entries = 1;
+ attr.inner_map_fd = reuseport_array;
+ outer_map = bpf_create_map_xattr(&attr);
+ CHECK(outer_map == -1, "creating outer_map",
+ "outer_map:%d errno:%d\n", outer_map, errno);
+}
+
+static void prepare_bpf_obj(void)
+{
+ struct bpf_program *prog;
+ struct bpf_map *map;
+ int err;
+ struct bpf_object_open_attr attr = {
+ .file = "test_select_reuseport_kern.o",
+ .prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
+ };
+
+ obj = bpf_object__open_xattr(&attr);
+ CHECK(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
+ "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
+
+ prog = bpf_program__next(NULL, obj);
+ CHECK(!prog, "get first bpf_program", "!prog\n");
+ bpf_program__set_type(prog, attr.prog_type);
+
+ map = bpf_object__find_map_by_name(obj, "outer_map");
+ CHECK(!map, "find outer_map", "!map\n");
+ err = bpf_map__reuse_fd(map, outer_map);
+ CHECK(err, "reuse outer_map", "err:%d\n", err);
+
+ err = bpf_object__load(obj);
+ CHECK(err, "load bpf_object", "err:%d\n", err);
+
+ select_by_skb_data_prog = bpf_program__fd(prog);
+ CHECK(select_by_skb_data_prog == -1, "get prog fd",
+ "select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
+
+ map = bpf_object__find_map_by_name(obj, "result_map");
+ CHECK(!map, "find result_map", "!map\n");
+ result_map =