From 4178417cc5359c329790a4a8f4a6604612338cca Mon Sep 17 00:00:00 2001
From: Luke Nelson
Date: Thu, 9 Apr 2020 15:17:52 -0700
Subject: arm, bpf: Fix offset overflow for BPF_MEM BPF_DW

This patch fixes an incorrect check in how immediate memory offsets are
computed for BPF_DW on arm.

For BPF_LDX/ST/STX + BPF_DW, the 32-bit arm JIT breaks down an 8-byte
access into two separate 4-byte accesses using off+0 and off+4. If off
fits in imm12, the JIT emits a ldr/str instruction with the immediate
and avoids the use of a temporary register. While the current check
off <= 0xfff ensures that the first immediate off+0 doesn't overflow
imm12, it's not sufficient for the second immediate off+4, which may
cause the second access of BPF_DW to read/write the wrong address.

This patch fixes the problem by changing the check to
off <= 0xfff - 4 for BPF_DW, ensuring off+4 will never overflow.

A side effect of simplifying the check is that it now allows using
negative immediate offsets in ldr/str. This means that small negative
offsets can also avoid the use of a temporary register.

This patch introduces no new failures in test_verifier or test_bpf.c.

Fixes: c5eae692571d6 ("ARM: net: bpf: improve 64-bit store implementation")
Fixes: ec19e02b343db ("ARM: net: bpf: fix LDX instructions")
Co-developed-by: Xi Wang
Signed-off-by: Xi Wang
Signed-off-by: Luke Nelson
Signed-off-by: Daniel Borkmann
Link: https://lore.kernel.org/bpf/20200409221752.28448-1-luke.r.nels@gmail.com
---
 arch/arm/net/bpf_jit_32.c | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index d124f78e20ac..bf85d6db4931 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1000,21 +1000,35 @@ static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[],
 	arm_bpf_put_reg32(dst_hi, rd[0], ctx);
 }
 
+static bool is_ldst_imm(s16 off, const u8 size)
+{
+	s16 off_max = 0;
+
+	switch (size) {
+	case BPF_B:
+	case BPF_W:
+		off_max = 0xfff;
+		break;
+	case BPF_H:
+		off_max = 0xff;
+		break;
+	case BPF_DW:
+		/* Need to make sure off+4 does not overflow. */
+		off_max = 0xfff - 4;
+		break;
+	}
+	return -off_max <= off && off <= off_max;
+}
+
 /* *(size *)(dst + off) = src */
 static inline void emit_str_r(const s8 dst, const s8 src[],
-			      s32 off, struct jit_ctx *ctx, const u8 sz){
+			      s16 off, struct jit_ctx *ctx, const u8 sz){
 	const s8 *tmp = bpf2a32[TMP_REG_1];
-	s32 off_max;
 	s8 rd;
 
 	rd = arm_bpf_get_reg32(dst, tmp[1], ctx);
 
-	if (sz == BPF_H)
-		off_max = 0xff;
-	else
-		off_max = 0xfff;
-
-	if (off < 0 || off > off_max) {
+	if (!is_ldst_imm(off, sz)) {
 		emit_a32_mov_i(tmp[0], off, ctx);
 		emit(ARM_ADD_R(tmp[0], tmp[0], rd), ctx);
 		rd = tmp[0];
@@ -1043,18 +1057,12 @@ static inline void emit_str_r(const s8 dst, const s8 src[],
 
 /* dst = *(size*)(src + off) */
 static inline void emit_ldx_r(const s8 dst[], const s8 src,
-			      s32 off, struct jit_ctx *ctx, const u8 sz){
+			      s16 off, struct jit_ctx *ctx, const u8 sz){
 	const s8 *tmp = bpf2a32[TMP_REG_1];
 	const s8 *rd = is_stacked(dst_lo) ? tmp : dst;
 	s8 rm = src;
-	s32 off_max;
-
-	if (sz == BPF_H)
-		off_max = 0xff;
-	else
-		off_max = 0xfff;
 
-	if (off < 0 || off > off_max) {
+	if (!is_ldst_imm(off, sz)) {
 		emit_a32_mov_i(tmp[0], off, ctx);
 		emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx);
 		rm = tmp[0];
--
cgit v1.2.3

From 1f6cb19be2e231fe092f40decb71f066eba090d7 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko
Date: Fri, 10 Apr 2020 13:26:12 -0700
Subject: bpf: Prevent re-mmap()'ing BPF map as writable for initially r/o mapping

The VM_MAYWRITE flag set during the initial memory mapping determines
whether already mmap()'ed pages can later be remapped as writable through
an mprotect() call. To prevent a user application from rewriting the
contents of a BPF map that was memory-mapped as read-only and subsequently
frozen, remove the VM_MAYWRITE flag completely on an initially read-only
mapping.

Alternatively, we could treat any memory mapping of an unfrozen map as
writable and bump writecnt instead. But there is little legitimate reason
to map a BPF map as read-only and then re-mmap() it as writable through
mprotect(), instead of just mmap()'ing it as read/write from the very
beginning.

Also, at the suggestion of Jann Horn, drop unnecessary refcounting in
mmap operations. We can just rely on the VMA holding a reference to the
BPF map's file.

Fixes: fc9702273e2e ("bpf: Add mmap() support for BPF_MAP_TYPE_ARRAY")
Reported-by: Jann Horn
Signed-off-by: Andrii Nakryiko
Signed-off-by: Daniel Borkmann
Reviewed-by: Jann Horn
Link: https://lore.kernel.org/bpf/20200410202613.3679837-1-andriin@fb.com
---
 kernel/bpf/syscall.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 64783da34202..d85f37239540 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -586,9 +586,7 @@ static void bpf_map_mmap_open(struct vm_area_struct *vma)
 {
 	struct bpf_map *map = vma->vm_file->private_data;
 
-	bpf_map_inc_with_uref(map);
-
-	if (vma->vm_flags & VM_WRITE) {
+	if (vma->vm_flags & VM_MAYWRITE) {
 		mutex_lock(&map->freeze_mutex);
 		map->writecnt++;
 		mutex_unlock(&map->freeze_mutex);
@@ -600,13 +598,11 @@ static void bpf_map_mmap_close(struct vm_area_struct *vma)
 {
 	struct bpf_map *map = vma->vm_file->private_data;
 
-	if (vma->vm_flags & VM_WRITE) {
+	if (vma->vm_flags & VM_MAYWRITE) {
 		mutex_lock(&map->freeze_mutex);
 		map->writecnt--;
 		mutex_unlock(&map->freeze_mutex);
 	}
-
-	bpf_map_put_with_uref(map);
 }
 
 static const struct vm_operations_struct bpf_map_default_vmops = {
@@ -635,14 +631,16 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
 	/* set default open/close callbacks */
 	vma->vm_ops = &bpf_map_default_vmops;
 	vma->vm_private_data = map;
+	vma->vm_flags &= ~VM_MAYEXEC;
+	if (!(vma->vm_flags & VM_WRITE))
+		/* disallow re-mapping with PROT_WRITE */
+		vma->vm_flags &= ~VM_MAYWRITE;
 
 	err = map->ops->map_mmap(map, vma);
 	if (err)
 		goto out;
 
-	bpf_map_inc_with_uref(map);
-
-	if (vma->vm_flags & VM_WRITE)
+	if (vma->vm_flags & VM_MAYWRITE)
 		map->writecnt++;
 out:
 	mutex_unlock(&map->freeze_mutex);
--
cgit v1.2.3

From 642c1654702731ab42a3be771bebbd6ef938f0dc Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko
Date: Fri, 10 Apr 2020 13:26:13 -0700
Subject: selftests/bpf: Validate frozen map contents stays frozen

Test that a frozen and mmap()'ed BPF map can't be mprotect()'ed as
writable or executable memory. Also validate that "downgrading" from
writable to read-only doesn't screw up internal writable count
accounting for the purposes of map freezing.
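For reference, the userspace pattern being validated looks roughly like
the following (a minimal sketch rather than the selftest itself; it
assumes map_fd refers to an mmap()-able BPF_F_MMAPABLE array map created
elsewhere, with map_sz its page-rounded value size, and the helper name
check_frozen_map is made up for illustration):

	#include <stdio.h>
	#include <sys/mman.h>
	#include <bpf/bpf.h>

	static void check_frozen_map(int map_fd, size_t map_sz)
	{
		/* initial mapping is read-only, so VM_MAYWRITE gets cleared */
		void *p = mmap(NULL, map_sz, PROT_READ, MAP_SHARED, map_fd, 0);

		if (p == MAP_FAILED)
			return;
		bpf_map_freeze(map_fd); /* ignoring errors for brevity */
		/* with this patch applied, both mprotect() calls must fail */
		if (!mprotect(p, map_sz, PROT_WRITE))
			fprintf(stderr, "BUG: frozen map remapped writable\n");
		if (!mprotect(p, map_sz, PROT_EXEC))
			fprintf(stderr, "BUG: map mapping remapped executable\n");
		munmap(p, map_sz);
	}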
Signed-off-by: Andrii Nakryiko
Signed-off-by: Daniel Borkmann
Link: https://lore.kernel.org/bpf/20200410202613.3679837-2-andriin@fb.com
---
 tools/testing/selftests/bpf/prog_tests/mmap.c | 62 ++++++++++++++++++++++++++-
 1 file changed, 60 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c
index 16a814eb4d64..56d80adcf4bd 100644
--- a/tools/testing/selftests/bpf/prog_tests/mmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/mmap.c
@@ -19,15 +19,16 @@ void test_mmap(void)
 	const size_t map_sz = roundup_page(sizeof(struct map_data));
 	const int zero = 0, one = 1, two = 2, far = 1500;
 	const long page_size = sysconf(_SC_PAGE_SIZE);
-	int err, duration = 0, i, data_map_fd;
+	int err, duration = 0, i, data_map_fd, data_map_id, tmp_fd;
 	struct bpf_map *data_map, *bss_map;
 	void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp1, *tmp2;
 	struct test_mmap__bss *bss_data;
+	struct bpf_map_info map_info;
+	__u32 map_info_sz = sizeof(map_info);
 	struct map_data *map_data;
 	struct test_mmap *skel;
 	__u64 val = 0;
 
-
 	skel = test_mmap__open_and_load();
 	if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
 		return;
@@ -36,6 +37,14 @@ void test_mmap(void)
 	data_map = skel->maps.data_map;
 	data_map_fd = bpf_map__fd(data_map);
 
+	/* get map's ID */
+	memset(&map_info, 0, map_info_sz);
+	err = bpf_obj_get_info_by_fd(data_map_fd, &map_info, &map_info_sz);
+	if (CHECK(err, "map_get_info", "failed %d\n", errno))
+		goto cleanup;
+	data_map_id = map_info.id;
+
+	/* mmap BSS map */
 	bss_mmaped = mmap(NULL, bss_sz, PROT_READ | PROT_WRITE, MAP_SHARED,
 			  bpf_map__fd(bss_map), 0);
 	if (CHECK(bss_mmaped == MAP_FAILED, "bss_mmap",
@@ -98,6 +107,10 @@ void test_mmap(void)
 		  "data_map freeze succeeded: err=%d, errno=%d\n", err, errno))
 		goto cleanup;
 
+	err = mprotect(map_mmaped, map_sz, PROT_READ);
+	if (CHECK(err, "mprotect_ro", "mprotect to r/o failed %d\n", errno))
+		goto cleanup;
+
 	/* unmap R/W mapping */
 	err = munmap(map_mmaped, map_sz);
 	map_mmaped = NULL;
@@ -111,6 +124,12 @@
 		map_mmaped = NULL;
 		goto cleanup;
 	}
+	err = mprotect(map_mmaped, map_sz, PROT_WRITE);
+	if (CHECK(!err, "mprotect_wr", "mprotect() succeeded unexpectedly!\n"))
+		goto cleanup;
+	err = mprotect(map_mmaped, map_sz, PROT_EXEC);
+	if (CHECK(!err, "mprotect_ex", "mprotect() succeeded unexpectedly!\n"))
+		goto cleanup;
 	map_data = map_mmaped;
 
 	/* map/unmap in a loop to test ref counting */
@@ -197,6 +216,45 @@ void test_mmap(void)
 	CHECK_FAIL(map_data->val[far] != 3 * 321);
 
 	munmap(tmp2, 4 * page_size);
+
+	tmp1 = mmap(NULL, map_sz, PROT_READ, MAP_SHARED, data_map_fd, 0);
+	if (CHECK(tmp1 == MAP_FAILED, "last_mmap", "failed %d\n", errno))
+		goto cleanup;
+
+	test_mmap__destroy(skel);
+	skel = NULL;
+	CHECK_FAIL(munmap(bss_mmaped, bss_sz));
+	bss_mmaped = NULL;
+	CHECK_FAIL(munmap(map_mmaped, map_sz));
+	map_mmaped = NULL;
+
+	/* map should be still held by active mmap */
+	tmp_fd = bpf_map_get_fd_by_id(data_map_id);
+	if (CHECK(tmp_fd < 0, "get_map_by_id", "failed %d\n", errno)) {
+		munmap(tmp1, map_sz);
+		goto cleanup;
+	}
+	close(tmp_fd);
+
+	/* this should release data map finally */
+	munmap(tmp1, map_sz);
+
+	/* we need to wait for RCU grace period */
+	for (i = 0; i < 10000; i++) {
+		__u32 id = data_map_id - 1;
+
+		if (bpf_map_get_next_id(id, &id) || id > data_map_id)
+			break;
+		usleep(1);
+	}
+
+	/* should fail to get map FD by non-existing ID */
+	tmp_fd = bpf_map_get_fd_by_id(data_map_id);
+	if (CHECK(tmp_fd >= 0, "get_map_by_id_after",
+		  "unexpectedly succeeded %d\n", tmp_fd)) {
+		close(tmp_fd);
+		goto cleanup;
+	}
+
 cleanup:
 	if (bss_mmaped)
 		CHECK_FAIL(munmap(bss_mmaped, bss_sz));
--
cgit v1.2.3

From 96b2eb6e77959b4b52f80e7a61d03db77606aac6 Mon Sep 17 00:00:00 2001
From: "Daniel T. Lee"
Date: Fri, 10 Apr 2020 11:06:12 +0900
Subject: tools, bpftool: Fix struct_ops command invalid pointer free

In commit 65c93628599d ("bpftool: Add struct_ops support") a new type of
command named struct_ops has been added. This command requires a kernel
built with CONFIG_DEBUG_INFO_BTF=y; to retrieve BTF info, bpftool uses
the helper get_btf_vmlinux().

When running this command on a kernel without BTF debug info, the
'btf_vmlinux' variable ends up holding an invalid (error-encoded)
pointer, and btf__free() then causes a segfault when executing
'bpftool struct_ops'.

This commit adds pointer validation with IS_ERR() so that an invalid
pointer is not freed, which fixes the segfault.

Fixes: 65c93628599d ("bpftool: Add struct_ops support")
Signed-off-by: Daniel T. Lee
Signed-off-by: Daniel Borkmann
Acked-by: Martin KaFai Lau
Link: https://lore.kernel.org/bpf/20200410020612.2930667-1-danieltimlee@gmail.com
---
 tools/bpf/bpftool/struct_ops.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c
index 2a7befbd11ad..0fe0d584c57e 100644
--- a/tools/bpf/bpftool/struct_ops.c
+++ b/tools/bpf/bpftool/struct_ops.c
@@ -591,6 +591,8 @@ int do_struct_ops(int argc, char **argv)
 
 	err = cmd_select(cmds, argc, argv, do_help);
 
-	btf__free(btf_vmlinux);
+	if (!IS_ERR(btf_vmlinux))
+		btf__free(btf_vmlinux);
+
 	return err;
 }
--
cgit v1.2.3

From dfa74909cb6b846cbdabfc2c3c7de1d507fca075 Mon Sep 17 00:00:00 2001
From: David Ahern
Date: Sun, 12 Apr 2020 07:32:04 -0600
Subject: xdp: Reset prog in dev_change_xdp_fd when fd is negative
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The commit mentioned in the Fixes tag reuses the local prog variable
when looking up an expected_fd. The variable is not reset when fd < 0,
causing a detach with expected_fd set to actually call dev_xdp_install()
for the existing program. The end result is that the detach does not
happen.
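The control-flow pitfall in miniature (a hedged userspace sketch with
illustrative names and values, not the kernel code):

	#include <stdio.h>

	struct prog { const char *name; };

	/* stands in for dev_xdp_install(); NULL means "detach" */
	static void xdp_install(struct prog *prog)
	{
		printf("installed: %s\n", prog ? prog->name : "(none)");
	}

	int main(void)
	{
		struct prog existing = { "existing" };
		struct prog *prog = NULL;
		int fd = -1;          /* negative fd requests a detach */
		int expected_fd = 3;  /* caller also supplied EXPECTED_FD */

		if (expected_fd >= 0)
			prog = &existing; /* expected_fd lookup reuses 'prog' */

		if (fd >= 0) {
			/* attach path would look up the new program here */
		} else {
			/* the fix: without this reset, the stale pointer from
			 * the expected_fd lookup is re-installed and the
			 * detach silently does nothing
			 */
			prog = NULL;
		}

		xdp_install(prog); /* prints "(none)" only with the reset */
		return 0;
	}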
Fixes: 92234c8f15c8 ("xdp: Support specifying expected existing program when attaching XDP")
Signed-off-by: David Ahern
Signed-off-by: Daniel Borkmann
Reviewed-by: Jakub Kicinski
Reviewed-by: Toke Høiland-Jørgensen
Link: https://lore.kernel.org/bpf/20200412133204.43847-1-dsahern@kernel.org
---
 net/core/dev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index df8097b8e286..522288177bbd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8667,8 +8667,8 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 	const struct net_device_ops *ops = dev->netdev_ops;
 	enum bpf_netdev_command query;
 	u32 prog_id, expected_id = 0;
-	struct bpf_prog *prog = NULL;
 	bpf_op_t bpf_op, bpf_chk;
+	struct bpf_prog *prog;
 	bool offload;
 	int err;
 
@@ -8734,6 +8734,7 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 	} else {
 		if (!prog_id)
 			return 0;
+		prog = NULL;
 	}
 
 	err = dev_xdp_install(dev, bpf_op, extack, flags, prog);
--
cgit v1.2.3

From 89f33dcadb349eb926a92633e2c5f61466afc596 Mon Sep 17 00:00:00 2001
From: Zou Wei
Date: Mon, 13 Apr 2020 19:57:56 +0800
Subject: bpf: remove unneeded conversion to bool in __mark_reg_unknown

This issue was detected by using the Coccinelle software:

kernel/bpf/verifier.c:1259:16-21: WARNING: conversion to bool not needed here

The conversion to bool is unneeded, remove it.

Reported-by: Hulk Robot
Signed-off-by: Zou Wei
Signed-off-by: Daniel Borkmann
Acked-by: Song Liu
Link: https://lore.kernel.org/bpf/1586779076-101346-1-git-send-email-zou_wei@huawei.com
---
 kernel/bpf/verifier.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 04c6630cc18f..38cfcf701eeb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1255,8 +1255,7 @@ static void __mark_reg_unknown(const struct bpf_verifier_env *env,
 	reg->type = SCALAR_VALUE;
 	reg->var_off = tnum_unknown;
 	reg->frameno = 0;
-	reg->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ?
-		       true : false;
+	reg->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks;
 	__mark_reg_unbounded(reg);
 }
--
cgit v1.2.3

From 99e3a236dd43d06c65af0a2ef9cb44306aef6e02 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson
Date: Tue, 14 Apr 2020 09:35:15 +0200
Subject: xsk: Add missing check on user supplied headroom size

Add a check that the headroom cannot be larger than the available space
in the chunk. In the current code, a malicious user can set the headroom
to a value larger than the chunk size minus the fixed XDP headroom. That
way packets with a length larger than the supported size in the umem
could get accepted and result in an out-of-bounds write.
Fixes: c0c77d8fb787 ("xsk: add user memory registration support sockopt")
Reported-by: Bui Quang Minh
Signed-off-by: Magnus Karlsson
Signed-off-by: Daniel Borkmann
Link: https://bugzilla.kernel.org/show_bug.cgi?id=207225
Link: https://lore.kernel.org/bpf/1586849715-23490-1-git-send-email-magnus.karlsson@intel.com
---
 net/xdp/xdp_umem.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index fa7bb5e060d0..ed7a6060f73c 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -343,7 +343,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
 	unsigned int chunks, chunks_per_page;
 	u64 addr = mr->addr, size = mr->len;
-	int size_chk, err;
+	int err;
 
 	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
 		/* Strictly speaking we could support this, if:
@@ -382,8 +382,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 		return -EINVAL;
 	}
 
-	size_chk = chunk_size - headroom - XDP_PACKET_HEADROOM;
-	if (size_chk < 0)
+	if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
 		return -EINVAL;
 
 	umem->address = (unsigned long)addr;
--
cgit v1.2.3

From 25498a1969bf3687c29c29bbac92821d7a0f8b4a Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko
Date: Tue, 14 Apr 2020 11:26:45 -0700
Subject: libbpf: Always specify expected_attach_type on program load if supported

For some types of BPF programs that utilize expected_attach_type, libbpf
won't set load_attr.expected_attach_type, even if expected_attach_type is
known from the section definition. This was done to preserve backwards
compatibility with old kernels that didn't recognize the
expected_attach_type attribute yet (it was added in 5e43f899b03a ("bpf:
Check attach type at prog load time")). But this is problematic for some
BPF programs that utilize newer features requiring the kernel to know the
specific expected_attach_type (e.g., the extended set of return codes for
cgroup_skb/egress programs).

This patch makes libbpf specify expected_attach_type by default, while
also detecting kernel support for this field and omitting it during
program load on kernels that lack support. This allows having good
metadata for bpf_program (e.g., bpf_program__get_expected_attach_type()),
while still working with old kernels (for cases where it can work at all).

Additionally, because expected_attach_type is now always set for
recognized program types, bpf_program__attach_cgroup doesn't have to do
extra checks to determine the correct attach type, so remove that
additional logic.

Also adjust the section_names selftest to account for this change.

More detailed discussion can be found in [0].
[0] https://lore.kernel.org/bpf/20200412003604.GA15986@rdna-mbp.dhcp.thefacebook.com/

Fixes: 5cf1e9145630 ("bpf: cgroup inet skb programs can return 0 to 3")
Fixes: 5e43f899b03a ("bpf: Check attach type at prog load time")
Reported-by: Andrey Ignatov
Signed-off-by: Andrii Nakryiko
Signed-off-by: Daniel Borkmann
Acked-by: Song Liu
Acked-by: Andrey Ignatov
Link: https://lore.kernel.org/bpf/20200414182645.1368174-1-andriin@fb.com
---
 tools/lib/bpf/libbpf.c                             | 126 ++++++++++++++-------
 .../selftests/bpf/prog_tests/section_names.c       |  42 ++++---
 2 files changed, 109 insertions(+), 59 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index ff9174282a8c..8f480e29a6b0 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -178,6 +178,8 @@ struct bpf_capabilities {
 	__u32 array_mmap:1;
 	/* BTF_FUNC_GLOBAL is supported */
 	__u32 btf_func_global:1;
+	/* kernel support for expected_attach_type in BPF_PROG_LOAD */
+	__u32 exp_attach_type:1;
 };
 
 enum reloc_type {
@@ -194,6 +196,22 @@ struct reloc_desc {
 	int sym_off;
 };
 
+struct bpf_sec_def;
+
+typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec,
+					struct bpf_program *prog);
+
+struct bpf_sec_def {
+	const char *sec;
+	size_t len;
+	enum bpf_prog_type prog_type;
+	enum bpf_attach_type expected_attach_type;
+	bool is_exp_attach_type_optional;
+	bool is_attachable;
+	bool is_attach_btf;
+	attach_fn_t attach_fn;
+};
+
 /*
  * bpf_prog should be a better name but it has been used in
  * linux/filter.h.
@@ -204,6 +222,7 @@ struct bpf_program {
 	char *name;
 	int prog_ifindex;
 	char *section_name;
+	const struct bpf_sec_def *sec_def;
 	/* section_name with / replaced by _; makes recursive pinning
 	 * in bpf_object__pin_programs easier
 	 */
@@ -3315,6 +3334,37 @@ static int bpf_object__probe_array_mmap(struct bpf_object *obj)
 	return 0;
 }
 
+static int
+bpf_object__probe_exp_attach_type(struct bpf_object *obj)
+{
+	struct bpf_load_program_attr attr;
+	struct bpf_insn insns[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	int fd;
+
+	memset(&attr, 0, sizeof(attr));
+	/* use any valid combination of program type and (optional)
+	 * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS)
+	 * to see if kernel supports expected_attach_type field for
+	 * BPF_PROG_LOAD command
+	 */
+	attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
+	attr.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE;
+	attr.insns = insns;
+	attr.insns_cnt = ARRAY_SIZE(insns);
+	attr.license = "GPL";
+
+	fd = bpf_load_program_xattr(&attr, NULL, 0);
+	if (fd >= 0) {
+		obj->caps.exp_attach_type = 1;
+		close(fd);
+		return 1;
+	}
+	return 0;
+}
+
 static int
 bpf_object__probe_caps(struct bpf_object *obj)
 {
@@ -3325,6 +3375,7 @@ bpf_object__probe_caps(struct bpf_object *obj)
 		bpf_object__probe_btf_func_global,
 		bpf_object__probe_btf_datasec,
 		bpf_object__probe_array_mmap,
+		bpf_object__probe_exp_attach_type,
 	};
 	int i, ret;
 
@@ -4861,7 +4912,12 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 
 	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
 	load_attr.prog_type = prog->type;
-	load_attr.expected_attach_type = prog->expected_attach_type;
+	/* old kernels might not support specifying expected_attach_type */
+	if (!prog->caps->exp_attach_type && prog->sec_def &&
+	    prog->sec_def->is_exp_attach_type_optional)
+		load_attr.expected_attach_type = 0;
+	else
+		load_attr.expected_attach_type = prog->expected_attach_type;
 	if (prog->caps->name)
 		load_attr.name = prog->name;
 	load_attr.insns = insns;
@@ -5062,6 +5118,8 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
 	return 0;
 }
 
+static const struct bpf_sec_def *find_sec_def(const char *sec_name);
+
 static struct bpf_object *
 __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
 		   const struct bpf_object_open_opts *opts)
@@ -5117,24 +5175,17 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
 	bpf_object__elf_finish(obj);
 
 	bpf_object__for_each_program(prog, obj) {
-		enum bpf_prog_type prog_type;
-		enum bpf_attach_type attach_type;
-
-		if (prog->type != BPF_PROG_TYPE_UNSPEC)
-			continue;
-
-		err = libbpf_prog_type_by_name(prog->section_name, &prog_type,
-					       &attach_type);
-		if (err == -ESRCH)
+		prog->sec_def = find_sec_def(prog->section_name);
+		if (!prog->sec_def)
 			/* couldn't guess, but user might manually specify */
 			continue;
-		if (err)
-			goto out;
 
-		bpf_program__set_type(prog, prog_type);
-		bpf_program__set_expected_attach_type(prog, attach_type);
-		if (prog_type == BPF_PROG_TYPE_TRACING ||
-		    prog_type == BPF_PROG_TYPE_EXT)
+		bpf_program__set_type(prog, prog->sec_def->prog_type);
+		bpf_program__set_expected_attach_type(prog,
+				prog->sec_def->expected_attach_type);
+
+		if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING ||
+		    prog->sec_def->prog_type == BPF_PROG_TYPE_EXT)
 			prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
 	}
 
@@ -6223,23 +6274,32 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog,
 	prog->expected_attach_type = type;
 }
 
-#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, btf, atype) \
-	{ string, sizeof(string) - 1, ptype, eatype, is_attachable, btf, atype }
+#define BPF_PROG_SEC_IMPL(string, ptype, eatype, eatype_optional,	    \
+			  attachable, attach_btf)			    \
+	{								    \
+		.sec = string,						    \
+		.len = sizeof(string) - 1,				    \
+		.prog_type = ptype,					    \
+		.expected_attach_type = eatype,				    \
+		.is_exp_attach_type_optional = eatype_optional,		    \
+		.is_attachable = attachable,				    \
+		.is_attach_btf = attach_btf,				    \
+	}
 
 /* Programs that can NOT be attached. */
 #define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0)
 
 /* Programs that can be attached. */
 #define BPF_APROG_SEC(string, ptype, atype) \
-	BPF_PROG_SEC_IMPL(string, ptype, 0, 1, 0, atype)
+	BPF_PROG_SEC_IMPL(string, ptype, atype, true, 1, 0)
 
 /* Programs that must specify expected attach type at load time. */
 #define BPF_EAPROG_SEC(string, ptype, eatype) \
-	BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, 0, eatype)
+	BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 1, 0)
 
 /* Programs that use BTF to identify attach point */
 #define BPF_PROG_BTF(string, ptype, eatype) \
-	BPF_PROG_SEC_IMPL(string, ptype, eatype, 0, 1, 0)
+	BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 0, 1)
 
 /* Programs that can be attached but attach type can't be identified by section
  * name. Kept for backward compatibility.
@@ -6253,11 +6313,6 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog,
 	__VA_ARGS__ \
 }
 
-struct bpf_sec_def;
-
-typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec,
-					struct bpf_program *prog);
-
 static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
 				      struct bpf_program *prog);
 static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
@@ -6269,17 +6324,6 @@ static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
 				     struct bpf_program *prog);
 static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
 				   struct bpf_program *prog);
-struct bpf_sec_def {
-	const char *sec;
-	size_t len;
-	enum bpf_prog_type prog_type;
-	enum bpf_attach_type expected_attach_type;
-	bool is_attachable;
-	bool is_attach_btf;
-	enum bpf_attach_type attach_type;
-	attach_fn_t attach_fn;
-};
-
 static const struct bpf_sec_def section_defs[] = {
 	BPF_PROG_SEC("socket",			BPF_PROG_TYPE_SOCKET_FILTER),
 	BPF_PROG_SEC("sk_reuseport",		BPF_PROG_TYPE_SK_REUSEPORT),
@@ -6713,7 +6757,7 @@ int libbpf_attach_type_by_name(const char *name,
 			continue;
 		if (!section_defs[i].is_attachable)
 			return -EINVAL;
-		*attach_type = section_defs[i].attach_type;
+		*attach_type = section_defs[i].expected_attach_type;
 		return 0;
 	}
 	pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
@@ -7542,7 +7586,6 @@ static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
 struct bpf_link *
 bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
 {
-	const struct bpf_sec_def *sec_def;
 	enum bpf_attach_type attach_type;
 	char errmsg[STRERR_BUFSIZE];
 	struct bpf_link *link;
@@ -7561,11 +7604,6 @@ bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
 	link->detach = &bpf_link__detach_fd;
 
 	attach_type = bpf_program__get_expected_attach_type(prog);
-	if (!attach_type) {
-		sec_def = find_sec_def(bpf_program__title(prog, false));
-		if (sec_def)
-			attach_type = sec_def->attach_type;
-	}
 	link_fd = bpf_link_create(prog_fd, cgroup_fd, attach_type, NULL);
 	if (link_fd < 0) {
 		link_fd = -errno;
diff --git a/tools/testing/selftests/bpf/prog_tests/section_names.c b/tools/testing/selftests/bpf/prog_tests/section_names.c
index 9d9351dc2ded..713167449c98 100644
--- a/tools/testing/selftests/bpf/prog_tests/section_names.c
+++ b/tools/testing/selftests/bpf/prog_tests/section_names.c
@@ -43,18 +43,18 @@ static struct sec_name_test tests[] = {
 	{"lwt_seg6local", {0, BPF_PROG_TYPE_LWT_SEG6LOCAL, 0}, {-EINVAL, 0} },
 	{
 		"cgroup_skb/ingress",
-		{0, BPF_PROG_TYPE_CGROUP_SKB, 0},
+		{0, BPF_PROG_TYPE_CGROUP_SKB, BPF_CGROUP_INET_INGRESS},
 		{0, BPF_CGROUP_INET_INGRESS},
 	},
 	{
 		"cgroup_skb/egress",
-		{0, BPF_PROG_TYPE_CGROUP_SKB, 0},
+		{0, BPF_PROG_TYPE_CGROUP_SKB, BPF_CGROUP_INET_EGRESS},
 		{0, BPF_CGROUP_INET_EGRESS},
 	},
 	{"cgroup/skb", {0, BPF_PROG_TYPE_CGROUP_SKB, 0}, {-EINVAL, 0} },
 	{
 		"cgroup/sock",
-		{0, BPF_PROG_TYPE_CGROUP_SOCK, 0},
+		{0, BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE},
 		{0, BPF_CGROUP_INET_SOCK_CREATE},
 	},
 	{
@@ -69,26 +69,38 @@ static struct sec_name_test tests[] = {
 	},
 	{
 		"cgroup/dev",
-		{0, BPF_PROG_TYPE_CGROUP_DEVICE, 0},
+		{0, BPF_PROG_TYPE_CGROUP_DEVICE, BPF_CGROUP_DEVICE},
 		{0, BPF_CGROUP_DEVICE},
 	},
-	{"sockops", {0, BPF_PROG_TYPE_SOCK_OPS, 0}, {0, BPF_CGROUP_SOCK_OPS} },
+	{
+		"sockops",
+		{0, BPF_PROG_TYPE_SOCK_OPS, BPF_CGROUP_SOCK_OPS},
+		{0, BPF_CGROUP_SOCK_OPS},
+	},
 	{
 		"sk_skb/stream_parser",
-		{0, BPF_PROG_TYPE_SK_SKB, 0},
+		{0, BPF_PROG_TYPE_SK_SKB, BPF_SK_SKB_STREAM_PARSER},
 		{0, BPF_SK_SKB_STREAM_PARSER},
 	},
 	{
 		"sk_skb/stream_verdict",
-		{0, BPF_PROG_TYPE_SK_SKB, 0},
+		{0, BPF_PROG_TYPE_SK_SKB, BPF_SK_SKB_STREAM_VERDICT},
 		{0, BPF_SK_SKB_STREAM_VERDICT},
 	},
 	{"sk_skb", {0, BPF_PROG_TYPE_SK_SKB, 0}, {-EINVAL, 0} },
-	{"sk_msg", {0, BPF_PROG_TYPE_SK_MSG, 0}, {0, BPF_SK_MSG_VERDICT} },
-	{"lirc_mode2", {0, BPF_PROG_TYPE_LIRC_MODE2, 0}, {0, BPF_LIRC_MODE2} },
+	{
+		"sk_msg",
+		{0, BPF_PROG_TYPE_SK_MSG, BPF_SK_MSG_VERDICT},
+		{0, BPF_SK_MSG_VERDICT},
+	},
+	{
+		"lirc_mode2",
+		{0, BPF_PROG_TYPE_LIRC_MODE2, BPF_LIRC_MODE2},
+		{0, BPF_LIRC_MODE2},
+	},
 	{
 		"flow_dissector",
-		{0, BPF_PROG_TYPE_FLOW_DISSECTOR, 0},
+		{0, BPF_PROG_TYPE_FLOW_DISSECTOR, BPF_FLOW_DISSECTOR},
 		{0, BPF_FLOW_DISSECTOR},
 	},
 	{
@@ -158,17 +170,17 @@ static void test_prog_type_by_name(const struct sec_name_test *test)
 				       &expected_attach_type);
 
 	CHECK(rc != test->expected_load.rc, "check_code",
-	      "prog: unexpected rc=%d for %s", rc, test->sec_name);
+	      "prog: unexpected rc=%d for %s\n", rc, test->sec_name);
 
 	if (rc)
 		return;
 
 	CHECK(prog_type != test->expected_load.prog_type, "check_prog_type",
-	      "prog: unexpected prog_type=%d for %s",
+	      "prog: unexpected prog_type=%d for %s\n",
 	      prog_type, test->sec_name);
 
 	CHECK(expected_attach_type != test->expected_load.expected_attach_type,
-	      "check_attach_type", "prog: unexpected expected_attach_type=%d for %s",
+	      "check_attach_type", "prog: unexpected expected_attach_type=%d for %s\n",
 	      expected_attach_type, test->sec_name);
 }
 
@@ -180,13 +192,13 @@ static void test_attach_type_by_name(const struct sec_name_test *test)
 	rc = libbpf_attach_type_by_name(test->sec_name, &attach_type);
 
 	CHECK(rc != test->expected_attach.rc, "check_ret",
-	      "attach: unexpected rc=%d for %s", rc, test->sec_name);
+	      "attach: unexpected rc=%d for %s\n", rc, test->sec_name);
 
 	if (rc)
 		return;
 
 	CHECK(attach_type != test->expected_attach.attach_type,
-	      "check_attach_type", "attach: unexpected attach_type=%d for %s",
+	      "check_attach_type", "attach: unexpected attach_type=%d for %s\n",
 	      attach_type, test->sec_name);
 }
--
cgit v1.2.3

From 49b452c382da2c2d1ccee1265cbb92da905c82f7 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen
Date: Tue, 14 Apr 2020 16:50:24 +0200
Subject: libbpf: Fix type of old_fd in bpf_xdp_set_link_opts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The 'old_fd' parameter used for atomic replacement of XDP programs is
supposed to be an FD, but was left as a u32 from an earlier iteration of
the patch that added it. It was converted to an int when read, so things
worked correctly even with negative values, but it's better to change the
definition to correctly reflect the intention.
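With the corrected type, callers can pass -1 (meaning "expect no program
attached") without relying on an implicit u32 conversion. A minimal
sketch of the pattern used in the selftests (the helper name
xdp_attach_if_empty is made up, and ifindex/prog_fd are placeholders):

	#include <bpf/libbpf.h>
	#include <linux/if_link.h>

	/* attach prog_fd only if no XDP program is currently attached */
	static int xdp_attach_if_empty(int ifindex, int prog_fd)
	{
		DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts,
				    .old_fd = -1); /* -1 = expect none */

		return bpf_set_link_xdp_fd_opts(ifindex, prog_fd,
						XDP_FLAGS_REPLACE, &opts);
	}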
Fixes: bd5ca3ef93cd ("libbpf: Add function to set link XDP fd while specifying old program")
Reported-by: David Ahern
Signed-off-by: Toke Høiland-Jørgensen
Signed-off-by: Daniel Borkmann
Acked-by: David Ahern
Acked-by: Song Liu
Link: https://lore.kernel.org/bpf/20200414145025.182163-1-toke@redhat.com
---
 tools/lib/bpf/libbpf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 44df1d3e7287..f1dacecb1619 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -458,7 +458,7 @@ struct xdp_link_info {
 
 struct bpf_xdp_set_link_opts {
 	size_t sz;
-	__u32 old_fd;
+	int old_fd;
 };
 #define bpf_xdp_set_link_opts__last_field old_fd
 
--
cgit v1.2.3

From c6c111523d9e697bfb463870759825be5d6caff6 Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen
Date: Tue, 14 Apr 2020 16:50:25 +0200
Subject: selftests/bpf: Check for correct program attach/detach in xdp_attach test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

David Ahern noticed that there was a bug in the EXPECTED_FD code, so
programs did not get detached properly when that parameter was supplied.
This case was not included in the xdp_attach tests, so let's add it to
be sure that such a bug does not sneak back in.

Fixes: 87854a0b57b3 ("selftests/bpf: Add tests for attaching XDP programs")
Reported-by: David Ahern
Signed-off-by: Toke Høiland-Jørgensen
Signed-off-by: Daniel Borkmann
Acked-by: Song Liu
Link: https://lore.kernel.org/bpf/20200414145025.182163-2-toke@redhat.com
---
 .../testing/selftests/bpf/prog_tests/xdp_attach.c | 30 +++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
index 05b294d6b923..15ef3531483e 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
@@ -6,19 +6,34 @@
 void test_xdp_attach(void)
 {
+	__u32 duration = 0, id1, id2, id0 = 0, len;
 	struct bpf_object *obj1, *obj2, *obj3;
 	const char *file = "./test_xdp.o";
+	struct bpf_prog_info info = {};
 	int err, fd1, fd2, fd3;
-	__u32 duration = 0;
 	DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts,
			    .old_fd = -1);
 
+	len = sizeof(info);
+
 	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj1, &fd1);
 	if (CHECK_FAIL(err))
 		return;
 
+	err = bpf_obj_get_info_by_fd(fd1, &info, &len);
+	if (CHECK_FAIL(err))
+		goto out_1;
+	id1 = info.id;
+
 	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj2, &fd2);
 	if (CHECK_FAIL(err))
 		goto out_1;
+
+	memset(&info, 0, sizeof(info));
+	err = bpf_obj_get_info_by_fd(fd2, &info, &len);
+	if (CHECK_FAIL(err))
+		goto out_2;
+	id2 = info.id;
+
 	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj3, &fd3);
 	if (CHECK_FAIL(err))
 		goto out_2;
@@ -28,6 +43,11 @@ void test_xdp_attach(void)
 	if (CHECK(err, "load_ok", "initial load failed"))
 		goto out_close;
 
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	if (CHECK(err || id0 != id1, "id1_check",
+		  "loaded prog id %u != id1 %u, err %d", id0, id1, err))
+		goto out_close;
+
 	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2,
 				       XDP_FLAGS_REPLACE, &opts);
 	if (CHECK(!err, "load_fail", "load with expected id didn't fail"))
@@ -37,6 +57,10 @@ void test_xdp_attach(void)
 	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, 0, &opts);
 	if (CHECK(err, "replace_ok", "replace valid old_fd failed"))
 		goto out;
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	if (CHECK(err || id0 != id2, "id2_check",
+		  "loaded prog id %u != id2 %u, err %d", id0, id2, err))
+		goto out_close;
 
 	err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd3, 0, &opts);
 	if (CHECK(!err, "replace_fail", "replace invalid old_fd didn't fail"))
@@ -51,6 +75,10 @@ void test_xdp_attach(void)
 	if (CHECK(err, "remove_ok", "remove valid old_fd failed"))
 		goto out;
 
+	err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+	if (CHECK(err || id0 != 0, "unload_check",
+		  "loaded prog id %u != 0, err %d", id0, err))
+		goto out_close;
 out:
 	bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
 out_close:
--
cgit v1.2.3
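As a usage note (a sketch, not part of the patch): the
bpf_get_link_xdp_id() calls added above are also the natural way for an
application to confirm what is currently attached; the helper name
attached_prog_id is made up and ifindex is a placeholder:

	#include <bpf/libbpf.h>

	/* returns the id of the XDP program attached to ifindex,
	 * or 0 if none is attached (or if the query itself fails)
	 */
	static __u32 attached_prog_id(int ifindex)
	{
		__u32 prog_id = 0;

		if (bpf_get_link_xdp_id(ifindex, &prog_id, 0))
			return 0;
		return prog_id;
	}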