From cf28f3bbfca097d956f9021cb710dfad56adcc62 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 17 Aug 2020 10:42:14 -0700 Subject: bpf: Use get_file_rcu() instead of get_file() for task_file iterator With latest `bpftool prog` command, we observed the following kernel panic. BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor instruction fetch in kernel mode #PF: error_code(0x0010) - not-present page PGD dfe894067 P4D dfe894067 PUD deb663067 PMD 0 Oops: 0010 [#1] SMP CPU: 9 PID: 6023 ... RIP: 0010:0x0 Code: Bad RIP value. RSP: 0000:ffffc900002b8f18 EFLAGS: 00010286 RAX: ffff8883a405f400 RBX: ffff888e46a6bf00 RCX: 000000008020000c RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8883a405f400 RBP: ffff888e46a6bf50 R08: 0000000000000000 R09: ffffffff81129600 R10: ffff8883a405f300 R11: 0000160000000000 R12: 0000000000002710 R13: 000000e9494b690c R14: 0000000000000202 R15: 0000000000000009 FS: 00007fd9187fe700(0000) GS:ffff888e46a40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 0000000de5d33002 CR4: 0000000000360ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: rcu_core+0x1a4/0x440 __do_softirq+0xd3/0x2c8 irq_exit+0x9d/0xa0 smp_apic_timer_interrupt+0x68/0x120 apic_timer_interrupt+0xf/0x20 RIP: 0033:0x47ce80 Code: Bad RIP value. RSP: 002b:00007fd9187fba40 EFLAGS: 00000206 ORIG_RAX: ffffffffffffff13 RAX: 0000000000000002 RBX: 00007fd931789160 RCX: 000000000000010c RDX: 00007fd9308cdfb4 RSI: 00007fd9308cdfb4 RDI: 00007ffedd1ea0a8 RBP: 00007fd9187fbab0 R08: 000000000000000e R09: 000000000000002a R10: 0000000000480210 R11: 00007fd9187fc570 R12: 00007fd9316cc400 R13: 0000000000000118 R14: 00007fd9308cdfb4 R15: 00007fd9317a9380 After further analysis, the bug is triggered by Commit eaaacd23910f ("bpf: Add task and task/file iterator targets") which introduced task_file bpf iterator, which traverses all open file descriptors for all tasks in the current namespace. The latest `bpftool prog` calls a task_file bpf program to traverse all files in the system in order to associate processes with progs/maps, etc. When traversing files for a given task, rcu read_lock is taken to access all files in a file_struct. But it used get_file() to grab a file, which is not right. It is possible file->f_count is 0 and get_file() will unconditionally increase it. Later put_file() may cause all kind of issues with the above as one of sympotoms. The failure can be reproduced with the following steps in a few seconds: $ cat t.c #include #include #include #include #include #define N 10000 int fd[N]; int main() { int i; for (i = 0; i < N; i++) { fd[i] = open("./note.txt", 'r'); if (fd[i] < 0) { fprintf(stderr, "failed\n"); return -1; } } for (i = 0; i < N; i++) close(fd[i]); return 0; } $ gcc -O2 t.c $ cat run.sh #/bin/bash for i in {1..100} do while true; do ./a.out; done & done $ ./run.sh $ while true; do bpftool prog >& /dev/null; done This patch used get_file_rcu() which only grabs a file if the file->f_count is not zero. This is to ensure the file pointer is always valid. The above reproducer did not fail for more than 30 minutes. Fixes: eaaacd23910f ("bpf: Add task and task/file iterator targets") Suggested-by: Josef Bacik Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Reviewed-by: Josef Bacik Link: https://lore.kernel.org/bpf/20200817174214.252601-1-yhs@fb.com --- kernel/bpf/task_iter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 232df29793e9..f21b5e1e4540 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -178,10 +178,11 @@ again: f = fcheck_files(curr_files, curr_fd); if (!f) continue; + if (!get_file_rcu(f)) + continue; /* set info->fd */ info->fd = curr_fd; - get_file(f); rcu_read_unlock(); return f; } -- cgit v1.2.3 From 3fb1a96a91120877488071a167d26d76be4be977 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 18 Aug 2020 09:44:56 -0700 Subject: libbpf: Fix build on ppc64le architecture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On ppc64le we get the following warning: In file included from btf_dump.c:16:0: btf_dump.c: In function ‘btf_dump_emit_struct_def’: ../include/linux/kernel.h:20:17: error: comparison of distinct pointer types lacks a cast [-Werror] (void) (&_max1 == &_max2); \ ^ btf_dump.c:882:11: note: in expansion of macro ‘max’ m_sz = max(0LL, btf__resolve_size(d->btf, m->type)); ^~~ Fix by explicitly casting to __s64, which is a return type from btf__resolve_size(). Fixes: 702eddc77a90 ("libbpf: Handle GCC built-in types for Arm NEON") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200818164456.1181661-1-andriin@fb.com --- tools/lib/bpf/btf_dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index fe39bd774697..57c00fa63932 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -879,7 +879,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, btf_dump_printf(d, ": %d", m_sz); off = m_off + m_sz; } else { - m_sz = max(0LL, btf__resolve_size(d->btf, m->type)); + m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type)); off = m_off + m_sz * 8; } btf_dump_printf(d, ";"); -- cgit v1.2.3 From e679654a704e5bd676ea6446fa7b764cbabf168a Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 18 Aug 2020 15:23:09 -0700 Subject: bpf: Fix a rcu_sched stall issue with bpf task/task_file iterator In our production system, we observed rcu stalls when 'bpftool prog` is running. rcu: INFO: rcu_sched self-detected stall on CPU rcu: \x097-....: (20999 ticks this GP) idle=302/1/0x4000000000000000 softirq=1508852/1508852 fqs=4913 \x09(t=21031 jiffies g=2534773 q=179750) NMI backtrace for cpu 7 CPU: 7 PID: 184195 Comm: bpftool Kdump: loaded Tainted: G W 5.8.0-00004-g68bfc7f8c1b4 #6 Hardware name: Quanta Twin Lakes MP/Twin Lakes Passive MP, BIOS F09_3A17 05/03/2019 Call Trace: dump_stack+0x57/0x70 nmi_cpu_backtrace.cold+0x14/0x53 ? lapic_can_unplug_cpu.cold+0x39/0x39 nmi_trigger_cpumask_backtrace+0xb7/0xc7 rcu_dump_cpu_stacks+0xa2/0xd0 rcu_sched_clock_irq.cold+0x1ff/0x3d9 ? tick_nohz_handler+0x100/0x100 update_process_times+0x5b/0x90 tick_sched_timer+0x5e/0xf0 __hrtimer_run_queues+0x12a/0x2a0 hrtimer_interrupt+0x10e/0x280 __sysvec_apic_timer_interrupt+0x51/0xe0 asm_call_on_stack+0xf/0x20 sysvec_apic_timer_interrupt+0x6f/0x80 asm_sysvec_apic_timer_interrupt+0x12/0x20 RIP: 0010:task_file_seq_get_next+0x71/0x220 Code: 00 00 8b 53 1c 49 8b 7d 00 89 d6 48 8b 47 20 44 8b 18 41 39 d3 76 75 48 8b 4f 20 8b 01 39 d0 76 61 41 89 d1 49 39 c1 48 19 c0 <48> 8b 49 08 21 d0 48 8d 04 c1 4c 8b 08 4d 85 c9 74 46 49 8b 41 38 RSP: 0018:ffffc90006223e10 EFLAGS: 00000297 RAX: ffffffffffffffff RBX: ffff888f0d172388 RCX: ffff888c8c07c1c0 RDX: 00000000000f017b RSI: 00000000000f017b RDI: ffff888c254702c0 RBP: ffffc90006223e68 R08: ffff888be2a1c140 R09: 00000000000f017b R10: 0000000000000002 R11: 0000000000100000 R12: ffff888f23c24118 R13: ffffc90006223e60 R14: ffffffff828509a0 R15: 00000000ffffffff task_file_seq_next+0x52/0xa0 bpf_seq_read+0xb9/0x320 vfs_read+0x9d/0x180 ksys_read+0x5f/0xe0 do_syscall_64+0x38/0x60 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f8815f4f76e Code: c0 e9 f6 fe ff ff 55 48 8d 3d 76 70 0a 00 48 89 e5 e8 36 06 02 00 66 0f 1f 44 00 00 64 8b 04 25 18 00 00 00 85 c0 75 14 0f 05 <48> 3d 00 f0 ff ff 77 52 c3 66 0f 1f 84 00 00 00 00 00 55 48 89 e5 RSP: 002b:00007fff8f9df578 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 000000000170b9c0 RCX: 00007f8815f4f76e RDX: 0000000000001000 RSI: 00007fff8f9df5b0 RDI: 0000000000000007 RBP: 00007fff8f9e05f0 R08: 0000000000000049 R09: 0000000000000010 R10: 00007f881601fa40 R11: 0000000000000246 R12: 00007fff8f9e05a8 R13: 00007fff8f9e05a8 R14: 0000000001917f90 R15: 000000000000e22e Note that `bpftool prog` actually calls a task_file bpf iterator program to establish an association between prog/map/link/btf anon files and processes. In the case where the above rcu stall occured, we had a process having 1587 tasks and each task having roughly 81305 files. This implied 129 million bpf prog invocations. Unfortunwtely none of these files are prog/map/link/btf files so bpf iterator/prog needs to traverse all these files and not able to return to user space since there are no seq_file buffer overflow. This patch fixed the issue in bpf_seq_read() to limit the number of visited objects. If the maximum number of visited objects is reached, no more objects will be visited in the current syscall. If there is nothing written in the seq_file buffer, -EAGAIN will return to the user so user can try again. The maximum number of visited objects is set at 1 million. In our Intel Xeon D-2191 2.3GHZ 18-core server, bpf_seq_read() visiting 1 million files takes around 0.18 seconds. We did not use cond_resched() since for some iterators, e.g., netlink iterator, where rcu read_lock critical section spans between consecutive seq_ops->next(), which makes impossible to do cond_resched() in the key while loop of function bpf_seq_read(). Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Cc: Paul E. McKenney Link: https://lore.kernel.org/bpf/20200818222309.2181348-1-yhs@fb.com --- kernel/bpf/bpf_iter.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index b6715964b685..8faa2ce89396 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -67,6 +67,9 @@ static void bpf_iter_done_stop(struct seq_file *seq) iter_priv->done_stop = true; } +/* maximum visited objects before bailing out */ +#define MAX_ITER_OBJECTS 1000000 + /* bpf_seq_read, a customized and simpler version for bpf iterator. * no_llseek is assumed for this file. * The following are differences from seq_read(): @@ -79,7 +82,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, { struct seq_file *seq = file->private_data; size_t n, offs, copied = 0; - int err = 0; + int err = 0, num_objs = 0; void *p; mutex_lock(&seq->lock); @@ -135,6 +138,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, while (1) { loff_t pos = seq->index; + num_objs++; offs = seq->count; p = seq->op->next(seq, p, &seq->index); if (pos == seq->index) { @@ -153,6 +157,15 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, if (seq->count >= size) break; + if (num_objs >= MAX_ITER_OBJECTS) { + if (offs == 0) { + err = -EAGAIN; + seq->op->stop(seq, p); + goto done; + } + break; + } + err = seq->op->show(seq, p); if (err > 0) { bpf_iter_dec_seq_num(seq); -- cgit v1.2.3 From e60572b8d4c39572be6857d1ec91fdf979f8775f Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 18 Aug 2020 15:23:10 -0700 Subject: bpf: Avoid visit same object multiple times Currently when traversing all tasks, the next tid is always increased by one. This may result in visiting the same task multiple times in a pid namespace. This patch fixed the issue by seting the next tid as pid_nr_ns(pid, ns) + 1, similar to funciton next_tgid(). Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Cc: Rik van Riel Link: https://lore.kernel.org/bpf/20200818222310.2181500-1-yhs@fb.com --- kernel/bpf/task_iter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index f21b5e1e4540..99af4cea1102 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -29,8 +29,9 @@ static struct task_struct *task_seq_get_next(struct pid_namespace *ns, rcu_read_lock(); retry: - pid = idr_get_next(&ns->idr, tid); + pid = find_ge_pid(*tid, ns); if (pid) { + *tid = pid_nr_ns(pid, ns); task = get_pid_task(pid, PIDTYPE_PID); if (!task) { ++*tid; -- cgit v1.2.3 From 00fa1d83a8b50351c830521d00135e823c46e7d0 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 18 Aug 2020 15:23:12 -0700 Subject: bpftool: Handle EAGAIN error code properly in pids collection When the error code is EAGAIN, the kernel signals the user space should retry the read() operation for bpf iterators. Let us do it. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200818222312.2181675-1-yhs@fb.com --- tools/bpf/bpftool/pids.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index e3b116325403..df7d8ec76036 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -134,6 +134,8 @@ int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) while (true) { ret = read(fd, buf, sizeof(buf)); if (ret < 0) { + if (errno == EAGAIN) + continue; err = -errno; p_err("failed to read PID iterator output: %d", err); goto out; -- cgit v1.2.3 From 1e891e513e16c145cc9b45b1fdb8bf4a4f2f9557 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 19 Aug 2020 13:05:34 +0200 Subject: libbpf: Fix map index used in error message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The error message emitted by bpf_object__init_user_btf_maps() was using the wrong section ID. Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200819110534.9058-1-toke@redhat.com --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5d20b2da4427..0ad0b0491e1f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2264,7 +2264,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, data = elf_getdata(scn, NULL); if (!scn || !data) { pr_warn("failed to get Elf_Data from map section %d (%s)\n", - obj->efile.maps_shndx, MAPS_ELF_SEC); + obj->efile.btf_maps_shndx, MAPS_ELF_SEC); return -EINVAL; } -- cgit v1.2.3 From 51f6463aacfbfd322bcaadc606da56acef644b05 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 19 Aug 2020 11:23:42 +0200 Subject: tools/resolve_btfids: Fix sections with wrong alignment The data of compressed section should be aligned to 4 (for 32bit) or 8 (for 64 bit) bytes. The binutils ld sets sh_addralign to 1, which makes libelf fail with misaligned section error during the update as reported by Jesper: FAILED elf_update(WRITE): invalid section alignment While waiting for ld fix, we can fix compressed sections sh_addralign value manually. Adding warning in -vv mode when the fix is triggered: $ ./tools/bpf/resolve_btfids/resolve_btfids -vv vmlinux ... section(36) .comment, size 44, link 0, flags 30, type=1 section(37) .debug_aranges, size 45684, link 0, flags 800, type=1 - fixing wrong alignment sh_addralign 16, expected 8 section(38) .debug_info, size 129104957, link 0, flags 800, type=1 - fixing wrong alignment sh_addralign 1, expected 8 section(39) .debug_abbrev, size 1152583, link 0, flags 800, type=1 - fixing wrong alignment sh_addralign 1, expected 8 section(40) .debug_line, size 7374522, link 0, flags 800, type=1 - fixing wrong alignment sh_addralign 1, expected 8 section(41) .debug_frame, size 702463, link 0, flags 800, type=1 section(42) .debug_str, size 1017571, link 0, flags 830, type=1 - fixing wrong alignment sh_addralign 1, expected 8 section(43) .debug_loc, size 3019453, link 0, flags 800, type=1 - fixing wrong alignment sh_addralign 1, expected 8 section(44) .debug_ranges, size 1744583, link 0, flags 800, type=1 - fixing wrong alignment sh_addralign 16, expected 8 section(45) .symtab, size 2955888, link 46, flags 0, type=2 section(46) .strtab, size 2613072, link 0, flags 0, type=3 ... update ok for vmlinux Another workaround is to disable compressed debug info data CONFIG_DEBUG_INFO_COMPRESSED kernel option. Fixes: fbbb68de80a4 ("bpf: Add resolve_btfids tool to resolve BTF IDs in ELF object") Reported-by: Jesper Dangaard Brouer Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Acked-by: Jesper Dangaard Brouer Acked-by: Yonghong Song Cc: Mark Wielaard Cc: Nick Clifton Link: https://lore.kernel.org/bpf/20200819092342.259004-1-jolsa@kernel.org --- tools/bpf/resolve_btfids/main.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 4d9ecb975862..0def0bb1f783 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -233,6 +233,39 @@ static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size) return btf_id__add(root, id, false); } +/* + * The data of compressed section should be aligned to 4 + * (for 32bit) or 8 (for 64 bit) bytes. The binutils ld + * sets sh_addralign to 1, which makes libelf fail with + * misaligned section error during the update: + * FAILED elf_update(WRITE): invalid section alignment + * + * While waiting for ld fix, we fix the compressed sections + * sh_addralign value manualy. + */ +static int compressed_section_fix(Elf *elf, Elf_Scn *scn, GElf_Shdr *sh) +{ + int expected = gelf_getclass(elf) == ELFCLASS32 ? 4 : 8; + + if (!(sh->sh_flags & SHF_COMPRESSED)) + return 0; + + if (sh->sh_addralign == expected) + return 0; + + pr_debug2(" - fixing wrong alignment sh_addralign %u, expected %u\n", + sh->sh_addralign, expected); + + sh->sh_addralign = expected; + + if (gelf_update_shdr(scn, sh) == 0) { + printf("FAILED cannot update section header: %s\n", + elf_errmsg(-1)); + return -1; + } + return 0; +} + static int elf_collect(struct object *obj) { Elf_Scn *scn = NULL; @@ -309,6 +342,9 @@ static int elf_collect(struct object *obj) obj->efile.idlist_shndx = idx; obj->efile.idlist_addr = sh.sh_addr; } + + if (compressed_section_fix(elf, scn, &sh)) + return -1; } return 0; -- cgit v1.2.3 From 5597432dde62befd3ab92e6ef9e073564e277ea8 Mon Sep 17 00:00:00 2001 From: Veronika Kabatova Date: Wed, 19 Aug 2020 18:07:10 +0200 Subject: selftests/bpf: Remove test_align leftovers Calling generic selftests "make install" fails as rsync expects all files from TEST_GEN_PROGS to be present. The binary is not generated anymore (commit 3b09d27cc93d) so we can safely remove it from there and also from gitignore. Fixes: 3b09d27cc93d ("selftests/bpf: Move test_align under test_progs") Signed-off-by: Veronika Kabatova Signed-off-by: Alexei Starovoitov Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/bpf/20200819160710.1345956-1-vkabatov@redhat.com --- tools/testing/selftests/bpf/.gitignore | 1 - tools/testing/selftests/bpf/Makefile | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 1bb204cee853..9a0946ddb705 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -6,7 +6,6 @@ test_lpm_map test_tag FEATURE-DUMP.libbpf fixdep -test_align test_dev_cgroup /test_progs* test_tcpbpf_user diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index a83b5827532f..fc946b7ac288 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -32,7 +32,7 @@ LDLIBS += -lcap -lelf -lz -lrt -lpthread # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ - test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ + test_verifier_log test_dev_cgroup test_tcpbpf_user \ test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \ test_cgroup_storage \ test_netcnt test_tcpnotify_user test_sock_fields test_sysctl \ -- cgit v1.2.3 From c8a36f1945b2b1b3f9823b66fc2181dc069cf803 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 19 Aug 2020 22:28:41 -0700 Subject: bpf: xdp: Fix XDP mode when no mode flags specified 7f0a838254bd ("bpf, xdp: Maintain info on attached XDP BPF programs in net_device") inadvertently changed which XDP mode is assumed when no mode flags are specified explicitly. Previously, driver mode was preferred, if driver supported it. If not, generic SKB mode was chosen. That commit changed default to SKB mode always. This patch fixes the issue and restores the original logic. Fixes: 7f0a838254bd ("bpf, xdp: Maintain info on attached XDP BPF programs in net_device") Reported-by: Lorenzo Bianconi Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Tested-by: Lorenzo Bianconi Link: https://lore.kernel.org/bpf/20200820052841.1559757-1-andriin@fb.com --- net/core/dev.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index b5d1129d8310..d42c9ea0c3c0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8742,13 +8742,15 @@ struct bpf_xdp_link { int flags; }; -static enum bpf_xdp_mode dev_xdp_mode(u32 flags) +static enum bpf_xdp_mode dev_xdp_mode(struct net_device *dev, u32 flags) { if (flags & XDP_FLAGS_HW_MODE) return XDP_MODE_HW; if (flags & XDP_FLAGS_DRV_MODE) return XDP_MODE_DRV; - return XDP_MODE_SKB; + if (flags & XDP_FLAGS_SKB_MODE) + return XDP_MODE_SKB; + return dev->netdev_ops->ndo_bpf ? XDP_MODE_DRV : XDP_MODE_SKB; } static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode) @@ -8896,7 +8898,7 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack return -EINVAL; } - mode = dev_xdp_mode(flags); + mode = dev_xdp_mode(dev, flags); /* can't replace attached link */ if (dev_xdp_link(dev, mode)) { NL_SET_ERR_MSG(extack, "Can't replace active BPF XDP link"); @@ -8984,7 +8986,7 @@ static int dev_xdp_detach_link(struct net_device *dev, ASSERT_RTNL(); - mode = dev_xdp_mode(link->flags); + mode = dev_xdp_mode(dev, link->flags); if (dev_xdp_link(dev, mode) != link) return -EINVAL; @@ -9080,7 +9082,7 @@ static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog, goto out_unlock; } - mode = dev_xdp_mode(xdp_link->flags); + mode = dev_xdp_mode(xdp_link->dev, xdp_link->flags); bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode); err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL, xdp_link->flags, new_prog); @@ -9164,7 +9166,7 @@ out_put_dev: int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, int expected_fd, u32 flags) { - enum bpf_xdp_mode mode = dev_xdp_mode(flags); + enum bpf_xdp_mode mode = dev_xdp_mode(dev, flags); struct bpf_prog *new_prog = NULL, *old_prog = NULL; int err; -- cgit v1.2.3 From c210773d6c6f595f5922d56b7391fe343bc7310e Mon Sep 17 00:00:00 2001 From: Yauheni Kaliuta Date: Thu, 20 Aug 2020 14:58:43 +0300 Subject: bpf: selftests: global_funcs: Check err_str before strstr The error path in libbpf.c:load_program() has calls to pr_warn() which ends up for global_funcs tests to test_global_funcs.c:libbpf_debug_print(). For the tests with no struct test_def::err_str initialized with a string, it causes call of strstr() with NULL as the second argument and it segfaults. Fix it by calling strstr() only for non-NULL err_str. Signed-off-by: Yauheni Kaliuta Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200820115843.39454-1-yauheni.kaliuta@redhat.com --- tools/testing/selftests/bpf/prog_tests/test_global_funcs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c index 25b068591e9a..193002b14d7f 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -19,7 +19,7 @@ static int libbpf_debug_print(enum libbpf_print_level level, log_buf = va_arg(args, char *); if (!log_buf) goto out; - if (strstr(log_buf, err_str) == 0) + if (err_str && strstr(log_buf, err_str) == 0) found = true; out: printf(format, log_buf); -- cgit v1.2.3 From b16fc097bc283184cde40e5b30d15705e1590410 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 21 Aug 2020 15:36:42 +0200 Subject: bpf: Fix two typos in uapi/linux/bpf.h Also remove trailing whitespaces in bpf_skb_get_tunnel_key example code. Signed-off-by: Tobias Klauser Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200821133642.18870-1-tklauser@distanz.ch --- include/uapi/linux/bpf.h | 10 +++++----- tools/include/uapi/linux/bpf.h | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0480f893facd..b6238b2209b7 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -767,7 +767,7 @@ union bpf_attr { * * Also, note that **bpf_trace_printk**\ () is slow, and should * only be used for debugging purposes. For this reason, a notice - * bloc (spanning several lines) is printed to kernel logs and + * block (spanning several lines) is printed to kernel logs and * states that the helper should not be used "for production use" * the first time this helper is used (or more precisely, when * **trace_printk**\ () buffers are allocated). For passing values @@ -1033,14 +1033,14 @@ union bpf_attr { * * int ret; * struct bpf_tunnel_key key = {}; - * + * * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); * if (ret < 0) * return TC_ACT_SHOT; // drop packet - * + * * if (key.remote_ipv4 != 0x0a000001) * return TC_ACT_SHOT; // drop packet - * + * * return TC_ACT_OK; // accept packet * * This interface can also be used with all encapsulation devices @@ -1147,7 +1147,7 @@ union bpf_attr { * Description * Retrieve the realm or the route, that is to say the * **tclassid** field of the destination for the *skb*. The - * indentifier retrieved is a user-provided tag, similar to the + * identifier retrieved is a user-provided tag, similar to the * one used with the net_cls cgroup (see description for * **bpf_get_cgroup_classid**\ () helper), but here this tag is * held by a route (a destination entry), not by a task. diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0480f893facd..b6238b2209b7 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -767,7 +767,7 @@ union bpf_attr { * * Also, note that **bpf_trace_printk**\ () is slow, and should * only be used for debugging purposes. For this reason, a notice - * bloc (spanning several lines) is printed to kernel logs and + * block (spanning several lines) is printed to kernel logs and * states that the helper should not be used "for production use" * the first time this helper is used (or more precisely, when * **trace_printk**\ () buffers are allocated). For passing values @@ -1033,14 +1033,14 @@ union bpf_attr { * * int ret; * struct bpf_tunnel_key key = {}; - * + * * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); * if (ret < 0) * return TC_ACT_SHOT; // drop packet - * + * * if (key.remote_ipv4 != 0x0a000001) * return TC_ACT_SHOT; // drop packet - * + * * return TC_ACT_OK; // accept packet * * This interface can also be used with all encapsulation devices @@ -1147,7 +1147,7 @@ union bpf_attr { * Description * Retrieve the realm or the route, that is to say the * **tclassid** field of the destination for the *skb*. The - * indentifier retrieved is a user-provided tag, similar to the + * identifier retrieved is a user-provided tag, similar to the * one used with the net_cls cgroup (see description for * **bpf_get_cgroup_classid**\ () helper), but here this tag is * held by a route (a destination entry), not by a task. -- cgit v1.2.3