diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-03-31 17:29:33 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-03-31 17:29:33 -0700 |
commit | 29d9f30d4ce6c7a38745a54a8cddface10013490 (patch) | |
tree | 85649ba6a7b39203584d8db9365e03f64e62c136 /arch/x86 | |
parent | 56a451b780676bc1cdac011735fe2869fa2e9abf (diff) | |
parent | 7f80ccfe996871ca69648efee74a60ae7ad0dcd9 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from David Miller:
"Highlights:
1) Fix the iwlwifi regression, from Johannes Berg.
2) Support BSS coloring and 802.11 encapsulation offloading in
hardware, from John Crispin.
3) Fix some potential Spectre issues in qtnfmac, from Sergey
Matyukevich.
4) Add TTL decrement action to openvswitch, from Matteo Croce.
5) Allow paralleization through flow_action setup by not taking the
RTNL mutex, from Vlad Buslov.
6) A lot of zero-length array to flexible-array conversions, from
Gustavo A. R. Silva.
7) Align XDP statistics names across several drivers for consistency,
from Lorenzo Bianconi.
8) Add various pieces of infrastructure for offloading conntrack, and
make use of it in mlx5 driver, from Paul Blakey.
9) Allow using listening sockets in BPF sockmap, from Jakub Sitnicki.
10) Lots of parallelization improvements during configuration changes
in mlxsw driver, from Ido Schimmel.
11) Add support to devlink for generic packet traps, which report
packets dropped during ACL processing. And use them in mlxsw
driver. From Jiri Pirko.
12) Support bcmgenet on ACPI, from Jeremy Linton.
13) Make BPF compatible with RT, from Thomas Gleixnet, Alexei
Starovoitov, and your's truly.
14) Support XDP meta-data in virtio_net, from Yuya Kusakabe.
15) Fix sysfs permissions when network devices change namespaces, from
Christian Brauner.
16) Add a flags element to ethtool_ops so that drivers can more simply
indicate which coalescing parameters they actually support, and
therefore the generic layer can validate the user's ethtool
request. Use this in all drivers, from Jakub Kicinski.
17) Offload FIFO qdisc in mlxsw, from Petr Machata.
18) Support UDP sockets in sockmap, from Lorenz Bauer.
19) Fix stretch ACK bugs in several TCP congestion control modules,
from Pengcheng Yang.
20) Support virtual functiosn in octeontx2 driver, from Tomasz
Duszynski.
21) Add region operations for devlink and use it in ice driver to dump
NVM contents, from Jacob Keller.
22) Add support for hw offload of MACSEC, from Antoine Tenart.
23) Add support for BPF programs that can be attached to LSM hooks,
from KP Singh.
24) Support for multiple paths, path managers, and counters in MPTCP.
From Peter Krystad, Paolo Abeni, Florian Westphal, Davide Caratti,
and others.
25) More progress on adding the netlink interface to ethtool, from
Michal Kubecek"
* git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2121 commits)
net: ipv6: rpl_iptunnel: Fix potential memory leak in rpl_do_srh_inline
cxgb4/chcr: nic-tls stats in ethtool
net: dsa: fix oops while probing Marvell DSA switches
net/bpfilter: remove superfluous testing message
net: macb: Fix handling of fixed-link node
net: dsa: ksz: Select KSZ protocol tag
netdevsim: dev: Fix memory leak in nsim_dev_take_snapshot_write
net: stmmac: add EHL 2.5Gbps PCI info and PCI ID
net: stmmac: add EHL PSE0 & PSE1 1Gbps PCI info and PCI ID
net: stmmac: create dwmac-intel.c to contain all Intel platform
net: dsa: bcm_sf2: Support specifying VLAN tag egress rule
net: dsa: bcm_sf2: Add support for matching VLAN TCI
net: dsa: bcm_sf2: Move writing of CFP_DATA(5) into slicing functions
net: dsa: bcm_sf2: Check earlier for FLOW_EXT and FLOW_MAC_EXT
net: dsa: bcm_sf2: Disable learning for ASP port
net: dsa: b53: Deny enslaving port 7 for 7278 into a bridge
net: dsa: b53: Prevent tagged VLAN on port 7 for 7278
net: dsa: b53: Restore VLAN entries upon (re)configuration
net: dsa: bcm_sf2: Fix overflow checks
hv_netvsc: Remove unnecessary round_up for recv_completion_cnt
...
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/mm/init_32.c | 14 | ||||
-rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 260 |
2 files changed, 194 insertions, 80 deletions
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 8ae0272c1c51..de73992b8432 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -238,7 +238,11 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) } } -static inline int is_kernel_text(unsigned long addr) +/* + * The <linux/kallsyms.h> already defines is_kernel_text, + * using '__' prefix not to get in conflict. + */ +static inline int __is_kernel_text(unsigned long addr) { if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end) return 1; @@ -328,8 +332,8 @@ repeat: addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; - if (is_kernel_text(addr) || - is_kernel_text(addr2)) + if (__is_kernel_text(addr) || + __is_kernel_text(addr2)) prot = PAGE_KERNEL_LARGE_EXEC; pages_2m++; @@ -354,7 +358,7 @@ repeat: */ pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR); - if (is_kernel_text(addr)) + if (__is_kernel_text(addr)) prot = PAGE_KERNEL_EXEC; pages_4k++; @@ -843,7 +847,7 @@ static void mark_nxdata_nx(void) */ unsigned long start = PFN_ALIGN(_etext); /* - * This comes from is_kernel_text upper limit. Also HPAGE where used: + * This comes from __is_kernel_text upper limit. Also HPAGE where used: */ unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 9ba08e9abc09..5ea7c2cf7ab4 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1361,37 +1361,140 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args, -(stack_size - i * 8)); } +static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, + struct bpf_prog *p, int stack_size, bool mod_ret) +{ + u8 *prog = *pprog; + int cnt = 0; + + if (emit_call(&prog, __bpf_prog_enter, prog)) + return -EINVAL; + /* remember prog start time returned by __bpf_prog_enter */ + emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); + + /* arg1: lea rdi, [rbp - stack_size] */ + EMIT4(0x48, 0x8D, 0x7D, -stack_size); + /* arg2: progs[i]->insnsi for interpreter */ + if (!p->jited) + emit_mov_imm64(&prog, BPF_REG_2, + (long) p->insnsi >> 32, + (u32) (long) p->insnsi); + /* call JITed bpf program or interpreter */ + if (emit_call(&prog, p->bpf_func, prog)) + return -EINVAL; + + /* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return + * of the previous call which is then passed on the stack to + * the next BPF program. + */ + if (mod_ret) + emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); + + /* arg1: mov rdi, progs[i] */ + emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, + (u32) (long) p); + /* arg2: mov rsi, rbx <- start time in nsec */ + emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); + if (emit_call(&prog, __bpf_prog_exit, prog)) + return -EINVAL; + + *pprog = prog; + return 0; +} + +static void emit_nops(u8 **pprog, unsigned int len) +{ + unsigned int i, noplen; + u8 *prog = *pprog; + int cnt = 0; + + while (len > 0) { + noplen = len; + + if (noplen > ASM_NOP_MAX) + noplen = ASM_NOP_MAX; + + for (i = 0; i < noplen; i++) + EMIT1(ideal_nops[noplen][i]); + len -= noplen; + } + + *pprog = prog; +} + +static void emit_align(u8 **pprog, u32 align) +{ + u8 *target, *prog = *pprog; + + target = PTR_ALIGN(prog, align); + if (target != prog) + emit_nops(&prog, target - prog); + + *pprog = prog; +} + +static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond) +{ + u8 *prog = *pprog; + int cnt = 0; + s64 offset; + + offset = func - (ip + 2 + 4); + if (!is_simm32(offset)) { + pr_err("Target %p is out of range\n", func); + return -EINVAL; + } + EMIT2_off32(0x0F, jmp_cond + 0x10, offset); + *pprog = prog; + return 0; +} + static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, - struct bpf_prog **progs, int prog_cnt, int stack_size) + struct bpf_tramp_progs *tp, int stack_size) { + int i; u8 *prog = *pprog; - int cnt = 0, i; - for (i = 0; i < prog_cnt; i++) { - if (emit_call(&prog, __bpf_prog_enter, prog)) - return -EINVAL; - /* remember prog start time returned by __bpf_prog_enter */ - emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); - - /* arg1: lea rdi, [rbp - stack_size] */ - EMIT4(0x48, 0x8D, 0x7D, -stack_size); - /* arg2: progs[i]->insnsi for interpreter */ - if (!progs[i]->jited) - emit_mov_imm64(&prog, BPF_REG_2, - (long) progs[i]->insnsi >> 32, - (u32) (long) progs[i]->insnsi); - /* call JITed bpf program or interpreter */ - if (emit_call(&prog, progs[i]->bpf_func, prog)) + for (i = 0; i < tp->nr_progs; i++) { + if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false)) return -EINVAL; + } + *pprog = prog; + return 0; +} + +static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, + struct bpf_tramp_progs *tp, int stack_size, + u8 **branches) +{ + u8 *prog = *pprog; + int i, cnt = 0; - /* arg1: mov rdi, progs[i] */ - emit_mov_imm64(&prog, BPF_REG_1, (long) progs[i] >> 32, - (u32) (long) progs[i]); - /* arg2: mov rsi, rbx <- start time in nsec */ - emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); - if (emit_call(&prog, __bpf_prog_exit, prog)) + /* The first fmod_ret program will receive a garbage return value. + * Set this to 0 to avoid confusing the program. + */ + emit_mov_imm32(&prog, false, BPF_REG_0, 0); + emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); + for (i = 0; i < tp->nr_progs; i++) { + if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, true)) return -EINVAL; + + /* mod_ret prog stored return value into [rbp - 8]. Emit: + * if (*(u64 *)(rbp - 8) != 0) + * goto do_fexit; + */ + /* cmp QWORD PTR [rbp - 0x8], 0x0 */ + EMIT4(0x48, 0x83, 0x7d, 0xf8); EMIT1(0x00); + + /* Save the location of the branch and Generate 6 nops + * (4 bytes for an offset and 2 bytes for the jump) These nops + * are replaced with a conditional jump once do_fexit (i.e. the + * start of the fexit invocation) is finalized. + */ + branches[i] = prog; + emit_nops(&prog, 4 + 2); } + *pprog = prog; return 0; } @@ -1458,12 +1561,15 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, */ int arch_prepare_bpf_trampoline(void *image, void *image_end, const struct btf_func_model *m, u32 flags, - struct bpf_prog **fentry_progs, int fentry_cnt, - struct bpf_prog **fexit_progs, int fexit_cnt, + struct bpf_tramp_progs *tprogs, void *orig_call) { - int cnt = 0, nr_args = m->nr_args; + int ret, i, cnt = 0, nr_args = m->nr_args; int stack_size = nr_args * 8; + struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY]; + struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT]; + struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN]; + u8 **branches = NULL; u8 *prog; /* x86-64 supports up to 6 arguments. 7+ can be added in the future */ @@ -1492,28 +1598,64 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, save_regs(m, &prog, nr_args, stack_size); - if (fentry_cnt) - if (invoke_bpf(m, &prog, fentry_progs, fentry_cnt, stack_size)) + if (fentry->nr_progs) + if (invoke_bpf(m, &prog, fentry, stack_size)) return -EINVAL; + if (fmod_ret->nr_progs) { + branches = kcalloc(fmod_ret->nr_progs, sizeof(u8 *), + GFP_KERNEL); + if (!branches) + return -ENOMEM; + + if (invoke_bpf_mod_ret(m, &prog, fmod_ret, stack_size, + branches)) { + ret = -EINVAL; + goto cleanup; + } + } + if (flags & BPF_TRAMP_F_CALL_ORIG) { - if (fentry_cnt) + if (fentry->nr_progs || fmod_ret->nr_progs) restore_regs(m, &prog, nr_args, stack_size); /* call original function */ - if (emit_call(&prog, orig_call, prog)) - return -EINVAL; + if (emit_call(&prog, orig_call, prog)) { + ret = -EINVAL; + goto cleanup; + } /* remember return value in a stack for bpf prog to access */ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); } - if (fexit_cnt) - if (invoke_bpf(m, &prog, fexit_progs, fexit_cnt, stack_size)) - return -EINVAL; + if (fmod_ret->nr_progs) { + /* From Intel 64 and IA-32 Architectures Optimization + * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler + * Coding Rule 11: All branch targets should be 16-byte + * aligned. + */ + emit_align(&prog, 16); + /* Update the branches saved in invoke_bpf_mod_ret with the + * aligned address of do_fexit. + */ + for (i = 0; i < fmod_ret->nr_progs; i++) + emit_cond_near_jump(&branches[i], prog, branches[i], + X86_JNE); + } + + if (fexit->nr_progs) + if (invoke_bpf(m, &prog, fexit, stack_size)) { + ret = -EINVAL; + goto cleanup; + } if (flags & BPF_TRAMP_F_RESTORE_REGS) restore_regs(m, &prog, nr_args, stack_size); + /* This needs to be done regardless. If there were fmod_ret programs, + * the return value is only updated on the stack and still needs to be + * restored to R0. + */ if (flags & BPF_TRAMP_F_CALL_ORIG) /* restore original return value back into RAX */ emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); @@ -1525,45 +1667,15 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ EMIT1(0xC3); /* ret */ /* Make sure the trampoline generation logic doesn't overflow */ - if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) - return -EFAULT; - return prog - (u8 *)image; -} - -static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond) -{ - u8 *prog = *pprog; - int cnt = 0; - s64 offset; - - offset = func - (ip + 2 + 4); - if (!is_simm32(offset)) { - pr_err("Target %p is out of range\n", func); - return -EINVAL; - } - EMIT2_off32(0x0F, jmp_cond + 0x10, offset); - *pprog = prog; - return 0; -} - -static void emit_nops(u8 **pprog, unsigned int len) -{ - unsigned int i, noplen; - u8 *prog = *pprog; - int cnt = 0; - - while (len > 0) { - noplen = len; - - if (noplen > ASM_NOP_MAX) - noplen = ASM_NOP_MAX; - - for (i = 0; i < noplen; i++) - EMIT1(ideal_nops[noplen][i]); - len -= noplen; + if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) { + ret = -EFAULT; + goto cleanup; } + ret = prog - (u8 *)image; - *pprog = prog; +cleanup: + kfree(branches); + return ret; } static int emit_fallback_jump(u8 **pprog) @@ -1588,7 +1700,7 @@ static int emit_fallback_jump(u8 **pprog) static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs) { - u8 *jg_reloc, *jg_target, *prog = *pprog; + u8 *jg_reloc, *prog = *pprog; int pivot, err, jg_bytes = 1, cnt = 0; s64 jg_offset; @@ -1643,9 +1755,7 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs) * Coding Rule 11: All branch targets should be 16-byte * aligned. */ - jg_target = PTR_ALIGN(prog, 16); - if (jg_target != prog) - emit_nops(&prog, jg_target - prog); + emit_align(&prog, 16); jg_offset = prog - jg_reloc; emit_code(jg_reloc - jg_bytes, jg_offset, jg_bytes); |