diff options
author | David S. Miller <davem@davemloft.net> | 2016-05-16 13:49:33 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-05-16 13:49:33 -0400 |
commit | 485b777855ed74dfcde5c46cfc88e2bc1b7c0714 (patch) | |
tree | ec3eef1f80ae2f1d5fdbb9a5c12df5bf06025ad7 | |
parent | 553eb544444e28749e2d752dee11e2ae4a3ecfb6 (diff) | |
parent | d93a47f735f3455a896e46b18d0ac26fa19639e6 (diff) |
Merge branch 'bpf-blinding'
Daniel Borkmann says:
====================
BPF updates
This set implements constant blinding for BPF. The first couple of
patches are preparatory cleanups, followed by the blinding itself.
Please see individual patches for details.
Thanks a lot!
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | Documentation/sysctl/net.txt | 11 | ||||
-rw-r--r-- | arch/arm/Kconfig | 2 | ||||
-rw-r--r-- | arch/arm64/Kconfig | 2 | ||||
-rw-r--r-- | arch/arm64/net/bpf_jit_comp.c | 56 | ||||
-rw-r--r-- | arch/mips/Kconfig | 2 | ||||
-rw-r--r-- | arch/powerpc/Kconfig | 2 | ||||
-rw-r--r-- | arch/s390/Kconfig | 2 | ||||
-rw-r--r-- | arch/s390/net/bpf_jit_comp.c | 77 | ||||
-rw-r--r-- | arch/sparc/Kconfig | 2 | ||||
-rw-r--r-- | arch/x86/Kconfig | 2 | ||||
-rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 70 | ||||
-rw-r--r-- | include/linux/filter.h | 52 | ||||
-rw-r--r-- | include/linux/netdevice.h | 1 | ||||
-rw-r--r-- | kernel/bpf/core.c | 294 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 2 | ||||
-rw-r--r-- | kernel/bpf/verifier.c | 53 | ||||
-rw-r--r-- | lib/test_bpf.c | 5 | ||||
-rw-r--r-- | net/Kconfig | 21 | ||||
-rw-r--r-- | net/core/filter.c | 40 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 9 |
20 files changed, 569 insertions, 136 deletions
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index 809ab6efcc74..f0480f7ea740 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -43,6 +43,17 @@ Values : 1 - enable the JIT 2 - enable the JIT and ask the compiler to emit traces on kernel log. +bpf_jit_harden +-------------- + +This enables hardening for the Berkeley Packet Filter Just in Time compiler. +Supported are eBPF JIT backends. Enabling hardening trades off performance, +but can mitigate JIT spraying. +Values : + 0 - disable JIT hardening (default value) + 1 - enable JIT hardening for unprivileged users only + 2 - enable JIT hardening for all users + dev_weight -------------- diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index cdfa6c2b7626..2315b0d1b4f4 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -41,7 +41,7 @@ config ARM select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) select HAVE_ARCH_TRACEHOOK select HAVE_ARM_SMCCC if CPU_V7 - select HAVE_BPF_JIT + select HAVE_CBPF_JIT select HAVE_CC_STACKPROTECTOR select HAVE_CONTEXT_TRACKING select HAVE_C_RECORDMCOUNT diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 4f436220384f..e6761ea2feec 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -58,7 +58,7 @@ config ARM64 select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK - select HAVE_BPF_JIT + select HAVE_EBPF_JIT select HAVE_C_RECORDMCOUNT select HAVE_CC_STACKPROTECTOR select HAVE_CMPXCHG_DOUBLE diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index b405bbb54431..d0d51903c7e0 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -31,8 +31,8 @@ int bpf_jit_enable __read_mostly; -#define TMP_REG_1 (MAX_BPF_REG + 0) -#define TMP_REG_2 (MAX_BPF_REG + 1) +#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) +#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* Map BPF registers to A64 registers */ static const int bpf2a64[] = { @@ -54,6 
+54,8 @@ static const int bpf2a64[] = { /* temporary register for internal BPF JIT */ [TMP_REG_1] = A64_R(23), [TMP_REG_2] = A64_R(24), + /* temporary register for blinding constants */ + [BPF_REG_AX] = A64_R(9), }; struct jit_ctx { @@ -762,31 +764,45 @@ void bpf_jit_compile(struct bpf_prog *prog) /* Nothing to do here. We support Internal BPF. */ } -void bpf_int_jit_compile(struct bpf_prog *prog) +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { + struct bpf_prog *tmp, *orig_prog = prog; struct bpf_binary_header *header; + bool tmp_blinded = false; struct jit_ctx ctx; int image_size; u8 *image_ptr; if (!bpf_jit_enable) - return; + return orig_prog; - if (!prog || !prog->len) - return; + tmp = bpf_jit_blind_constants(prog); + /* If blinding was requested and we failed during blinding, + * we must fall back to the interpreter. + */ + if (IS_ERR(tmp)) + return orig_prog; + if (tmp != prog) { + tmp_blinded = true; + prog = tmp; + } memset(&ctx, 0, sizeof(ctx)); ctx.prog = prog; ctx.offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL); - if (ctx.offset == NULL) - return; + if (ctx.offset == NULL) { + prog = orig_prog; + goto out; + } /* 1. Initial fake pass to compute ctx->idx. */ /* Fake pass to fill in ctx->offset and ctx->tmp_used. */ - if (build_body(&ctx)) - goto out; + if (build_body(&ctx)) { + prog = orig_prog; + goto out_off; + } build_prologue(&ctx); @@ -797,8 +813,10 @@ void bpf_int_jit_compile(struct bpf_prog *prog) image_size = sizeof(u32) * ctx.idx; header = bpf_jit_binary_alloc(image_size, &image_ptr, sizeof(u32), jit_fill_hole); - if (header == NULL) - goto out; + if (header == NULL) { + prog = orig_prog; + goto out_off; + } /* 2. Now, the actual pass. */ @@ -809,7 +827,8 @@ void bpf_int_jit_compile(struct bpf_prog *prog) if (build_body(&ctx)) { bpf_jit_binary_free(header); - goto out; + prog = orig_prog; + goto out_off; } build_epilogue(&ctx); @@ -817,7 +836,8 @@ void bpf_int_jit_compile(struct bpf_prog *prog) /* 3. 
Extra pass to validate JITed code. */ if (validate_code(&ctx)) { bpf_jit_binary_free(header); - goto out; + prog = orig_prog; + goto out_off; } /* And we're done. */ @@ -829,8 +849,14 @@ void bpf_int_jit_compile(struct bpf_prog *prog) set_memory_ro((unsigned long)header, header->pages); prog->bpf_func = (void *)ctx.image; prog->jited = 1; -out: + +out_off: kfree(ctx.offset); +out: + if (tmp_blinded) + bpf_jit_prog_release_other(prog, prog == orig_prog ? + tmp : orig_prog); + return prog; } void bpf_jit_free(struct bpf_prog *prog) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 2018c2b0e078..3ee1ea61b2dc 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -15,7 +15,7 @@ config MIPS select HAVE_ARCH_KGDB select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK - select HAVE_BPF_JIT if !CPU_MICROMIPS + select HAVE_CBPF_JIT if !CPU_MICROMIPS select HAVE_FUNCTION_TRACER select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 7cd32c038286..2fdb73d9198a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -126,7 +126,7 @@ config PPC select IRQ_FORCED_THREADING select HAVE_RCU_TABLE_FREE if SMP select HAVE_SYSCALL_TRACEPOINTS - select HAVE_BPF_JIT + select HAVE_CBPF_JIT select HAVE_ARCH_JUMP_LABEL select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index bf24ab188921..a883981c0174 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -126,7 +126,7 @@ config S390 select HAVE_ARCH_SOFT_DIRTY select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE - select HAVE_BPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES + select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL select HAVE_DEBUG_KMEMLEAK diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 3c0bfc1f2694..9133b0ec000b 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ 
b/arch/s390/net/bpf_jit_comp.c @@ -54,16 +54,17 @@ struct bpf_jit { #define SEEN_FUNC 16 /* calls C functions */ #define SEEN_TAIL_CALL 32 /* code uses tail calls */ #define SEEN_SKB_CHANGE 64 /* code changes skb data */ +#define SEEN_REG_AX 128 /* code uses constant blinding */ #define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB) /* * s390 registers */ -#define REG_W0 (__MAX_BPF_REG+0) /* Work register 1 (even) */ -#define REG_W1 (__MAX_BPF_REG+1) /* Work register 2 (odd) */ -#define REG_SKB_DATA (__MAX_BPF_REG+2) /* SKB data register */ -#define REG_L (__MAX_BPF_REG+3) /* Literal pool register */ -#define REG_15 (__MAX_BPF_REG+4) /* Register 15 */ +#define REG_W0 (MAX_BPF_JIT_REG + 0) /* Work register 1 (even) */ +#define REG_W1 (MAX_BPF_JIT_REG + 1) /* Work register 2 (odd) */ +#define REG_SKB_DATA (MAX_BPF_JIT_REG + 2) /* SKB data register */ +#define REG_L (MAX_BPF_JIT_REG + 3) /* Literal pool register */ +#define REG_15 (MAX_BPF_JIT_REG + 4) /* Register 15 */ #define REG_0 REG_W0 /* Register 0 */ #define REG_1 REG_W1 /* Register 1 */ #define REG_2 BPF_REG_1 /* Register 2 */ @@ -88,6 +89,8 @@ static const int reg2hex[] = { [BPF_REG_9] = 10, /* BPF stack pointer */ [BPF_REG_FP] = 13, + /* Register for blinding (shared with REG_SKB_DATA) */ + [BPF_REG_AX] = 12, /* SKB data pointer */ [REG_SKB_DATA] = 12, /* Work registers for s390x backend */ @@ -385,7 +388,7 @@ static void save_restore_regs(struct bpf_jit *jit, int op) /* * For SKB access %b1 contains the SKB pointer. For "bpf_jit.S" * we store the SKB header length on the stack and the SKB data - * pointer in REG_SKB_DATA. + * pointer in REG_SKB_DATA if BPF_REG_AX is not used. 
*/ static void emit_load_skb_data_hlen(struct bpf_jit *jit) { @@ -397,9 +400,10 @@ static void emit_load_skb_data_hlen(struct bpf_jit *jit) offsetof(struct sk_buff, data_len)); /* stg %w1,ST_OFF_HLEN(%r0,%r15) */ EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0, REG_15, STK_OFF_HLEN); - /* lg %skb_data,data_off(%b1) */ - EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0, - BPF_REG_1, offsetof(struct sk_buff, data)); + if (!(jit->seen & SEEN_REG_AX)) + /* lg %skb_data,data_off(%b1) */ + EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0, + BPF_REG_1, offsetof(struct sk_buff, data)); } /* @@ -487,6 +491,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i s32 imm = insn->imm; s16 off = insn->off; + if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX) + jit->seen |= SEEN_REG_AX; switch (insn->code) { /* * BPF_MOV @@ -1188,7 +1194,7 @@ call_fn: /* * Implicit input: * BPF_REG_6 (R7) : skb pointer - * REG_SKB_DATA (R12): skb data pointer + * REG_SKB_DATA (R12): skb data pointer (if no BPF_REG_AX) * * Calculated input: * BPF_REG_2 (R3) : offset of byte(s) to fetch in skb @@ -1209,6 +1215,11 @@ call_fn: /* agfr %b2,%src (%src is s32 here) */ EMIT4(0xb9180000, BPF_REG_2, src_reg); + /* Reload REG_SKB_DATA if BPF_REG_AX is used */ + if (jit->seen & SEEN_REG_AX) + /* lg %skb_data,data_off(%b6) */ + EMIT6_DISP_LH(0xe3000000, 0x0004, REG_SKB_DATA, REG_0, + BPF_REG_6, offsetof(struct sk_buff, data)); /* basr %b5,%w1 (%b5 is call saved) */ EMIT2(0x0d00, BPF_REG_5, REG_W1); @@ -1262,37 +1273,62 @@ void bpf_jit_compile(struct bpf_prog *fp) /* * Compile eBPF program "fp" */ -void bpf_int_jit_compile(struct bpf_prog *fp) +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) { + struct bpf_prog *tmp, *orig_fp = fp; struct bpf_binary_header *header; + bool tmp_blinded = false; struct bpf_jit jit; int pass; if (!bpf_jit_enable) - return; + return orig_fp; + + tmp = bpf_jit_blind_constants(fp); + /* + * If blinding was requested and we failed 
during blinding, + * we must fall back to the interpreter. + */ + if (IS_ERR(tmp)) + return orig_fp; + if (tmp != fp) { + tmp_blinded = true; + fp = tmp; + } + memset(&jit, 0, sizeof(jit)); jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL); - if (jit.addrs == NULL) - return; + if (jit.addrs == NULL) { + fp = orig_fp; + goto out; + } /* * Three initial passes: * - 1/2: Determine clobbered registers * - 3: Calculate program size and addrs arrray */ for (pass = 1; pass <= 3; pass++) { - if (bpf_jit_prog(&jit, fp)) + if (bpf_jit_prog(&jit, fp)) { + fp = orig_fp; goto free_addrs; + } } /* * Final pass: Allocate and generate program */ - if (jit.size >= BPF_SIZE_MAX) + if (jit.size >= BPF_SIZE_MAX) { + fp = orig_fp; goto free_addrs; + } header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole); - if (!header) + if (!header) { + fp = orig_fp; goto free_addrs; - if (bpf_jit_prog(&jit, fp)) + } + if (bpf_jit_prog(&jit, fp)) { + fp = orig_fp; goto free_addrs; + } if (bpf_jit_enable > 1) { bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf); if (jit.prg_buf) @@ -1305,6 +1341,11 @@ void bpf_int_jit_compile(struct bpf_prog *fp) } free_addrs: kfree(jit.addrs); +out: + if (tmp_blinded) + bpf_jit_prog_release_other(fp, fp == orig_fp ? 
+ tmp : orig_fp); + return fp; } /* diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 57ffaf285c2f..d5003812c748 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -32,7 +32,7 @@ config SPARC select ARCH_WANT_IPC_PARSE_VERSION select GENERIC_PCI_IOMAP select HAVE_NMI_WATCHDOG if SPARC64 - select HAVE_BPF_JIT + select HAVE_CBPF_JIT select HAVE_DEBUG_BUGVERBOSE select GENERIC_SMP_IDLE_THREAD select GENERIC_CLOCKEVENTS diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2dc18605831f..ae83046d51a8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -91,7 +91,7 @@ config X86 select HAVE_ARCH_SOFT_DIRTY if X86_64 select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE - select HAVE_BPF_JIT if X86_64 + select HAVE_EBPF_JIT if X86_64 select HAVE_CC_STACKPROTECTOR select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 4286f3618bd0..fe04a04dab8e 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -110,11 +110,16 @@ static void bpf_flush_icache(void *start, void *end) ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) /* pick a register outside of BPF range for JIT internal work */ -#define AUX_REG (MAX_BPF_REG + 1) +#define AUX_REG (MAX_BPF_JIT_REG + 1) -/* the following table maps BPF registers to x64 registers. - * x64 register r12 is unused, since if used as base address register - * in load/store instructions, it always needs an extra byte of encoding +/* The following table maps BPF registers to x64 registers. + * + * x64 register r12 is unused, since if used as base address + * register in load/store instructions, it always needs an + * extra byte of encoding and is callee saved. 
+ * + * r9 caches skb->len - skb->data_len + * r10 caches skb->data, and used for blinding (if enabled) */ static const int reg2hex[] = { [BPF_REG_0] = 0, /* rax */ @@ -128,6 +133,7 @@ static const int reg2hex[] = { [BPF_REG_8] = 6, /* r14 callee saved */ [BPF_REG_9] = 7, /* r15 callee saved */ [BPF_REG_FP] = 5, /* rbp readonly */ + [BPF_REG_AX] = 2, /* r10 temp register */ [AUX_REG] = 3, /* r11 temp register */ }; @@ -141,7 +147,8 @@ static bool is_ereg(u32 reg) BIT(AUX_REG) | BIT(BPF_REG_7) | BIT(BPF_REG_8) | - BIT(BPF_REG_9)); + BIT(BPF_REG_9) | + BIT(BPF_REG_AX)); } /* add modifiers if 'reg' maps to x64 registers r8..r15 */ @@ -182,6 +189,7 @@ static void jit_fill_hole(void *area, unsigned int size) struct jit_context { int cleanup_addr; /* epilogue code offset */ bool seen_ld_abs; + bool seen_ax_reg; }; /* maximum number of bytes emitted while JITing one eBPF insn */ @@ -345,6 +353,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, struct bpf_insn *insn = bpf_prog->insnsi; int insn_cnt = bpf_prog->len; bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0); + bool seen_ax_reg = ctx->seen_ax_reg | (oldproglen == 0); bool seen_exit = false; u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; int i, cnt = 0; @@ -367,6 +376,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int ilen; u8 *func; + if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX) + ctx->seen_ax_reg = seen_ax_reg = true; + switch (insn->code) { /* ALU */ case BPF_ALU | BPF_ADD | BPF_X: @@ -1002,6 +1014,10 @@ common_load: * sk_load_* helpers also use %r10 and %r9d. 
* See bpf_jit.S */ + if (seen_ax_reg) + /* r10 = skb->data, mov %r10, off32(%rbx) */ + EMIT3_off32(0x4c, 0x8b, 0x93, + offsetof(struct sk_buff, data)); EMIT1_off32(0xE8, jmp_offset); /* call */ break; @@ -1073,25 +1089,37 @@ void bpf_jit_compile(struct bpf_prog *prog) { } -void bpf_int_jit_compile(struct bpf_prog *prog) +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_binary_header *header = NULL; + struct bpf_prog *tmp, *orig_prog = prog; int proglen, oldproglen = 0; struct jit_context ctx = {}; + bool tmp_blinded = false; u8 *image = NULL; int *addrs; int pass; int i; if (!bpf_jit_enable) - return; + return orig_prog; - if (!prog || !prog->len) - return; + tmp = bpf_jit_blind_constants(prog); + /* If blinding was requested and we failed during blinding, + * we must fall back to the interpreter. + */ + if (IS_ERR(tmp)) + return orig_prog; + if (tmp != prog) { + tmp_blinded = true; + prog = tmp; + } addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL); - if (!addrs) - return; + if (!addrs) { + prog = orig_prog; + goto out; + } /* Before first pass, make a rough estimation of addrs[] * each bpf instruction is translated to less than 64 bytes @@ -1113,21 +1141,25 @@ void bpf_int_jit_compile(struct bpf_prog *prog) image = NULL; if (header) bpf_jit_binary_free(header); - goto out; + prog = orig_prog; + goto out_addrs; } if (image) { if (proglen != oldproglen) { pr_err("bpf_jit: proglen=%d != oldproglen=%d\n", proglen, oldproglen); - goto out; + prog = orig_prog; + goto out_addrs; } break; } if (proglen == oldproglen) { header = bpf_jit_binary_alloc(proglen, &image, 1, jit_fill_hole); - if (!header) - goto out; + if (!header) { + prog = orig_prog; + goto out_addrs; + } } oldproglen = proglen; } @@ -1141,8 +1173,14 @@ void bpf_int_jit_compile(struct bpf_prog *prog) prog->bpf_func = (void *)image; prog->jited = 1; } -out: + +out_addrs: kfree(addrs); +out: + if (tmp_blinded) + bpf_jit_prog_release_other(prog, prog == orig_prog ? 
+ tmp : orig_prog); + return prog; } void bpf_jit_free(struct bpf_prog *fp) diff --git a/include/linux/filter.h b/include/linux/filter.h index ec1411c89105..6fc31ef1da2d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -13,6 +13,8 @@ #include <linux/printk.h> #include <linux/workqueue.h> #include <linux/sched.h> +#include <linux/capability.h> + #include <net/sch_generic.h> #include <asm/cacheflush.h> @@ -42,6 +44,15 @@ struct bpf_prog_aux; #define BPF_REG_X BPF_REG_7 #define BPF_REG_TMP BPF_REG_8 +/* Kernel hidden auxiliary/helper register for hardening step. + * Only used by eBPF JITs. It's nothing more than a temporary + * register that JITs use internally, only that here it's part + * of eBPF instructions that have been rewritten for blinding + * constants. See JIT pre-step in bpf_jit_blind_constants(). + */ +#define BPF_REG_AX MAX_BPF_REG +#define MAX_BPF_JIT_REG (MAX_BPF_REG + 1) + /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 @@ -458,7 +469,7 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) int sk_filter(struct sock *sk, struct sk_buff *skb); -int bpf_prog_select_runtime(struct bpf_prog *fp); +struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err); void bpf_prog_free(struct bpf_prog *fp); struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); @@ -492,10 +503,17 @@ bool sk_filter_charge(struct sock *sk, struct sk_filter *fp); void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); -void bpf_int_jit_compile(struct bpf_prog *fp); + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); bool bpf_helper_changes_skb_data(void *func); +struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, + const struct bpf_insn *patch, u32 len); + #ifdef CONFIG_BPF_JIT +extern int bpf_jit_enable; +extern int bpf_jit_harden; + typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned 
int size); struct bpf_binary_header * @@ -507,6 +525,9 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr); void bpf_jit_compile(struct bpf_prog *fp); void bpf_jit_free(struct bpf_prog *fp); +struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp); +void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other); + static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, u32 pass, void *image) { @@ -517,6 +538,33 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET, 16, 1, image, proglen, false); } + +static inline bool bpf_jit_is_ebpf(void) +{ +# ifdef CONFIG_HAVE_EBPF_JIT + return true; +# else + return false; +# endif +} + +static inline bool bpf_jit_blinding_enabled(void) +{ + /* These are the prerequisites, should someone ever have the + * idea to call blinding outside of them, we make sure to + * bail out. + */ + if (!bpf_jit_is_ebpf()) + return false; + if (!bpf_jit_enable) + return false; + if (!bpf_jit_harden) + return false; + if (bpf_jit_harden == 1 && capable(CAP_SYS_ADMIN)) + return false; + + return true; +} #else static inline void bpf_jit_compile(struct bpf_prog *fp) { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c2f5112f08f7..c148edfe4965 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3759,7 +3759,6 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, extern int netdev_max_backlog; extern int netdev_tstamp_prequeue; extern int weight_p; -extern int bpf_jit_enable; bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev); struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index d781b077431f..f1e8a0def99b 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -129,14 +129,83 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, return 
fp; } -EXPORT_SYMBOL_GPL(bpf_prog_realloc); void __bpf_prog_free(struct bpf_prog *fp) { kfree(fp->aux); vfree(fp); } -EXPORT_SYMBOL_GPL(__bpf_prog_free); + +static bool bpf_is_jmp_and_has_target(const struct bpf_insn *insn) +{ + return BPF_CLASS(insn->code) == BPF_JMP && + /* Call and Exit are both special jumps with no + * target inside the BPF instruction image. + */ + BPF_OP(insn->code) != BPF_CALL && + BPF_OP(insn->code) != BPF_EXIT; +} + +static void bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta) +{ + struct bpf_insn *insn = prog->insnsi; + u32 i, insn_cnt = prog->len; + + for (i = 0; i < insn_cnt; i++, insn++) { + if (!bpf_is_jmp_and_has_target(insn)) + continue; + + /* Adjust offset of jmps if we cross boundaries. */ + if (i < pos && i + insn->off + 1 > pos) + insn->off += delta; + else if (i > pos + delta && i + insn->off + 1 <= pos + delta) + insn->off -= delta; + } +} + +struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, + const struct bpf_insn *patch, u32 len) +{ + u32 insn_adj_cnt, insn_rest, insn_delta = len - 1; + struct bpf_prog *prog_adj; + + /* Since our patchlet doesn't expand the image, we're done. */ + if (insn_delta == 0) { + memcpy(prog->insnsi + off, patch, sizeof(*patch)); + return prog; + } + + insn_adj_cnt = prog->len + insn_delta; + + /* Several new instructions need to be inserted. Make room + * for them. Likely, there's no need for a new allocation as + * last page could have large enough tailroom. + */ + prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt), + GFP_USER); + if (!prog_adj) + return NULL; + + prog_adj->len = insn_adj_cnt; + + /* Patching happens in 3 steps: + * + * 1) Move over tail of insnsi from next instruction onwards, + * so we can patch the single target insn with one or more + * new ones (patching is always from 1 to n insns, n > 0). + * 2) Inject new instructions at the target location. + * 3) Adjust branch offsets if necessary. 
+ */ + insn_rest = insn_adj_cnt - off - len; + + memmove(prog_adj->insnsi + off + len, prog_adj->insnsi + off + 1, + sizeof(*patch) * insn_rest); + memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len); + + bpf_adj_branches(prog_adj, off, insn_delta); + + return prog_adj; +} #ifdef CONFIG_BPF_JIT struct bpf_binary_header * @@ -174,6 +243,209 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr) { module_memfree(hdr); } + +int bpf_jit_harden __read_mostly; + +static int bpf_jit_blind_insn(const struct bpf_insn *from, + const struct bpf_insn *aux, + struct bpf_insn *to_buff) +{ + struct bpf_insn *to = to_buff; + u32 imm_rnd = prandom_u32(); + s16 off; + + BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG); + BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG); + + if (from->imm == 0 && + (from->code == (BPF_ALU | BPF_MOV | BPF_K) || + from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) { + *to++ = BPF_ALU64_REG(BPF_XOR, from->dst_reg, from->dst_reg); + goto out; + } + + switch (from->code) { + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU | BPF_MUL | BPF_K: + case BPF_ALU | BPF_MOV | BPF_K: + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU | BPF_MOD | BPF_K: + *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); + *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + *to++ = BPF_ALU32_REG(from->code, from->dst_reg, BPF_REG_AX); + break; + + case BPF_ALU64 | BPF_ADD | BPF_K: + case BPF_ALU64 | BPF_SUB | BPF_K: + case BPF_ALU64 | BPF_AND | BPF_K: + case BPF_ALU64 | BPF_OR | BPF_K: + case BPF_ALU64 | BPF_XOR | BPF_K: + case BPF_ALU64 | BPF_MUL | BPF_K: + case BPF_ALU64 | BPF_MOV | BPF_K: + case BPF_ALU64 | BPF_DIV | BPF_K: + case BPF_ALU64 | BPF_MOD | BPF_K: + *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); + *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + *to++ = BPF_ALU64_REG(from->code, 
from->dst_reg, BPF_REG_AX); + break; + + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSET | BPF_K: + /* Accommodate for extra offset in case of a backjump. */ + off = from->off; + if (off < 0) + off -= 2; + *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); + *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + *to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off); + break; + + case BPF_LD | BPF_ABS | BPF_W: + case BPF_LD | BPF_ABS | BPF_H: + case BPF_LD | BPF_ABS | BPF_B: + *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); + *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + *to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0); + break; + + case BPF_LD | BPF_IND | BPF_W: + case BPF_LD | BPF_IND | BPF_H: + case BPF_LD | BPF_IND | BPF_B: + *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); + *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + *to++ = BPF_ALU32_REG(BPF_ADD, BPF_REG_AX, from->src_reg); + *to++ = BPF_LD_IND(from->code, BPF_REG_AX, 0); + break; + + case BPF_LD | BPF_IMM | BPF_DW: + *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm); + *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + *to++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32); + *to++ = BPF_ALU64_REG(BPF_MOV, aux[0].dst_reg, BPF_REG_AX); + break; + case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. 
*/ + *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm); + *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + *to++ = BPF_ALU64_REG(BPF_OR, aux[0].dst_reg, BPF_REG_AX); + break; + + case BPF_ST | BPF_MEM | BPF_DW: + case BPF_ST | BPF_MEM | BPF_W: + case BPF_ST | BPF_MEM | BPF_H: + case BPF_ST | BPF_MEM | BPF_B: + *to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); + *to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + *to++ = BPF_STX_MEM(from->code, from->dst_reg, BPF_REG_AX, from->off); + break; + } +out: + return to - to_buff; +} + +static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other, + gfp_t gfp_extra_flags) +{ + gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | + gfp_extra_flags; + struct bpf_prog *fp; + + fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags, PAGE_KERNEL); + if (fp != NULL) { + kmemcheck_annotate_bitfield(fp, meta); + + /* aux->prog still points to the fp_other one, so + |