 arch/x86/net/bpf_jit_comp.c                            | 424
 include/linux/bpf.h                                    | 138
 include/linux/bpf_types.h                              |  78
 include/linux/bpf_verifier.h                           |   1
 include/linux/btf.h                                    |   1
 include/uapi/linux/bpf.h                               |   3
 kernel/bpf/Makefile                                    |   1
 kernel/bpf/btf.c                                       | 363
 kernel/bpf/core.c                                      |   9
 kernel/bpf/syscall.c                                   |  77
 kernel/bpf/trampoline.c                                | 253
 kernel/bpf/verifier.c                                  | 131
 net/bpf/test_run.c                                     |  41
 net/core/filter.c                                      |  12
 tools/include/uapi/linux/bpf.h                         |   3
 tools/lib/bpf/bpf.c                                    |   8
 tools/lib/bpf/bpf.h                                    |   5
 tools/lib/bpf/bpf_helpers.h                            |  13
 tools/lib/bpf/btf.c                                    |  22
 tools/lib/bpf/btf.h                                    |   2
 tools/lib/bpf/libbpf.c                                 | 154
 tools/lib/bpf/libbpf.h                                 |   7
 tools/lib/bpf/libbpf.map                               |   3
 tools/testing/selftests/bpf/prog_tests/fentry_fexit.c  |  90
 tools/testing/selftests/bpf/prog_tests/fentry_test.c   |  64
 tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c |  76
 tools/testing/selftests/bpf/prog_tests/fexit_stress.c  |  76
 tools/testing/selftests/bpf/prog_tests/fexit_test.c    |  64
 tools/testing/selftests/bpf/prog_tests/kfree_skb.c     |  39
 tools/testing/selftests/bpf/progs/fentry_test.c        |  90
 tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c      |  91
 tools/testing/selftests/bpf/progs/fexit_test.c         |  98
 tools/testing/selftests/bpf/progs/kfree_skb.c          |  52
 tools/testing/selftests/bpf/progs/test_pkt_access.c    |  38
 34 files changed, 2354 insertions(+), 173 deletions(-)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 8cd23d8309bf..2e586f579945 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -9,9 +9,11 @@
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
+#include <linux/memory.h>
#include <asm/extable.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
+#include <asm/text-patching.h>
static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
@@ -96,6 +98,7 @@ static int bpf_size_to_x86_bytes(int bpf_size)
/* Pick a register outside of BPF range for JIT internal work */
#define AUX_REG (MAX_BPF_JIT_REG + 1)
+#define X86_REG_R9 (MAX_BPF_JIT_REG + 2)
/*
* The following table maps BPF registers to x86-64 registers.
@@ -104,8 +107,8 @@ static int bpf_size_to_x86_bytes(int bpf_size)
* register in load/store instructions, it always needs an
* extra byte of encoding and is callee saved.
*
- * Also x86-64 register R9 is unused. x86-64 register R10 is
- * used for blinding (if enabled).
+ * x86-64 register R9 is not used by BPF programs, but can be used by BPF
+ * trampoline. x86-64 register R10 is used for blinding (if enabled).
*/
static const int reg2hex[] = {
[BPF_REG_0] = 0, /* RAX */
@@ -121,6 +124,7 @@ static const int reg2hex[] = {
[BPF_REG_FP] = 5, /* RBP readonly */
[BPF_REG_AX] = 2, /* R10 temp register */
[AUX_REG] = 3, /* R11 temp register */
+ [X86_REG_R9] = 1, /* R9 register, 6th function argument */
};
static const int reg2pt_regs[] = {
@@ -148,6 +152,7 @@ static bool is_ereg(u32 reg)
BIT(BPF_REG_7) |
BIT(BPF_REG_8) |
BIT(BPF_REG_9) |
+ BIT(X86_REG_R9) |
BIT(BPF_REG_AX));
}
@@ -198,8 +203,10 @@ struct jit_context {
/* Maximum number of bytes emitted while JITing one eBPF insn */
#define BPF_MAX_INSN_SIZE 128
#define BPF_INSN_SAFETY 64
+/* number of bytes emit_call() needs to generate a call instruction */
+#define X86_CALL_SIZE 5
-#define PROLOGUE_SIZE 20
+#define PROLOGUE_SIZE 25
/*
* Emit x86-64 prologue code for BPF program and check its size.
@@ -208,8 +215,13 @@ struct jit_context {
static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
{
u8 *prog = *pprog;
- int cnt = 0;
+ int cnt = X86_CALL_SIZE;
+ /* BPF trampoline can be made to work without these nops,
+ * but let's waste 5 bytes for now and optimize later
+ */
+ memcpy(prog, ideal_nops[NOP_ATOMIC5], cnt);
+ prog += cnt;
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
/* sub rsp, rounded_stack_depth */
@@ -390,6 +402,149 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
*pprog = prog;
}
+/* LDX: dst_reg = *(u8*)(src_reg + off) */
+static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ switch (size) {
+ case BPF_B:
+ /* Emit 'movzx rax, byte ptr [rax + off]' */
+ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
+ break;
+ case BPF_H:
+ /* Emit 'movzx rax, word ptr [rax + off]' */
+ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
+ break;
+ case BPF_W:
+ /* Emit 'mov eax, dword ptr [rax+0x14]' */
+ if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
+ else
+ EMIT1(0x8B);
+ break;
+ case BPF_DW:
+ /* Emit 'mov rax, qword ptr [rax+0x14]' */
+ EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
+ break;
+ }
+ /*
+ * If insn->off == 0 we can save one extra byte, but
+ * the special case of x86 R13, which always needs an offset,
+ * is not worth the hassle
+ */
+ if (is_imm8(off))
+ EMIT2(add_2reg(0x40, src_reg, dst_reg), off);
+ else
+ EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), off);
+ *pprog = prog;
+}
+
+/* STX: *(u8*)(dst_reg + off) = src_reg */
+static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ switch (size) {
+ case BPF_B:
+ /* Emit 'mov byte ptr [rax + off], al' */
+ if (is_ereg(dst_reg) || is_ereg(src_reg) ||
+ /* We have to add an extra byte for x86 SIL, DIL regs */
+ src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
+ EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
+ else
+ EMIT1(0x88);
+ break;
+ case BPF_H:
+ if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
+ else
+ EMIT2(0x66, 0x89);
+ break;
+ case BPF_W:
+ if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
+ else
+ EMIT1(0x89);
+ break;
+ case BPF_DW:
+ EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
+ break;
+ }
+ if (is_imm8(off))
+ EMIT2(add_2reg(0x40, dst_reg, src_reg), off);
+ else
+ EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), off);
+ *pprog = prog;
+}
+
+static int emit_call(u8 **pprog, void *func, void *ip)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ s64 offset;
+
+ offset = func - (ip + X86_CALL_SIZE);
+ if (!is_simm32(offset)) {
+ pr_err("Target call %p is out of range\n", func);
+ return -EINVAL;
+ }
+ EMIT1_off32(0xE8, offset);
+ *pprog = prog;
+ return 0;
+}
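The encoding emitted here is the 5-byte near call: opcode 0xE8 followed by a
little-endian rel32 measured from the end of the instruction. A worked example,
assuming a hypothetical target 0x100 bytes past the patch site:

    /* ip = patch site, func = ip + 0x100 */
    s64 offset = func - (ip + X86_CALL_SIZE);   /* 0x100 - 5 = 0xfb */
    /* emitted bytes: e8 fb 00 00 00 */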
+
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ void *old_addr, void *new_addr)
+{
+ u8 old_insn[X86_CALL_SIZE] = {};
+ u8 new_insn[X86_CALL_SIZE] = {};
+ u8 *prog;
+ int ret;
+
+ if (!is_kernel_text((long)ip) &&
+ !is_bpf_text_address((long)ip))
+ /* BPF trampoline in modules is not supported */
+ return -EINVAL;
+
+ if (old_addr) {
+ prog = old_insn;
+ ret = emit_call(&prog, old_addr, (void *)ip);
+ if (ret)
+ return ret;
+ }
+ if (new_addr) {
+ prog = new_insn;
+ ret = emit_call(&prog, new_addr, (void *)ip);
+ if (ret)
+ return ret;
+ }
+ ret = -EBUSY;
+ mutex_lock(&text_mutex);
+ switch (t) {
+ case BPF_MOD_NOP_TO_CALL:
+ if (memcmp(ip, ideal_nops[NOP_ATOMIC5], X86_CALL_SIZE))
+ goto out;
+ text_poke_bp(ip, new_insn, X86_CALL_SIZE, NULL);
+ break;
+ case BPF_MOD_CALL_TO_CALL:
+ if (memcmp(ip, old_insn, X86_CALL_SIZE))
+ goto out;
+ text_poke_bp(ip, new_insn, X86_CALL_SIZE, NULL);
+ break;
+ case BPF_MOD_CALL_TO_NOP:
+ if (memcmp(ip, old_insn, X86_CALL_SIZE))
+ goto out;
+ text_poke_bp(ip, ideal_nops[NOP_ATOMIC5], X86_CALL_SIZE, NULL);
+ break;
+ }
+ ret = 0;
+out:
+ mutex_unlock(&text_mutex);
+ return ret;
+}
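A minimal sketch of how a caller might drive bpf_arch_text_poke() to attach
and later detach a trampoline at a patched function entry. func_ip and
trampoline_image are illustrative names and error handling is elided; the real
management code lives in kernel/bpf/trampoline.c:

    /* attach: flip the 5-byte nop at the function entry into a call */
    err = bpf_arch_text_poke(func_ip, BPF_MOD_NOP_TO_CALL,
                             NULL /* old_addr unused here */, trampoline_image);

    /* detach: verify the current call target, then restore the nop */
    err = bpf_arch_text_poke(func_ip, BPF_MOD_CALL_TO_NOP,
                             trampoline_image, NULL);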
static bool ex_handler_bpf(const struct exception_table_entry *x,
struct pt_regs *regs, int trapnr,
@@ -773,68 +928,22 @@ st: if (is_imm8(insn->off))
/* STX: *(u8*)(dst_reg + off) = src_reg */
case BPF_STX | BPF_MEM | BPF_B:
- /* Emit 'mov byte ptr [rax + off], al' */
- if (is_ereg(dst_reg) || is_ereg(src_reg) ||
- /* We have to add extra byte for x86 SIL, DIL regs */
- src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
- EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
- else
- EMIT1(0x88);
- goto stx;
case BPF_STX | BPF_MEM | BPF_H:
- if (is_ereg(dst_reg) || is_ereg(src_reg))
- EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
- else
- EMIT2(0x66, 0x89);
- goto stx;
case BPF_STX | BPF_MEM | BPF_W:
- if (is_ereg(dst_reg) || is_ereg(src_reg))
- EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
- else
- EMIT1(0x89);
- goto stx;
case BPF_STX | BPF_MEM | BPF_DW:
- EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
-stx: if (is_imm8(insn->off))
- EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
- else
- EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
- insn->off);
+ emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
break;
/* LDX: dst_reg = *(u8*)(src_reg + off) */
case BPF_LDX | BPF_MEM | BPF_B:
case BPF_LDX | BPF_PROBE_MEM | BPF_B:
- /* Emit 'movzx rax, byte ptr [rax + off]' */
- EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
- goto ldx;
case BPF_LDX | BPF_MEM | BPF_H:
case BPF_LDX | BPF_PROBE_MEM | BPF_H:
- /* Emit 'movzx rax, word ptr [rax + off]' */
- EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
- goto ldx;
case BPF_LDX | BPF_MEM | BPF_W:
case BPF_LDX | BPF_PROBE_MEM | BPF_W:
- /* Emit 'mov eax, dword ptr [rax+0x14]' */
- if (is_ereg(dst_reg) || is_ereg(src_reg))
- EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
- else
- EMIT1(0x8B);
- goto ldx;
case BPF_LDX | BPF_MEM | BPF_DW:
case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
- /* Emit 'mov rax, qword ptr [rax+0x14]' */
- EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
-ldx: /*
- * If insn->off == 0 we can save one extra byte, but
- * special case of x86 R13 which always needs an offset
- * is not worth the hassle
- */
- if (is_imm8(insn->off))
- EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off);
- else
- EMIT1_off32(add_2reg(0x80, src_reg, dst_reg),
- insn->off);
+ emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
struct exception_table_entry *ex;
u8 *_insn = image + proglen;
@@ -899,13 +1008,8 @@ xadd: if (is_imm8(insn->off))
/* call */
case BPF_JMP | BPF_CALL:
func = (u8 *) __bpf_call_base + imm32;
- jmp_offset = func - (image + addrs[i]);
- if (!imm32 || !is_simm32(jmp_offset)) {
- pr_err("unsupported BPF func %d addr %p image %p\n",
- imm32, func, image);
+ if (!imm32 || emit_call(&prog, func, image + addrs[i - 1]))
return -EINVAL;
- }
- EMIT1_off32(0xE8, jmp_offset);
break;
case BPF_JMP | BPF_TAIL_CALL:
@@ -1138,6 +1242,210 @@ emit_jmp:
return proglen;
}
+static void save_regs(struct btf_func_model *m, u8 **prog, int nr_args,
+ int stack_size)
+{
+ int i;
+ /* Store function arguments to stack.
+ * For a function that accepts two pointers the sequence will be:
+ * mov QWORD PTR [rbp-0x10],rdi
+ * mov QWORD PTR [rbp-0x8],rsi
+ */
+ for (i = 0; i < min(nr_args, 6); i++)
+ emit_stx(prog, bytes_to_bpf_size(m->arg_size[i]),
+ BPF_REG_FP,
+ i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
+ -(stack_size - i * 8));
+}
+
+static void restore_regs(struct btf_func_model *m, u8 **prog, int nr_args,
+ int stack_size)
+{
+ int i;
+
+ /* Restore function arguments from stack.
+ * For a function that accepts two pointers the sequence will be:
+ * EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10]
+ * EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8]
+ */
+ for (i = 0; i < min(nr_args, 6); i++)
+ emit_ldx(prog, bytes_to_bpf_size(m->arg_size[i]),
+ i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
+ BPF_REG_FP,
+ -(stack_size - i * 8));
+}
+
+static int invoke_bpf(struct btf_func_model *m, u8 **pprog,
+ struct bpf_prog **progs, int prog_cnt, int stack_size)
+{
+ u8 *prog = *pprog;
+ int cnt = 0, i;
+
+ for (i = 0; i < prog_cnt; i++) {
+ if (emit_call(&prog, __bpf_prog_enter, prog))
+ return -EINVAL;
+ /* remember prog start time returned by __bpf_prog_enter */
+ emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
+
+ /* arg1: lea rdi, [rbp - stack_size] */
+ EMIT4(0x48, 0x8D, 0x7D, -stack_size);
+ /* arg2: progs[i]->insnsi for interpreter */
+ if (!progs[i]->jited)
+ emit_mov_imm64(&prog, BPF_REG_2,
+ (long) progs[i]->insnsi >> 32,
+ (u32) (long) progs[i]->insnsi);
+ /* call JITed bpf program or interpreter */
+ if (emit_call(&prog, progs[i]->bpf_func, prog))
+ return -EINVAL;
+
+ /* arg1: mov rdi, progs[i] */
+ emit_mov_imm64(&prog, BPF_REG_1, (long) progs[i] >> 32,
+ (u32) (long) progs[i]);
+ /* arg2: mov rsi, rbx <- start time in nsec */
+ emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
+ if (emit_call(&prog, __bpf_prog_exit, prog))
+ return -EINVAL;
+ }
+ *pprog = prog;
+ return 0;
+}
+
+/* Example:
+ * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
+ * its 'struct btf_func_model' will have nr_args=2.
+ * The assembly code when eth_type_trans is executing after the trampoline:
+ *
+ * push rbp
+ * mov rbp, rsp
+ * sub rsp, 16 // space for skb and dev
+ * push rbx // temp regs to pass start time
+ * mov qword ptr [rbp - 16], rdi // save skb pointer to stack
+ * mov qword ptr [rbp - 8], rsi // save dev pointer to stack
+ * call __bpf_prog_enter // rcu_read_lock and preempt_disable
+ * mov rbx, rax // remember start time if bpf stats are enabled
+ * lea rdi, [rbp - 16] // R1==ctx of bpf prog
+ * call addr_of_jited_FENTRY_prog
+ * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off
+ * mov rsi, rbx // prog start time
+ * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math
+ * mov rdi, qword ptr [rbp - 16] // restore skb pointer from stack
+ * mov rsi, qword ptr [rbp - 8] // restore dev pointer from stack
+ * pop rbx
+ * leave
+ * ret
+ *
+ * eth_type_trans has a 5-byte nop at the beginning. These 5 bytes will be
+ * replaced with 'call generated_bpf_trampoline'. When it returns,
+ * eth_type_trans will continue executing with the original skb and dev pointers.
+ *
+ * The assembly code when eth_type_trans is called from the trampoline:
+ *
+ * push rbp
+ * mov rbp, rsp
+ * sub rsp, 24 // space for skb, dev, return value
+ * push rbx // temp regs to pass start time
+ * mov qword ptr [rbp - 24], rdi // save skb pointer to stack
+ * mov qword ptr [rbp - 16], rsi // save dev pointer to stack
+ * call __bpf_prog_enter // rcu_read_lock and preempt_disable
+ * mov rbx, rax // remember start time if bpf stats are enabled
+ * lea rdi, [rbp - 24] // R1==ctx of bpf prog
+ * call addr_of_jited_FENTRY_prog // bpf prog can access skb and dev
+ * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off
+ * mov rsi, rbx // prog start time
+ * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math
+ * mov rdi, qword ptr [rbp - 24] // restore skb pointer from stack
+ * mov rsi, qword ptr [rbp - 16] // restore dev pointer from stack
+ * call eth_type_trans+5 // execute body of eth_type_trans
+ * mov qword ptr [rbp - 8], rax // save return value
+ * call __bpf_prog_enter // rcu_read_lock and preempt_disable
+ * mov rbx, rax // remember start time if bpf stats are enabled
+ * lea rdi, [rbp - 24] // R1==ctx of bpf prog
+ * call addr_of_jited_FEXIT_prog // bpf prog can access skb, dev, return value
+ * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off
+ * mov rsi, rbx // prog start time
+ * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math
+ * mov rax, qword ptr [rbp - 8] // restore eth_type_trans's return value
+ * pop rbx
+ * leave
+ * add rsp, 8 // skip eth_type_trans's frame
+ * ret // return to its caller
+ */
+int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags,
+ struct bpf_prog **fentry_progs, int fentry_cnt,
+ struct bpf_prog **fexit_progs, int fexit_cnt,
+ void *orig_call)
+{
+ int cnt = 0, nr_args = m->nr_args;
+ int stack_size = nr_args * 8;
+ u8 *prog;
+
+ /* x86-64 supports up to 6 arguments. 7+ can be added in the future */
+ if (nr_args > 6)
+ return -ENOTSUPP;
+
+ if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
+ (flags & BPF_TRAMP_F_SKIP_FRAME))
+ return -EINVAL;
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG)
+ stack_size += 8; /* room for return value of orig_call */
+
+ if (flags & BPF_TRAMP_F_SKIP_FRAME)
+ /* skip patched call instruction and point orig_call to actual
+ * body of the kernel function.
+ */
+ orig_call += X86_CALL_SIZE;
+
+ prog = image;
+
+ EMIT1(0x55); /* push rbp */
+ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
+ EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
+ EMIT1(0x53); /* push rbx */
+
+ save_regs(m, &prog, nr_args, stack_size);
+
+ if (fentry_cnt)
+ if (invoke_bpf(m, &prog, fentry_progs, fentry_cnt, stack_size))
+ return -EINVAL;
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ if (fentry_cnt)
+ restore_regs(m, &prog, nr_args, stack_size);
+
+ /* call original function */
+ if (emit_call(&prog, orig_call, prog))
+ return -EINVAL;
+ /* remember return value on the stack for bpf prog to access */
+ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
+ }
+
+ if (fexit_cnt)
+ if (invoke_bpf(m, &prog, fexit_progs, fexit_cnt, stack_size))
+ return -EINVAL;
+
+ if (flags & BPF_TRAMP_F_RESTORE_REGS)
+ restore_regs(m, &prog, nr_args, stack_size);
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG)
+ /* restore original return value back into RAX */
+ emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
+
+ EMIT1(0x5B); /* pop rbx */
+ EMIT1(0xC9); /* leave */
+ if (flags & BPF_TRAMP_F_SKIP_FRAME)
+ /* skip our return address and return to parent */
+ EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
+ EMIT1(0xC3); /* ret */
+ /* One half of the page holds the currently active trampoline.
+ * The other half is the area where the next trampoline image is generated.
+ * Make sure the trampoline generation logic doesn't overflow.
+ */
+ if (WARN_ON_ONCE(prog - (u8 *)image > PAGE_SIZE / 2 - BPF_INSN_SAFETY))
+ return -EFAULT;
+ return 0;
+}
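A hedged sketch of how the generic trampoline code might invoke this hook when
(re)generating an image, following the flag semantics documented in
include/linux/bpf.h below; tr, image and the prog arrays are illustrative:

    /* fentry only: run progs, restore args, return to the patched function */
    err = arch_prepare_bpf_trampoline(image, &tr->func.model,
                                      BPF_TRAMP_F_RESTORE_REGS,
                                      fentry_progs, fentry_cnt,
                                      NULL, 0, NULL);

    /* fentry + fexit: call the original function body in between */
    err = arch_prepare_bpf_trampoline(image, &tr->func.model,
                                      BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME,
                                      fentry_progs, fentry_cnt,
                                      fexit_progs, fexit_cnt,
                                      tr->func.addr);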
+
struct x64_jit_data {
struct bpf_binary_header *header;
int *addrs;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 7c7f518811a6..5b81cde47314 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -14,6 +14,8 @@
#include <linux/numa.h>
#include <linux/wait.h>
#include <linux/u64_stats_sync.h>
+#include <linux/refcount.h>
+#include <linux/mutex.h>
struct bpf_verifier_env;
struct bpf_verifier_log;
@@ -246,7 +248,7 @@ struct bpf_func_proto {
};
enum bpf_arg_type arg_type[5];
};
- u32 *btf_id; /* BTF ids of arguments */
+ int *btf_id; /* BTF ids of arguments */
};
/* bpf_context is intentionally undefined structure. Pointer to bpf_context is
@@ -384,6 +386,104 @@ struct bpf_prog_stats {
struct u64_stats_sync syncp;
} __aligned(2 * sizeof(u64));
+struct btf_func_model {
+ u8 ret_size;
+ u8 nr_args;
+ u8 arg_size[MAX_BPF_FUNC_ARGS];
+};
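For the eth_type_trans() example used in the x86 JIT comments above, the
distilled model would plausibly look like the following (__be16 return value,
two pointer arguments on x86-64); a sketch for illustration, not taken from
this patch:

    struct btf_func_model m = {
        .ret_size = 2,            /* sizeof(__be16) */
        .nr_args  = 2,            /* skb, dev */
        .arg_size = { 8, 8 },     /* both arguments are pointers */
    };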
+
+/* Restore arguments before returning from trampoline to let original function
+ * continue executing. This flag is used for fentry progs when there are no
+ * fexit progs.
+ */
+#define BPF_TRAMP_F_RESTORE_REGS BIT(0)
+/* Call original function after fentry progs, but before fexit progs.
+ * Makes sense for fentry/fexit, normal calls and indirect calls.
+ */
+#define BPF_TRAMP_F_CALL_ORIG BIT(1)
+/* Skip current frame and return to parent. Makes sense for fentry/fexit
+ * programs only. Should not be used with normal calls and indirect calls.
+ */
+#define BPF_TRAMP_F_SKIP_FRAME BIT(2)
+
+/* Different use cases for BPF trampoline:
+ * 1. replace nop at the function entry (kprobe equivalent)
+ * flags = BPF_TRAMP_F_RESTORE_REGS
+ * fentry = a set of programs to run before returning from trampoline
+ *
+ * 2. replace nop at the function entry (kprobe + kretprobe equivalent)
+ * flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME
+ * orig_call = fentry_ip + MCOUNT_INSN_SIZE
+ * fentry = a set of programs to run before calling original function
+ * fexit = a set of programs to run after original function
+ *
+ * 3. replace direct call instruction anywhere in the function body
+ * or assign a function pointer for indirect call (like tcp_congestion_ops->cong_avoid)
+ * With flags = 0
+ * fentry = a set of programs to run before returning from trampoline
+ * With flags = BPF_TRAMP_F_CALL_ORIG
+ * orig_call = original callback addr or direct function addr
+ * fentry = a set of programs to run before calling original function
+ * fexit = a set of programs to run after original function
+ */
+int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags,
+ struct bpf_prog **fentry_progs, int fentry_cnt,
+ struct bpf_prog **fexit_progs, int fexit_cnt,
+ void *orig_call);
+/* these two functions are called from the generated trampoline */
+u64 notrace __bpf_prog_enter(void);
+void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
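In C terms, the sequence the generated trampoline emits around each attached
program amounts to the following, where ctx points at the arguments saved on
the trampoline stack:

    u64 start = __bpf_prog_enter();     /* rcu_read_lock + preempt_disable */
    prog->bpf_func(ctx, prog->insnsi);  /* JITed prog, or the interpreter */
    __bpf_prog_exit(prog, start);       /* unlock, re-enable, stats math */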
+
+enum bpf_tramp_prog_type {
+ BPF_TRAMP_FENTRY,
+ BPF_TRAMP_FEXIT,
+ BPF_TRAMP_MAX
+};
+
+struct bpf_trampoline {
+ /* hlist for trampoline_table */
+ struct hlist_node hlist;
+ /* serializes access to fields of this trampoline */
+ struct mutex mutex;
+ refcount_t refcnt;
+ u64 key;
+ struct {
+ struct btf_func_model model;
+ void *addr;
+ } func;
+ /* list of BPF programs using this trampoline */
+ struct hlist_head progs_hlist[BPF_TRAMP_MAX];
+ /* Number of attached programs. A counter per kind. */
+ int progs_cnt[BPF_TRAMP_MAX];
+ /* Executable image of trampoline */
+ void *image;
+ u64 selector;
+};
+#ifdef CONFIG_BPF_JIT
+struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
+int bpf_trampoline_link_prog(struct bpf_prog *prog);
+int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
+void bpf_trampoline_put(struct bpf_trampoline *tr);
+#else
+static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
+{
+ return NULL;
+}
+static inline int bpf_trampoline_link_prog(struct bpf_prog *prog)
+{
+ return -ENOTSUPP;
+}
+static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
+{
+ return -ENOTSUPP;
+}
+static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
+#endif
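A rough sketch of the intended lifecycle of these helpers when a program is
attached and detached; the key derivation is an assumption and error handling
is elided:

    struct bpf_trampoline *tr;
    u64 key = prog->aux->attach_btf_id;   /* assumed: key identifies target */

    tr = bpf_trampoline_lookup(key);      /* get-or-create */
    prog->aux->trampoline = tr;
    err = bpf_trampoline_link_prog(prog); /* attach prog, update image */
    /* ... program runs ... */
    bpf_trampoline_unlink_prog(prog);     /* detach */
    bpf_trampoline_put(tr);               /* drop our reference */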
+
+struct bpf_func_info_aux {
+ bool unreliable;
+};
+
struct bpf_prog_aux {
atomic_t refcnt;
u32 used_map_cnt;
@@ -395,9 +495,14 @@ struct bpf_prog_aux {
u32 func_cnt; /* used by non-func prog as the number of func progs */
u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
u32 attach_btf_id; /* in-kernel BTF type id to attach to */
+ struct bpf_prog *linked_prog;
bool verifier_zext; /* Zero extensions has been inserted by verifier. */
bool offload_requested;
bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
+ bool func_proto_unreliable;
+ enum bpf_tramp_prog_type trampoline_prog_type;
+ struct bpf_trampoline *trampoline;
+ struct hlist_node tramp_hlist;
/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
const struct btf_type *attach_func_proto;
/* function name for valid attach_btf_id */
@@ -419,6 +524,7 @@ struct bpf_prog_aux {
struct bpf_prog_offload *offload;
struct btf *btf;
struct bpf_func_info *func_info;
+ struct bpf_func_info_aux *func_info_aux;
/* bpf_line_info loaded from userspace. linfo->insn_off
* has the xlated insn offset.
* Both the main and sub prog share the same linfo.
@@ -648,7 +754,7 @@ DECLARE_PER_CPU(int, bpf_prog_active);
extern const struct file_operations bpf_map_fops;
extern const struct file_operations bpf_prog_fops;
-#define BPF_PROG_TYPE(_id, _name) \
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
extern const struct bpf_prog_ops _name ## _prog_ops; \
extern const struct bpf_verifier_ops _name ## _verifier_ops;
#define BPF_MAP_TYPE(_id, _ops) \
@@ -782,7 +888,16 @@ int btf_struct_access(struct bpf_verifier_log *log,
const struct btf_type *t, int off, int size,
enum bpf_access_type atype,
u32 *next_btf_id);
-u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *, int);
+int btf_resolve_helper_id(struct bpf_verifier_log *log,
+ const struct bpf_func_proto *fn, int);
+
+int btf_distill_func_proto(struct bpf_verifier_log *log,
+ struct btf *btf,
+ const struct btf_type *func_proto,
+ const char *func_name,
+ struct btf_func_model *m);
+
+int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog);
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
@@ -1107,6 +1222,15 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
#endif
#ifdef CONFIG_INET
+struct sk_reuseport_kern {
+ struct sk_buff *skb;
+ struct sock *sk;
+ struct sock *selected_sk;
+ void *data_end;
+ u32 hash;
+ u32 reuseport_id;
+ bool bind_inany;
+};
bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info);
@@ -1157,4 +1281,12 @@ static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
}
#endif /* CONFIG_INET */
+enum bpf_text_poke_type {
+ BPF_MOD_NOP_TO_CALL,
+ BPF_MOD_CALL_TO_CALL,
+ BPF_MOD_CALL_TO_NOP,
+};
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ void *addr1, void *addr2);
+
#endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index de14872b01ba..93740b3614d7 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -2,42 +2,68 @@
/* internal file - do not include directly */
#ifdef CONFIG_NET
-BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter)
-BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act)
-BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act)
-BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter,
+ struct __sk_buff, struct sk_buff)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act,
+ struct __sk_buff, struct sk_buff)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act,
+ struct __sk_buff, struct sk_buff)
+BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp,
+ struct xdp_md, struct xdp_buff)
#ifdef CONFIG_CGROUP_BPF
-BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb)
-BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock)
-BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr)
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb,
+ struct __sk_buff, struct sk_buff)
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock,
+ struct bpf_sock, struct sock)
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr,
+ struct bpf_sock_addr, struct bpf_sock_addr_kern)
#endif
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in)
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out)
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local)
-BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops)
-BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb)
-BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)
-BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in,
+ struct __sk_buff, struct sk_buff)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out,
+ struct __sk_buff, struct sk_buff)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit,
+ struct __sk_buff, struct sk_buff)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local,
+ struct __sk_buff, struct sk_buff)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops,
+ struct bpf_sock_ops, struct bpf_sock_ops_kern)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb,
+ struct __sk_buff, struct sk_buff)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg,
+ struct sk_msg_md, struct sk_msg)
+BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector,
+ struct __sk_buff, struct bpf_flow_dissector)
#endif
#ifdef CONFIG_BPF_EVENTS
-BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
-BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
-BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
-BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
-BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
-BPF_PROG_TYPE(BPF_PROG_TYPE_TRACING, tracing)
+BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe,
+ bpf_user_pt_regs_t, struct pt_regs)
+BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint,
+ __u64, u64)
+BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event,
+ struct bpf_perf_event_data, struct bpf_perf_event_data_kern)
+BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint,
+ struct bpf_raw_tracepoint_args, u64)
+BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable,
+ struct bpf_raw_tracepoint_args, u64)
+BPF_PROG_TYPE(BPF_PROG_TYPE_TRACING, tracing,
+ void *, void *)
#endif
#ifdef CONFIG_CGROUP_BPF
-BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
-BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl)
-BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCKOPT, cg_sockopt)
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev,
+ struct bpf_cgroup_dev_ctx, struct bpf_cgroup_dev_ctx)
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl,
+ struct bpf_sysctl, struct bpf_sy
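The two new ctx-type arguments let includers of bpf_types.h build lookup
tables with the usual X-macro pattern; a hedged consumer sketch, where struct
ctx_pair and ctx_table are illustrative and not part of this patch:

    struct ctx_pair { const char *prog_ctx, *kern_ctx; };

    #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
            [_id] = { #prog_ctx_type, #kern_ctx_type },
    #define BPF_MAP_TYPE(_id, _ops)
    static const struct ctx_pair ctx_table[] = {
    #include <linux/bpf_types.h>
    };
    #undef BPF_PROG_TYPE
    #undef BPF_MAP_TYPE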