summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/filter.txt2
-rw-r--r--arch/arm/net/bpf_jit_32.c8
-rw-r--r--arch/arm64/net/bpf_jit_comp.c13
-rw-r--r--arch/mips/net/ebpf_jit.c29
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c8
-rw-r--r--arch/s390/net/bpf_jit_comp.c10
-rw-r--r--arch/sparc/net/bpf_jit_comp_64.c18
-rw-r--r--arch/x86/net/bpf_jit_comp.c20
-rw-r--r--include/linux/filter.h12
-rw-r--r--include/linux/tcp.h11
-rw-r--r--include/net/tcp.h42
-rw-r--r--include/uapi/linux/bpf.h84
-rw-r--r--kernel/bpf/core.c258
-rw-r--r--kernel/bpf/lpm_trie.c28
-rw-r--r--kernel/bpf/syscall.c5
-rw-r--r--kernel/bpf/verifier.c62
-rw-r--r--lib/test_bpf.c8
-rw-r--r--net/core/filter.c303
-rw-r--r--net/ipv4/tcp.c26
-rw-r--r--net/ipv4/tcp_nv.c2
-rw-r--r--net/ipv4/tcp_output.c6
-rw-r--r--net/ipv4/tcp_timer.c7
-rw-r--r--samples/bpf/Makefile5
-rw-r--r--samples/sockmap/sockmap_user.c392
-rw-r--r--tools/include/uapi/linux/bpf.h86
-rw-r--r--tools/testing/selftests/bpf/Makefile6
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h2
-rwxr-xr-xtools/testing/selftests/bpf/tcp_client.py51
-rwxr-xr-xtools/testing/selftests/bpf/tcp_server.py83
-rw-r--r--tools/testing/selftests/bpf/test_align.c30
-rw-r--r--tools/testing/selftests/bpf/test_dev_cgroup.c2
-rw-r--r--tools/testing/selftests/bpf/test_lpm_map.c95
-rw-r--r--tools/testing/selftests/bpf/test_maps.c32
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf.h16
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf_kern.c115
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf_user.c126
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c344
37 files changed, 1969 insertions, 378 deletions
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index 87814859cfc2..a4508ec1816b 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -1134,7 +1134,7 @@ The verifier's knowledge about the variable offset consists of:
mask and value; no bit should ever be 1 in both. For example, if a byte is read
into a register from memory, the register's top 56 bits are known zero, while
the low 8 are unknown - which is represented as the tnum (0x0; 0xff). If we
-then OR this with 0x40, we get (0x40; 0xcf), then if we add 1 we get (0x0;
+then OR this with 0x40, we get (0x40; 0xbf), then if we add 1 we get (0x0;
0x1ff), because of potential carries.
Besides arithmetic, the register state can also be updated by conditional
branches. For instance, if a SCALAR_VALUE is compared > 8, in the 'true' branch
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 41e2feb0cf4f..b5030e1a41d8 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -363,15 +363,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
{
const u8 *tmp = bpf2a32[TMP_REG_1];
- s32 jmp_offset;
- /* checks if divisor is zero or not. If it is, then
- * exit directly.
- */
- emit(ARM_CMP_I(rn, 0), ctx);
- _emit(ARM_COND_EQ, ARM_MOV_I(ARM_R0, 0), ctx);
- jmp_offset = epilogue_offset(ctx);
- _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
#if __LINUX_ARM_ARCH__ == 7
if (elf_hwcap & HWCAP_IDIVA) {
if (op == BPF_DIV)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 0775d5ab8ee9..1d4f1da7c58f 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -390,18 +390,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case BPF_ALU64 | BPF_DIV | BPF_X:
case BPF_ALU | BPF_MOD | BPF_X:
case BPF_ALU64 | BPF_MOD | BPF_X:
- {
- const u8 r0 = bpf2a64[BPF_REG_0];
-
- /* if (src == 0) return 0 */
- jmp_offset = 3; /* skip ahead to else path */
- check_imm19(jmp_offset);
- emit(A64_CBNZ(is64, src, jmp_offset), ctx);
- emit(A64_MOVZ(1, r0, 0, 0), ctx);
- jmp_offset = epilogue_offset(ctx);
- check_imm26(jmp_offset);
- emit(A64_B(jmp_offset), ctx);
- /* else */
switch (BPF_OP(code)) {
case BPF_DIV:
emit(A64_UDIV(is64, dst, dst, src), ctx);
@@ -413,7 +401,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
break;
}
break;
- }
case BPF_ALU | BPF_LSH | BPF_X:
case BPF_ALU64 | BPF_LSH | BPF_X:
emit(A64_LSLV(is64, dst, dst, src), ctx);
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index 4e347030ed2c..3e2798bfea4f 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -741,16 +741,11 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
case BPF_ALU | BPF_DIV | BPF_K: /* ALU_IMM */
case BPF_ALU | BPF_MOD | BPF_K: /* ALU_IMM */
+ if (insn->imm == 0)
+ return -EINVAL;
dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
if (dst < 0)
return dst;
- if (insn->imm == 0) { /* Div by zero */
- b_off = b_imm(exit_idx, ctx);
- if (is_bad_offset(b_off))
- return -E2BIG;
- emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off);
- emit_instr(ctx, addu, MIPS_R_V0, MIPS_R_ZERO, MIPS_R_ZERO);
- }
td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
if (td == REG_64BIT || td == REG_32BIT_ZERO_EX)
/* sign extend */
@@ -770,19 +765,13 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
case BPF_ALU64 | BPF_DIV | BPF_K: /* ALU_IMM */
case BPF_ALU64 | BPF_MOD | BPF_K: /* ALU_IMM */
+ if (insn->imm == 0)
+ return -EINVAL;
dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
if (dst < 0)
return dst;
- if (insn->imm == 0) { /* Div by zero */
- b_off = b_imm(exit_idx, ctx);
- if (is_bad_offset(b_off))
- return -E2BIG;
- emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off);
- emit_instr(ctx, addu, MIPS_R_V0, MIPS_R_ZERO, MIPS_R_ZERO);
- }
if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT)
emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
-
if (insn->imm == 1) {
/* div by 1 is a nop, mod by 1 is zero */
if (bpf_op == BPF_MOD)
@@ -860,11 +849,6 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
case BPF_DIV:
case BPF_MOD:
- b_off = b_imm(exit_idx, ctx);
- if (is_bad_offset(b_off))
- return -E2BIG;
- emit_instr(ctx, beq, src, MIPS_R_ZERO, b_off);
- emit_instr(ctx, movz, MIPS_R_V0, MIPS_R_ZERO, src);
emit_instr(ctx, ddivu, dst, src);
if (bpf_op == BPF_DIV)
emit_instr(ctx, mflo, dst);
@@ -943,11 +927,6 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
case BPF_DIV:
case BPF_MOD:
- b_off = b_imm(exit_idx, ctx);
- if (is_bad_offset(b_off))
- return -E2BIG;
- emit_instr(ctx, beq, src, MIPS_R_ZERO, b_off);
- emit_instr(ctx, movz, MIPS_R_V0, MIPS_R_ZERO, src);
emit_instr(ctx, divu, dst, src);
if (bpf_op == BPF_DIV)
emit_instr(ctx, mflo, dst);
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 217a78e84865..0a34b0cec7b7 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -381,10 +381,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
goto bpf_alu32_trunc;
case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
- PPC_CMPWI(src_reg, 0);
- PPC_BCC_SHORT(COND_NE, (ctx->idx * 4) + 12);
- PPC_LI(b2p[BPF_REG_0], 0);
- PPC_JMP(exit_addr);
if (BPF_OP(code) == BPF_MOD) {
PPC_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg);
PPC_MULW(b2p[TMP_REG_1], src_reg,
@@ -395,10 +391,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
goto bpf_alu32_trunc;
case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
- PPC_CMPDI(src_reg, 0);
- PPC_BCC_SHORT(COND_NE, (ctx->idx * 4) + 12);
- PPC_LI(b2p[BPF_REG_0], 0);
- PPC_JMP(exit_addr);
if (BPF_OP(code) == BPF_MOD) {
PPC_DIVD(b2p[TMP_REG_1], dst_reg, src_reg);
PPC_MULD(b2p[TMP_REG_1], src_reg,
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index e50188773ff3..78a19c93b380 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -610,11 +610,6 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
- jit->seen |= SEEN_RET0;
- /* ltr %src,%src (if src == 0 goto fail) */
- EMIT2(0x1200, src_reg, src_reg);
- /* jz <ret0> */
- EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
/* lhi %w0,0 */
EMIT4_IMM(0xa7080000, REG_W0, 0);
/* lr %w1,%dst */
@@ -630,11 +625,6 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
- jit->seen |= SEEN_RET0;
- /* ltgr %src,%src (if src == 0 goto fail) */
- EMIT4(0xb9020000, src_reg, src_reg);
- /* jz <ret0> */
- EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
/* lghi %w0,0 */
EMIT4_IMM(0xa7090000, REG_W0, 0);
/* lgr %w1,%dst */
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 50a24d7bd4c5..48a25869349b 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -967,31 +967,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
emit_alu(MULX, src, dst, ctx);
break;
case BPF_ALU | BPF_DIV | BPF_X:
- emit_cmp(src, G0, ctx);
- emit_branch(BE|ANNUL, ctx->idx, ctx->epilogue_offset, ctx);
- emit_loadimm(0, bpf2sparc[BPF_REG_0], ctx);
-
emit_write_y(G0, ctx);
emit_alu(DIV, src, dst, ctx);
break;
-
case BPF_ALU64 | BPF_DIV | BPF_X:
- emit_cmp(src, G0, ctx);
- emit_branch(BE|ANNUL, ctx->idx, ctx->epilogue_offset, ctx);
- emit_loadimm(0, bpf2sparc[BPF_REG_0], ctx);
-
emit_alu(UDIVX, src, dst, ctx);
break;
-
case BPF_ALU | BPF_MOD | BPF_X: {
const u8 tmp = bpf2sparc[TMP_REG_1];
ctx->tmp_1_used = true;
- emit_cmp(src, G0, ctx);
- emit_branch(BE|ANNUL, ctx->idx, ctx->epilogue_offset, ctx);
- emit_loadimm(0, bpf2sparc[BPF_REG_0], ctx);
-
emit_write_y(G0, ctx);
emit_alu3(DIV, dst, src, tmp, ctx);
emit_alu3(MULX, tmp, src, tmp, ctx);
@@ -1003,10 +989,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
ctx->tmp_1_used = true;
- emit_cmp(src, G0, ctx);
- emit_branch(BE|ANNUL, ctx->idx, ctx->epilogue_offset, ctx);
- emit_loadimm(0, bpf2sparc[BPF_REG_0], ctx);
-
emit_alu3(UDIVX, dst, src, tmp, ctx);
emit_alu3(MULX, tmp, src, tmp, ctx);
emit_alu3(SUB, dst, tmp, dst, ctx);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5acee5139e28..4923d92f918d 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -568,26 +568,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
*/
EMIT2(0x31, 0xd2);
- if (BPF_SRC(insn->code) == BPF_X) {
- /* if (src_reg == 0) return 0 */
-
- /* cmp r11, 0 */
- EMIT4(0x49, 0x83, 0xFB, 0x00);
-
- /* jne .+9 (skip over pop, pop, xor and jmp) */
- EMIT2(X86_JNE, 1 + 1 + 2 + 5);
- EMIT1(0x5A); /* pop rdx */
- EMIT1(0x58); /* pop rax */
- EMIT2(0x31, 0xc0); /* xor eax, eax */
-
- /* jmp cleanup_addr
- * addrs[i] - 11, because there are 11 bytes
- * after this insn: div, mov, pop, pop, mov
- */
- jmp_offset = ctx->cleanup_addr - (addrs[i] - 11);
- EMIT1_off32(0xE9, jmp_offset);
- }
-
if (BPF_CLASS(insn->code) == BPF_ALU64)
/* div r11 */
EMIT3(0x49, 0xF7, 0xF3);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 425056c7f96c..276932d75975 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -688,6 +688,8 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err);
void bpf_prog_free(struct bpf_prog *fp);
+bool bpf_opcode_in_insntable(u8 code);
+
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags);
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
gfp_t gfp_extra_flags);
@@ -1003,10 +1005,20 @@ struct bpf_sock_ops_kern {
struct sock *sk;
u32 op;
union {
+ u32 args[4];
u32 reply;
u32 replylong[4];
};
u32 is_fullsock;
+ u64 temp; /* temp and everything after is not
+ * initialized to 0 before calling
+ * the BPF program. New fields that
+ * should be initialized to 0 should
+ * be inserted before temp.
+ * temp is scratch storage used by
+ * sock_ops_convert_ctx_access
+ * as temporary storage of a register.
+ */
};
#endif /* __LINUX_FILTER_H__ */
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4f93f0953c41..8f4c54986f97 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -335,6 +335,17 @@ struct tcp_sock {
int linger2;
+
+/* Sock_ops bpf program related variables */
+#ifdef CONFIG_BPF
+ u8 bpf_sock_ops_cb_flags; /* Control calling BPF programs
+ * values defined in uapi/linux/tcp.h
+ */
+#define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) (TP->bpf_sock_ops_cb_flags & ARG)
+#else
+#define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) 0
+#endif
+
/* Receiver side RTT estimation */
struct {
u32 rtt_us;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5a1d26a18599..093e967a2960 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2006,12 +2006,12 @@ void tcp_cleanup_ulp(struct sock *sk);
* program loaded).
*/
#ifdef CONFIG_BPF
-static inline int tcp_call_bpf(struct sock *sk, int op)
+static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
{
struct bpf_sock_ops_kern sock_ops;
int ret;
- memset(&sock_ops, 0, sizeof(sock_ops));
+ memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
if (sk_fullsock(sk)) {
sock_ops.is_fullsock = 1;
sock_owned_by_me(sk);
@@ -2019,6 +2019,8 @@ static inline int tcp_call_bpf(struct sock *sk, int op)
sock_ops.sk = sk;
sock_ops.op = op;
+ if (nargs > 0)
+ memcpy(sock_ops.args, args, nargs * sizeof(*args));
ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
if (ret == 0)
@@ -2027,18 +2029,46 @@ static inline int tcp_call_bpf(struct sock *sk, int op)
ret = -1;
return ret;
}
+
+static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2)
+{
+ u32 args[2] = {arg1, arg2};
+
+ return tcp_call_bpf(sk, op, 2, args);
+}
+
+static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2,
+ u32 arg3)
+{
+ u32 args[3] = {arg1, arg2, arg3};
+
+ return tcp_call_bpf(sk, op, 3, args);
+}
+
#else
-static inline int tcp_call_bpf(struct sock *sk, int op)
+static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
{
return -EPERM;
}
+
+static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2)
+{
+ return -EPERM;
+}
+
+static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2,
+ u32 arg3)
+{
+ return -EPERM;
+}
+
#endif
static inline u32 tcp_timeout_init(struct sock *sk)
{
int timeout;
- timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT);
+ timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT, 0, NULL);
if (timeout <= 0)
timeout = TCP_TIMEOUT_INIT;
@@ -2049,7 +2079,7 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
{
int rwnd;
- rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT);
+ rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT, 0, NULL);
if (rwnd < 0)
rwnd = 0;
@@ -2058,7 +2088,7 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
{
- return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1);
+ return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1);
}
#if IS_ENABLED(CONFIG_SMC)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 406c19d6016b..db6bdc375126 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -642,6 +642,14 @@ union bpf_attr {
* @optlen: length of optval in bytes
* Return: 0 or negative error
*
+ * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
+ * Set callback flags for sock_ops
+ * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
+ * @flags: flags value
+ * Return: 0 for no error
+ * -EINVAL if there is no full tcp socket
+ * bits in flags that are not supported by current kernel
+ *
* int bpf_skb_adjust_room(skb, len_diff, mode, flags)
* Grow or shrink room in sk_buff.
* @skb: pointer to skb
@@ -748,7 +756,8 @@ union bpf_attr {
FN(perf_event_read_value), \
FN(perf_prog_read_value), \
FN(getsockopt), \
- FN(override_return),
+ FN(override_return), \
+ FN(sock_ops_cb_flags_set),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -952,8 +961,9 @@ struct bpf_map_info {
struct bpf_sock_ops {
__u32 op;
union {
- __u32 reply;
- __u32 replylong[4];
+ __u32 args[4]; /* Optionally passed to bpf program */
+ __u32 reply; /* Returned by bpf program */
+ __u32 replylong[4]; /* Optionally returned by bpf prog */
};
__u32 family;
__u32 remote_ip4; /* Stored in network byte order */
@@ -968,8 +978,39 @@ struct bpf_sock_ops {
*/
__u32 snd_cwnd;
__u32 srtt_us; /* Averaged RTT << 3 in usecs */
+ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
+ __u32 state;
+ __u32 rtt_min;
+ __u32 snd_ssthresh;
+ __u32 rcv_nxt;
+ __u32 snd_nxt;
+ __u32 snd_una;
+ __u32 mss_cache;
+ __u32 ecn_flags;
+ __u32 rate_delivered;
+ __u32 rate_interval_us;
+ __u32 packets_out;
+ __u32 retrans_out;
+ __u32 total_retrans;
+ __u32 segs_in;
+ __u32 data_segs_in;
+ __u32 segs_out;
+ __u32 data_segs_out;
+ __u32 lost_out;
+ __u32 sacked_out;
+ __u32 sk_txhash;
+ __u64 bytes_received;
+ __u64 bytes_acked;
};
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
+#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
+#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently
+ * supported cb flags
+ */
+
/* List of known BPF sock_ops operators.
* New entries can only be added at the end
*/
@@ -1003,6 +1044,43 @@ enum {
* a congestion threshold. RTTs above
* this indicate congestion
*/
+ BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered.
+ * Arg1: value of icsk_retransmits
+ * Arg2: value of icsk_rto
+ * Arg3: whether RTO has expired
+ */
+ BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted.
+ * Arg1: sequence number of 1st byte
+ * Arg2: # segments
+ * Arg3: return value of
+ * tcp_transmit_skb (0 => success)
+ */
+ BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state.
+ * Arg1: old_state
+ * Arg2: new_state
+ */
+};
+
+/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
+ * changes between the TCP and BPF versions. Ideally this should never happen.
+ * If it does, we need to add code to convert them before calling
+ * the BPF sock_ops function.
+ */
+enum {
+ BPF_TCP_ESTABLISHED = 1,
+ BPF_TCP_SYN_SENT,
+ BPF_TCP_SYN_RECV,
+ BPF_TCP_FIN_WAIT1,
+ BPF_TCP_FIN_WAIT2,
+ BPF_TCP_TIME_WAIT,
+ BPF_TCP_CLOSE,
+ BPF_TCP_CLOSE_WAIT,
+ BPF_TCP_LAST_ACK,
+ BPF_TCP_LISTEN,
+ BPF_TCP_CLOSING, /* Now a valid state */
+ BPF_TCP_NEW_SYN_RECV,
+
+ BPF_TCP_MAX_STATES /* Leave at the end! */
};
#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 3aa0658add76..5f35f93dcab2 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -782,6 +782,137 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
}
EXPORT_SYMBOL_GPL(__bpf_call_base);
+/* All UAPI available opcodes. */
+#define BPF_INSN_MAP(INSN_2, INSN_3) \
+ /* 32 bit ALU operations. */ \
+ /* Register based. */ \
+ INSN_3(ALU, ADD, X), \
+ INSN_3(ALU, SUB, X), \
+ INSN_3(ALU, AND, X), \
+ INSN_3(ALU, OR, X), \
+ INSN_3(ALU, LSH, X), \
+ INSN_3(ALU, RSH, X), \
+ INSN_3(ALU, XOR, X), \
+ INSN_3(ALU, MUL, X), \
+ INSN_3(ALU, MOV, X), \
+ INSN_3(ALU, DIV, X), \
+ INSN_3(ALU, MOD, X), \
+ INSN_2(ALU, NEG), \
+ INSN_3(ALU, END, TO_BE), \
+ INSN_3(ALU, END, TO_LE), \
+ /* Immediate based. */ \
+ INSN_3(ALU, ADD, K), \
+ INSN_3(ALU, SUB, K), \
+ INSN_3(ALU, AND, K), \
+ INSN_3(ALU, OR, K), \
+ INSN_3(ALU, LSH, K), \
+ INSN_3(ALU, RSH, K), \
+ INSN_3(ALU, XOR, K), \
+ INSN_3(ALU, MUL, K), \
+ INSN_3(ALU, MOV, K), \
+ INSN_3(ALU, DIV, K), \
+ INSN_3(ALU, MOD, K), \
+ /* 64 bit ALU operations. */ \
+ /* Register based. */ \
+ INSN_3(ALU64, ADD, X), \
+ INSN_3(ALU64, SUB, X), \
+ INSN_3(ALU64, AND, X), \
+ INSN_3(ALU64, OR, X), \
+ INSN_3(ALU64, LSH, X), \
+ INSN_3(ALU64, RSH, X), \
+ INSN_3(ALU64, XOR, X), \
+ INSN_3(ALU64, MUL, X), \
+ INSN_3(ALU64, MOV, X), \
+ INSN_3(ALU64, ARSH, X), \
+ INSN_3(ALU64, DIV, X), \
+ INSN_3(ALU64, MOD, X), \
+ INSN_2(ALU64, NEG), \
+ /* Immediate based. */ \
+ INSN_3(ALU64, ADD, K), \
+ INSN_3(ALU64, SUB, K), \
+ INSN_3(ALU64, AND, K), \
+ INSN_3(ALU64, OR, K), \
+ INSN_3(ALU64, LSH, K), \
+ INSN_3(ALU64, RSH, K), \
+ INSN_3(ALU64, XOR, K), \
+ INSN_3(ALU64, MUL, K), \
+ INSN_3(ALU64, MOV, K), \
+ INSN_3(ALU64, ARSH, K), \
+ INSN_3(ALU64, DIV, K), \
+ INSN_3(ALU64, MOD, K), \
+ /* Call instruction. */ \
+ INSN_2(JMP, CALL), \
+ /* Exit instruction. */ \
+ INSN_2(JMP, EXIT), \
+ /* Jump instructions. */ \
+ /* Register based. */ \
+ INSN_3(JMP, JEQ, X), \
+ INSN_3(JMP, JNE, X), \
+ INSN_3(JMP, JGT, X), \
+ INSN_3(JMP, JLT, X), \
+ INSN_3(JMP, JGE, X), \
+ INSN_3(JMP, JLE, X), \
+ INSN_3(JMP, JSGT, X), \
+ INSN_3(JMP, JSLT, X), \
+ INSN_3(JMP, JSGE, X), \
+ INSN_3(JMP, JSLE, X), \
+ INSN_3(JMP, JSET, X), \
+ /* Immediate based. */ \
+ INSN_3(JMP, JEQ, K), \
+ INSN_3(JMP, JNE, K), \
+ INSN_3(JMP, JGT, K), \
+ INSN_3(JMP, JLT, K), \
+ INSN_3(JMP, JGE, K), \
+ INSN_3(JMP, JLE, K), \
+ INSN_3(JMP, JSGT, K), \
+ INSN_3(JMP, JSLT, K), \
+ INSN_3(JMP, JSGE, K), \
+ INSN_3(JMP, JSLE, K), \
+ INSN_3(JMP, JSET, K), \
+ INSN_2(JMP, JA), \
+ /* Store instructions. */ \
+ /* Register based. */ \
+ INSN_3(STX, MEM, B), \
+ INSN_3(STX, MEM, H), \
+ INSN_3(STX, MEM, W), \
+ INSN_3(STX, MEM, DW), \
+ INSN_3(STX, XADD, W), \
+ INSN_3(STX, XADD, DW), \
+ /* Immediate based. */ \
+ INSN_3(ST, MEM, B), \
+ INSN_3(ST, MEM, H), \
+ INSN_3(ST, MEM, W), \
+ INSN_3(ST, MEM, DW), \
+ /* Load instructions. */ \
+ /* Register based. */ \
+ INSN_3(LDX, MEM, B), \
+ INSN_3(LDX, MEM, H), \
+ INSN_3(LDX, MEM, W), \
+ INSN_3(LDX, MEM, DW), \
+ /* Immediate based. */ \
+ INSN_3(LD, IMM, DW), \
+ /* Misc (old cBPF carry-over). */ \
+ INSN_3(LD, ABS, B), \
+ INSN_3(LD, ABS, H), \
+ INSN_3(LD, ABS, W), \
+ INSN_3(LD, IND, B), \
+ INSN_3(LD, IND, H), \
+ INSN_3(LD, IND, W)
+
+bool bpf_opcode_in_insntable(u8 code)
+{
+#define BPF_INSN_2_TBL(x, y) [BPF_##x | BPF_##y] = true
+#define BPF_INSN_3_TBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = true
+ static const bool public_insntable[256] = {
+ [0 ... 255] = false,
+ /* Now overwrite non-defaults ... */
+ BPF_INSN_MAP(BPF_INSN_2_TBL, BPF_INSN_3_TBL),
+ };
+#undef BPF_INSN_3_TBL
+#undef BPF_INSN_2_TBL
+ return public_insntable[code];
+}
+
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
/**
* __bpf_prog_run - run eBPF program on a given context
@@ -793,115 +924,18 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
{
u64 tmp;
+#define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y
+#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
static const void *jumptable[256] = {
[0 ... 255] = &&default_label,
/* Now overwrite non-defaults ... */
- /* 32 bit ALU operations */
- [BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X,
- [BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K,
- [BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X,
- [BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K,
- [BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X,
- [BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K,
- [BPF_ALU | BPF_OR | BPF_X] = &&ALU_OR_X,
- [BPF_ALU | BPF_OR | BPF_K] = &&ALU_OR_K,
- [BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X,
- [BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K,
- [BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X,
- [BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K,
- [BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X,
- [BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K,
- [BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X,
- [BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K,
- [BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X,
- [BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K,
- [BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X,
- [BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K,
- [BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X,
- [BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K,
- [BPF_ALU | BPF_NEG] = &&ALU_NEG,
- [BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE,
- [BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE,
- /* 64 bit ALU operations */
- [BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X,
- [BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K,
- [BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X,
- [BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K,
- [BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X,
- [BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K,
- [BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X,
- [BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K,
- [BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X,
- [BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K,
- [BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X,
- [BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K,
- [BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X,
- [BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K,
- [BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X,
- [BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K,
- [BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X,
- [BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K,
- [BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X,
- [BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K,
- [BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X,
- [BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K,
- [BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X,
- [BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K,
- [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
- /* Call instruction */
- [BPF_JMP | BPF_CALL] = &&JMP_CALL,
+ BPF_INSN_MAP(BPF_INSN_2_LBL, BPF_INSN_3_LBL),
+ /* Non-UAPI available opcodes. */
[BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
[BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
- /* Jumps */
- [BPF_JMP | BPF_JA] = &&JMP_JA,
- [BPF_J