diff options
Diffstat (limited to 'arch/arm')
-rw-r--r-- | arch/arm/Kconfig | 2 | ||||
-rw-r--r-- | arch/arm/boot/dts/rk3228-evb.dts | 34 | ||||
-rw-r--r-- | arch/arm/configs/multi_v7_defconfig | 1 | ||||
-rw-r--r-- | arch/arm/net/bpf_jit_32.c | 2448 | ||||
-rw-r--r-- | arch/arm/net/bpf_jit_32.h | 108 |
5 files changed, 1782 insertions, 811 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 61a0cb15067e..f1b3f1d575d4 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -50,7 +50,7 @@ config ARM select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) select HAVE_ARCH_TRACEHOOK select HAVE_ARM_SMCCC if CPU_V7 - select HAVE_CBPF_JIT + select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32 select HAVE_CC_STACKPROTECTOR select HAVE_CONTEXT_TRACKING select HAVE_C_RECORDMCOUNT diff --git a/arch/arm/boot/dts/rk3228-evb.dts b/arch/arm/boot/dts/rk3228-evb.dts index 58834330a5ba..1be9daacc4f9 100644 --- a/arch/arm/boot/dts/rk3228-evb.dts +++ b/arch/arm/boot/dts/rk3228-evb.dts @@ -50,6 +50,16 @@ device_type = "memory"; reg = <0x60000000 0x40000000>; }; + + vcc_phy: vcc-phy-regulator { + compatible = "regulator-fixed"; + enable-active-high; + regulator-name = "vcc_phy"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-always-on; + regulator-boot-on; + }; }; &emmc { @@ -60,6 +70,30 @@ status = "okay"; }; +&gmac { + assigned-clocks = <&cru SCLK_MAC_SRC>; + assigned-clock-rates = <50000000>; + clock_in_out = "output"; + phy-supply = <&vcc_phy>; + phy-mode = "rmii"; + phy-handle = <&phy>; + status = "okay"; + + mdio { + compatible = "snps,dwmac-mdio"; + #address-cells = <1>; + #size-cells = <0>; + + phy: phy@0 { + compatible = "ethernet-phy-id1234.d400", "ethernet-phy-ieee802.3-c22"; + reg = <0>; + clocks = <&cru SCLK_MAC_PHY>; + resets = <&cru SRST_MACPHY>; + phy-is-integrated; + }; + }; +}; + &tsadc { status = "okay"; diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 4d19c1b4b8e7..94d7e71c69c4 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -270,6 +270,7 @@ CONFIG_ICPLUS_PHY=y CONFIG_REALTEK_PHY=y CONFIG_MICREL_PHY=y CONFIG_FIXED_PHY=y +CONFIG_ROCKCHIP_PHY=y CONFIG_USB_PEGASUS=y CONFIG_USB_RTL8152=m CONFIG_USB_USBNET=y diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index d5b9fa19b684..c199990e12b6 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1,6 +1,7 @@ /* - * Just-In-Time compiler for BPF filters on 32bit ARM + * Just-In-Time compiler for eBPF filters on 32bit ARM * + * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com> * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com> * * This program is free software; you can redistribute it and/or modify it @@ -8,6 +9,7 @@ * Free Software Foundation; version 2 of the License. */ +#include <linux/bpf.h> #include <linux/bitops.h> #include <linux/compiler.h> #include <linux/errno.h> @@ -18,54 +20,101 @@ #include <linux/if_vlan.h> #include <asm/cacheflush.h> -#include <asm/set_memory.h> #include <asm/hwcap.h> #include <asm/opcodes.h> #include "bpf_jit_32.h" +int bpf_jit_enable __read_mostly; + +#define STACK_OFFSET(k) (k) +#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */ +#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */ +#define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */ + +/* Flags used for JIT optimization */ +#define SEEN_CALL (1 << 0) + +#define FLAG_IMM_OVERFLOW (1 << 0) + /* - * ABI: + * Map eBPF registers to ARM 32bit registers or stack scratch space. + * + * 1. First argument is passed using the arm 32bit registers and rest of the + * arguments are passed on stack scratch space. + * 2. First callee-saved arugument is mapped to arm 32 bit registers and rest + * arguments are mapped to scratch space on stack. + * 3. We need two 64 bit temp registers to do complex operations on eBPF + * registers. + * + * As the eBPF registers are all 64 bit registers and arm has only 32 bit + * registers, we have to map each eBPF registers with two arm 32 bit regs or + * scratch memory space and we have to build eBPF 64 bit register from those. * - * r0 scratch register - * r4 BPF register A - * r5 BPF register X - * r6 pointer to the skb - * r7 skb->data - * r8 skb_headlen(skb) */ +static const u8 bpf2a32[][2] = { + /* return value from in-kernel function, and exit value from eBPF */ + [BPF_REG_0] = {ARM_R1, ARM_R0}, + /* arguments from eBPF program to in-kernel function */ + [BPF_REG_1] = {ARM_R3, ARM_R2}, + /* Stored on stack scratch space */ + [BPF_REG_2] = {STACK_OFFSET(0), STACK_OFFSET(4)}, + [BPF_REG_3] = {STACK_OFFSET(8), STACK_OFFSET(12)}, + [BPF_REG_4] = {STACK_OFFSET(16), STACK_OFFSET(20)}, + [BPF_REG_5] = {STACK_OFFSET(24), STACK_OFFSET(28)}, + /* callee saved registers that in-kernel function will preserve */ + [BPF_REG_6] = {ARM_R5, ARM_R4}, + /* Stored on stack scratch space */ + [BPF_REG_7] = {STACK_OFFSET(32), STACK_OFFSET(36)}, + [BPF_REG_8] = {STACK_OFFSET(40), STACK_OFFSET(44)}, + [BPF_REG_9] = {STACK_OFFSET(48), STACK_OFFSET(52)}, + /* Read only Frame Pointer to access Stack */ + [BPF_REG_FP] = {STACK_OFFSET(56), STACK_OFFSET(60)}, + /* Temporary Register for internal BPF JIT, can be used + * for constant blindings and others. + */ + [TMP_REG_1] = {ARM_R7, ARM_R6}, + [TMP_REG_2] = {ARM_R10, ARM_R8}, + /* Tail call count. Stored on stack scratch space. */ + [TCALL_CNT] = {STACK_OFFSET(64), STACK_OFFSET(68)}, + /* temporary register for blinding constants. + * Stored on stack scratch space. + */ + [BPF_REG_AX] = {STACK_OFFSET(72), STACK_OFFSET(76)}, +}; -#define r_scratch ARM_R0 -/* r1-r3 are (also) used for the unaligned loads on the non-ARMv7 slowpath */ -#define r_off ARM_R1 -#define r_A ARM_R4 -#define r_X ARM_R5 -#define r_skb ARM_R6 -#define r_skb_data ARM_R7 -#define r_skb_hl ARM_R8 - -#define SCRATCH_SP_OFFSET 0 -#define SCRATCH_OFF(k) (SCRATCH_SP_OFFSET + 4 * (k)) - -#define SEEN_MEM ((1 << BPF_MEMWORDS) - 1) -#define SEEN_MEM_WORD(k) (1 << (k)) -#define SEEN_X (1 << BPF_MEMWORDS) -#define SEEN_CALL (1 << (BPF_MEMWORDS + 1)) -#define SEEN_SKB (1 << (BPF_MEMWORDS + 2)) -#define SEEN_DATA (1 << (BPF_MEMWORDS + 3)) +#define dst_lo dst[1] +#define dst_hi dst[0] +#define src_lo src[1] +#define src_hi src[0] -#define FLAG_NEED_X_RESET (1 << 0) -#define FLAG_IMM_OVERFLOW (1 << 1) +/* + * JIT Context: + * + * prog : bpf_prog + * idx : index of current last JITed instruction. + * prologue_bytes : bytes used in prologue. + * epilogue_offset : offset of epilogue starting. + * seen : bit mask used for JIT optimization. + * offsets : array of eBPF instruction offsets in + * JITed code. + * target : final JITed code. + * epilogue_bytes : no of bytes used in epilogue. + * imm_count : no of immediate counts used for global + * variables. + * imms : array of global variable addresses. + */ struct jit_ctx { - const struct bpf_prog *skf; - unsigned idx; - unsigned prologue_bytes; - int ret0_fp_idx; + const struct bpf_prog *prog; + unsigned int idx; + unsigned int prologue_bytes; + unsigned int epilogue_offset; u32 seen; u32 flags; u32 *offsets; u32 *target; + u32 stack_size; #if __LINUX_ARM_ARCH__ < 7 u16 epilogue_bytes; u16 imm_count; @@ -73,68 +122,16 @@ struct jit_ctx { #endif }; -int bpf_jit_enable __read_mostly; - -static inline int call_neg_helper(struct sk_buff *skb, int offset, void *ret, - unsigned int size) -{ - void *ptr = bpf_internal_load_pointer_neg_helper(skb, offset, size); - - if (!ptr) - return -EFAULT; - memcpy(ret, ptr, size); - return 0; -} - -static u64 jit_get_skb_b(struct sk_buff *skb, int offset) -{ - u8 ret; - int err; - - if (offset < 0) - err = call_neg_helper(skb, offset, &ret, 1); - else - err = skb_copy_bits(skb, offset, &ret, 1); - - return (u64)err << 32 | ret; -} - -static u64 jit_get_skb_h(struct sk_buff *skb, int offset) -{ - u16 ret; - int err; - - if (offset < 0) - err = call_neg_helper(skb, offset, &ret, 2); - else - err = skb_copy_bits(skb, offset, &ret, 2); - - return (u64)err << 32 | ntohs(ret); -} - -static u64 jit_get_skb_w(struct sk_buff *skb, int offset) -{ - u32 ret; - int err; - - if (offset < 0) - err = call_neg_helper(skb, offset, &ret, 4); - else - err = skb_copy_bits(skb, offset, &ret, 4); - - return (u64)err << 32 | ntohl(ret); -} - /* * Wrappers which handle both OABI and EABI and assures Thumb2 interworking * (where the assembly routines like __aeabi_uidiv could cause problems). */ -static u32 jit_udiv(u32 dividend, u32 divisor) +static u32 jit_udiv32(u32 dividend, u32 divisor) { return dividend / divisor; } -static u32 jit_mod(u32 dividend, u32 divisor) +static u32 jit_mod32(u32 dividend, u32 divisor) { return dividend % divisor; } @@ -158,36 +155,22 @@ static inline void emit(u32 inst, struct jit_ctx *ctx) _emit(ARM_COND_AL, inst, ctx); } -static u16 saved_regs(struct jit_ctx *ctx) +/* + * Checks if immediate value can be converted to imm12(12 bits) value. + */ +static int16_t imm8m(u32 x) { - u16 ret = 0; - - if ((ctx->skf->len > 1) || - (ctx->skf->insns[0].code == (BPF_RET | BPF_A))) - ret |= 1 << r_A; - -#ifdef CONFIG_FRAME_POINTER - ret |= (1 << ARM_FP) | (1 << ARM_IP) | (1 << ARM_LR) | (1 << ARM_PC); -#else - if (ctx->seen & SEEN_CALL) - ret |= 1 << ARM_LR; -#endif - if (ctx->seen & (SEEN_DATA | SEEN_SKB)) - ret |= 1 << r_skb; - if (ctx->seen & SEEN_DATA) - ret |= (1 << r_skb_data) | (1 << r_skb_hl); - if (ctx->seen & SEEN_X) - ret |= 1 << r_X; - - return ret; -} + u32 rot; -static inline int mem_words_used(struct jit_ctx *ctx) -{ - /* yes, we do waste some stack space IF there are "holes" in the set" */ - return fls(ctx->seen & SEEN_MEM); + for (rot = 0; rot < 16; rot++) + if ((x & ~ror32(0xff, 2 * rot)) == 0) + return rol32(x, 2 * rot) | (rot << 8); + return -1; } +/* + * Initializes the JIT space with undefined instructions. + */ static void jit_fill_hole(void *area, unsigned int size) { u32 *ptr; @@ -196,88 +179,34 @@ static void jit_fill_hole(void *area, unsigned int size) *ptr++ = __opcode_to_mem_arm(ARM_INST_UDF); } -static void build_prologue(struct jit_ctx *ctx) -{ - u16 reg_set = saved_regs(ctx); - u16 off; - -#ifdef CONFIG_FRAME_POINTER - emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx); - emit(ARM_PUSH(reg_set), ctx); - emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx); -#else - if (reg_set) - emit(ARM_PUSH(reg_set), ctx); -#endif - - if (ctx->seen & (SEEN_DATA | SEEN_SKB)) - emit(ARM_MOV_R(r_skb, ARM_R0), ctx); - - if (ctx->seen & SEEN_DATA) { - off = offsetof(struct sk_buff, data); - emit(ARM_LDR_I(r_skb_data, r_skb, off), ctx); - /* headlen = len - data_len */ - off = offsetof(struct sk_buff, len); - emit(ARM_LDR_I(r_skb_hl, r_skb, off), ctx); - off = offsetof(struct sk_buff, data_len); - emit(ARM_LDR_I(r_scratch, r_skb, off), ctx); - emit(ARM_SUB_R(r_skb_hl, r_skb_hl, r_scratch), ctx); - } - - if (ctx->flags & FLAG_NEED_X_RESET) - emit(ARM_MOV_I(r_X, 0), ctx); - - /* do not leak kernel data to userspace */ - if (bpf_needs_clear_a(&ctx->skf->insns[0])) - emit(ARM_MOV_I(r_A, 0), ctx); - - /* stack space for the BPF_MEM words */ - if (ctx->seen & SEEN_MEM) - emit(ARM_SUB_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx); -} - -static void build_epilogue(struct jit_ctx *ctx) -{ - u16 reg_set = saved_regs(ctx); - - if (ctx->seen & SEEN_MEM) - emit(ARM_ADD_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx); - - reg_set &= ~(1 << ARM_LR); +/* Stack must be multiples of 16 Bytes */ +#define STACK_ALIGN(sz) (((sz) + 3) & ~3) -#ifdef CONFIG_FRAME_POINTER - /* the first instruction of the prologue was: mov ip, sp */ - reg_set &= ~(1 << ARM_IP); - reg_set |= (1 << ARM_SP); - emit(ARM_LDM(ARM_SP, reg_set), ctx); -#else - if (reg_set) { - if (ctx->seen & SEEN_CALL) - reg_set |= 1 << ARM_PC; - emit(ARM_POP(reg_set), ctx); - } +/* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4, + * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9, + * BPF_REG_FP and Tail call counts. + */ +#define SCRATCH_SIZE 80 - if (!(ctx->seen & SEEN_CALL)) - emit(ARM_BX(ARM_LR), ctx); -#endif -} +/* total stack size used in JITed code */ +#define _STACK_SIZE \ + (ctx->prog->aux->stack_depth + \ + + SCRATCH_SIZE + \ + + 4 /* extra for skb_copy_bits buffer */) -static int16_t imm8m(u32 x) -{ - u32 rot; +#define STACK_SIZE STACK_ALIGN(_STACK_SIZE) - for (rot = 0; rot < 16; rot++) - if ((x & ~ror32(0xff, 2 * rot)) == 0) - return rol32(x, 2 * rot) | (rot << 8); +/* Get the offset of eBPF REGISTERs stored on scratch space. */ +#define STACK_VAR(off) (STACK_SIZE-off-4) - return -1; -} +/* Offset of skb_copy_bits buffer */ +#define SKB_BUFFER STACK_VAR(SCRATCH_SIZE) #if __LINUX_ARM_ARCH__ < 7 static u16 imm_offset(u32 k, struct jit_ctx *ctx) { - unsigned i = 0, offset; + unsigned int i = 0, offset; u16 imm; /* on the "fake" run we just count them (duplicates included) */ @@ -296,7 +225,7 @@ static u16 imm_offset(u32 k, struct jit_ctx *ctx) ctx->imms[i] = k; /* constants go just after the epilogue */ - offset = ctx->offsets[ctx->skf->len]; + offset = ctx->offsets[ctx->prog->len - 1] * 4; offset += ctx->prologue_bytes; offset += ctx->epilogue_bytes; offset += i * 4; @@ -320,10 +249,22 @@ static u16 imm_offset(u32 k, struct jit_ctx *ctx) #endif /* __LINUX_ARM_ARCH__ */ +static inline int bpf2a32_offset(int bpf_to, int bpf_from, + const struct jit_ctx *ctx) { + int to, from; + + if (ctx->target == NULL) + return 0; + to = ctx->offsets[bpf_to]; + from = ctx->offsets[bpf_from]; + + return to - from - 1; +} + /* * Move an immediate that's not an imm8m to a core register. */ -static inline void emit_mov_i_no8m(int rd, u32 val, struct jit_ctx *ctx) +static inline void emit_mov_i_no8m(const u8 rd, u32 val, struct jit_ctx *ctx) { #if __LINUX_ARM_ARCH__ < 7 emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx); @@ -334,7 +275,7 @@ static inline void emit_mov_i_no8m(int rd, u32 val, struct jit_ctx *ctx) #endif } -static inline void emit_mov_i(int rd, u32 val, struct jit_ctx *ctx) +static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx) { int imm12 = imm8m(val); @@ -344,676 +285,1594 @@ static inline void emit_mov_i(int rd, u32 val, struct jit_ctx *ctx) emit_mov_i_no8m(rd, val, ctx); } -#if __LINUX_ARM_ARCH__ < 6 - -static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) +static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) { - _emit(cond, ARM_LDRB_I(ARM_R3, r_addr, 1), ctx); - _emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx); - _emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 3), ctx); - _emit(cond, ARM_LSL_I(ARM_R3, ARM_R3, 16), ctx); - _emit(cond, ARM_LDRB_I(ARM_R0, r_addr, 2), ctx); - _emit(cond, ARM_ORR_S(ARM_R3, ARM_R3, ARM_R1, SRTYPE_LSL, 24), ctx); - _emit(cond, ARM_ORR_R(ARM_R3, ARM_R3, ARM_R2), ctx); - _emit(cond, ARM_ORR_S(r_res, ARM_R3, ARM_R0, SRTYPE_LSL, 8), ctx); + ctx->seen |= SEEN_CALL; +#if __LINUX_ARM_ARCH__ < 5 + emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); + + if (elf_hwcap & HWCAP_THUMB) + emit(ARM_BX(tgt_reg), ctx); + else + emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx); +#else + emit(ARM_BLX_R(tgt_reg), ctx); +#endif } -static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) +static inline int epilogue_offset(const struct jit_ctx *ctx) { - _emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx); - _emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 1), ctx); - _emit(cond, ARM_ORR_S(r_res, ARM_R2, ARM_R1, SRTYPE_LSL, 8), ctx); + int to, from; + /* No need for 1st dummy run */ + if (ctx->target == NULL) + return 0; + to = ctx->epilogue_offset; + from = ctx->idx; + + return to - from - 2; } -static inline void emit_swap16(u8 r_dst, u8 r_src, struct jit_ctx *ctx) +static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) { - /* r_dst = (r_src << 8) | (r_src >> 8) */ - emit(ARM_LSL_I(ARM_R1, r_src, 8), ctx); - emit(ARM_ORR_S(r_dst, ARM_R1, r_src, SRTYPE_LSR, 8), ctx); + const u8 *tmp = bpf2a32[TMP_REG_1]; + s32 jmp_offset; + + /* checks if divisor is zero or not. If it is, then + * exit directly. + */ + emit(ARM_CMP_I(rn, 0), ctx); + _emit(ARM_COND_EQ, ARM_MOV_I(ARM_R0, 0), ctx); + jmp_offset = epilogue_offset(ctx); + _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx); +#if __LINUX_ARM_ARCH__ == 7 + if (elf_hwcap & HWCAP_IDIVA) { + if (op == BPF_DIV) + emit(ARM_UDIV(rd, rm, rn), ctx); + else { + emit(ARM_UDIV(ARM_IP, rm, rn), ctx); + emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx); + } + return; + } +#endif /* - * we need to mask out the bits set in r_dst[23:16] due to - * the first shift instruction. - * - * note that 0x8ff is the encoded immediate 0x00ff0000. + * For BPF_ALU | BPF_DIV | BPF_K instructions + * As ARM_R1 and ARM_R0 contains 1st argument of bpf + * function, we need to save it on caller side to save + * it from getting destroyed within callee. + * After the return from the callee, we restore ARM_R0 + * ARM_R1. */ - emit(ARM_BIC_I(r_dst, r_dst, 0x8ff), ctx); -} + if (rn != ARM_R1) { + emit(ARM_MOV_R(tmp[0], ARM_R1), ctx); + emit(ARM_MOV_R(ARM_R1, rn), ctx); + } + if (rm != ARM_R0) { + emit(ARM_MOV_R(tmp[1], ARM_R0), ctx); + emit(ARM_MOV_R(ARM_R0, rm), ctx); + } -#else /* ARMv6+ */ + /* Call appropriate function */ + ctx->seen |= SEEN_CALL; + emit_mov_i(ARM_IP, op == BPF_DIV ? + (u32)jit_udiv32 : (u32)jit_mod32, ctx); + emit_blx_r(ARM_IP, ctx); -static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) -{ - _emit(cond, ARM_LDR_I(r_res, r_addr, 0), ctx); -#ifdef __LITTLE_ENDIAN - _emit(cond, ARM_REV(r_res, r_res), ctx); -#endif + /* Save return value */ + if (rd != ARM_R0) + emit(ARM_MOV_R(rd, ARM_R0), ctx); + + /* Restore ARM_R0 and ARM_R1 */ + if (rn != ARM_R1) + emit(ARM_MOV_R(ARM_R1, tmp[0]), ctx); + if (rm != ARM_R0) + emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx); } -static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx) +/* Checks whether BPF register is on scratch stack space or not. */ +static inline bool is_on_stack(u8 bpf_reg) { - _emit(cond, ARM_LDRH_I(r_res, r_addr, 0), ctx); -#ifdef __LITTLE_ENDIAN - _emit(cond, ARM_REV16(r_res, r_res), ctx); -#endif + static u8 stack_regs[] = {BPF_REG_AX, BPF_REG_3, BPF_REG_4, BPF_REG_5, + BPF_REG_7, BPF_REG_8, BPF_REG_9, TCALL_CNT, + BPF_REG_2, BPF_REG_FP}; + int i, reg_len = sizeof(stack_regs); + + for (i = 0 ; i < reg_len ; i++) { + if (bpf_reg == stack_regs[i]) + return true; + } + return false; } -static inline void emit_swap16(u8 r_dst __maybe_unused, - u8 r_src __maybe_unused, - struct jit_ctx *ctx __maybe_unused) +static inline void emit_a32_mov_i(const u8 dst, const u32 val, + bool dstk, struct jit_ctx *ctx) { -#ifdef __LITTLE_ENDIAN - emit(ARM_REV16(r_dst, r_src), ctx); -#endif + const u8 *tmp = bpf2a32[TMP_REG_1]; + + if (dstk) { + emit_mov_i(tmp[1], val, ctx); + emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(dst)), ctx); + } else { + emit_mov_i(dst, val, ctx); + } } -#endif /* __LINUX_ARM_ARCH__ < 6 */ +/* Sign extended move */ +static inline void emit_a32_mov_i64(const bool is64, const u8 dst[], + const u32 val, bool dstk, + struct jit_ctx *ctx) { + u32 hi = 0; + if (is64 && (val & (1<<31))) + hi = (u32)~0; + emit_a32_mov_i(dst_lo, val, dstk, ctx); + emit_a32_mov_i(dst_hi, hi, dstk, ctx); +} -/* Compute the immediate value for a PC-relative branch. */ -static inline u32 b_imm(unsigned tgt, struct jit_ctx *ctx) -{ - u32 imm; +static inline void emit_a32_add_r(const u8 dst, const u8 src, + const bool is64, const bool hi, + struct jit_ctx *ctx) { + /* 64 bit : + * adds dst_lo, dst_lo, src_lo + * adc dst_hi, dst_hi, src_hi + * 32 bit : + * add dst_lo, dst_lo, src_lo + */ + if (!hi && is64) + emit(ARM_ADDS_R(dst, dst, src), ctx); + else if (hi && is64) + emit(ARM_ADC_R(dst, dst, src), ctx); + else + emit(ARM_ADD_R(dst, dst, src), ctx); +} - if (ctx->target == NULL) - return 0; - /* - * BPF allows only forward jumps and the offset of the target is - * still the one computed during the first pass. +static inline void emit_a32_sub_r(const u8 dst, const u8 src, + const bool is64, const bool hi, + struct jit_ctx *ctx) { + /* 64 bit : + * subs dst_lo, dst_lo, src_lo + * sbc dst_hi, dst_hi, src_hi + * 32 bit : + * sub dst_lo, dst_lo, src_lo */ - imm = ctx->offsets[tgt] + ctx->prologue_bytes - (ctx->idx * 4 + 8); + if (!hi && is64) + emit(ARM_SUBS_R(dst, dst, src), ctx); + else if (hi && is64) + emit(ARM_SBC_R(dst, dst, src), ctx); + else + emit(ARM_SUB_R(dst, dst, src), ctx); +} - return imm >> 2; +static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64, + const bool hi, const u8 op, struct jit_ctx *ctx){ + switch (BPF_OP(op)) { + /* dst = dst + src */ + case BPF_ADD: + emit_a32_add_r(dst, src, is64, hi, ctx); + break; + /* dst = dst - src */ + case BPF_SUB: + emit_a32_sub_r(dst, src, is64, hi, ctx); + break; + /* dst = dst | src */ + case BPF_OR: + emit(ARM_ORR_R(dst, dst, src), ctx); + break; + /* dst = dst & src */ + case BPF_AND: + emit(ARM_AND_R(dst, dst, src), ctx); + break; + /* dst = dst ^ src */ + case BPF_XOR: + emit(ARM_EOR_R(dst, dst, src), ctx); + break; + /* dst = dst * src */ + case BPF_MUL: + emit(ARM_MUL(dst, dst, src), ctx); + break; + /* dst = dst << src */ + case BPF_LSH: + emit(ARM_LSL_R(dst, dst, src), ctx); + break; + /* dst = dst >> src */ + case BPF_RSH: + emit(ARM_LSR_R(dst, dst, src), ctx); + break; + /* dst = dst >> src (signed)*/ + case BPF_ARSH: + emit(ARM_MOV_SR(dst, dst, SRTYPE_ASR, src), ctx); + break; + } } -#define OP_IMM3(op, r1, r2, imm_val, ctx) \ - do { \ - imm12 = imm8m(imm_val); \ - if (imm12 < 0) { \ - emit_mov_i_no8m(r_scratch, imm_val, ctx); \ - emit(op ## _R((r1), (r2), r_scratch), ctx); \ - } else { \ - emit(op ## _I((r1), (r2), imm12), ctx); \ - } \ - } while (0) - -static inline void emit_err_ret(u8 cond, struct jit_ctx *ctx) -{ - if (ctx->ret0_fp_idx >= 0) { - _emit(cond, ARM_B(b_imm(ctx->ret0_fp_idx, ctx)), ctx); - /* NOP to keep the size constant between passes */ - emit(ARM_MOV_R(ARM_R0, ARM_R0), ctx); +/* ALU operation (32 bit) + * dst = dst (op) src + */ +static inline void emit_a32_alu_r(const u8 dst, const u8 src, + bool dstk, bool sstk, + struct jit_ctx *ctx, const bool is64, + const bool hi, const u8 op) { + const u8 *tmp = bpf2a32[TMP_REG_1]; + u8 rn = sstk ? tmp[1] : src; + + if (sstk) + emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src)), ctx); + + /* ALU operation */ + if (dstk) { + emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx); + emit_alu_r(tmp[0], rn, is64, hi, op, ctx); + emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx); } else { - _emit(cond, ARM_MOV_I(ARM_R0, 0), ctx); - _emit(cond, ARM_B(b_imm(ctx->skf->len, ctx)), ctx); + emit_alu_r(dst, rn, is64, hi, op, ctx); } } -static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) -{ -#if __LINUX_ARM_ARCH__ < 5 - emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); +/* ALU operation (64 bit) */ +static inline void emit_a32_alu_r64(const bool is64, const u8 dst[], + const u8 src[], bool dstk, + bool sstk, struct jit_ctx *ctx, + const u8 op) { + emit_a32_alu_r(dst_lo, src_lo, dstk, sstk, ctx, is64, false, op); + if (is64) + emit_a32_alu_r(dst_hi, src_hi, dstk, sstk, ctx, is64, true, op); + else + emit_a32_mov_i(dst_hi, 0, dstk, ctx); +} - if (elf_hwcap & HWCAP_THUMB) - emit(ARM_BX(tgt_reg), ctx); +/* dst = imm (4 bytes)*/ +static inline void emit_a32_mov_r(const u8 dst, const u8 src, + bool dstk, bool sstk, + struct jit_ctx *ctx) { + const u8 *tmp = bpf2a32[TMP_REG_1]; + u8 rt = sstk ? tmp[0] : src; + + if (sstk) + emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(src)), ctx); + if (dstk) + emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst)), ctx); else - emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx); -#else - emit(ARM_BLX_R(tgt_reg), ctx); -#endif + emit(ARM_MOV_R(dst, rt), ctx); } -static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, - int bpf_op) -{ -#if __LINUX_ARM_ARCH__ == 7 - if (elf_hwcap & HWCAP_IDIVA) { - if (bpf_op == BPF_DIV) - emit(ARM_UDIV(rd, rm, rn), ctx); - else { - emit(ARM_UDIV(ARM_R3, rm, rn), ctx); - emit(ARM_MLS(rd, rn, ARM_R3, rm), ctx); - } - return; +/* dst = src */ +static inline void emit_a32_mov_r64(const bool is64, const u8 dst[], + const u8 src[], bool dstk, + bool sstk, struct jit_ctx *ctx) { + emit_a32_mov_r(dst_lo, src_lo, dstk, sstk, ctx); + if (is64) { + /* complete 8 byte move */ + emit_a32_mov_r(dst_hi, src_hi, dstk, sstk, ctx); + } else { + /* Zero out high 4 bytes */ + emit_a32_mov_i(dst_hi, 0, dstk, ctx); } -#endif +} - /* - * For BPF_ALU | BPF_DIV | BPF_K instructions, rm is ARM_R4 - * (r_A) and rn is ARM_R0 (r_scratch) so load rn first into - * ARM_R1 to avoid accidentally overwriting ARM_R0 with rm - * before using it as a source for ARM_R1. - * - * For BPF_ALU | BPF_DIV | BPF_X rm is ARM_R4 (r_A) and rn is - * ARM_R5 (r_X) so there is no particular register overlap - * issues. - */ - if (rn != ARM_R1) - emit(ARM_MOV_R(ARM_R1, rn), ctx); - if (rm != ARM_R0) - emit(ARM_MOV_R(ARM_R0, rm), ctx); +/* Shift operations */ +static inline void emit_a32_alu_i(const u8 dst, const u32 val, bool dstk, + struct jit_ctx *ctx, const u8 op) { + const u8 *tmp = bpf2a32[TMP_REG_1]; + u8 rd = dstk ? tmp[0] : dst; + + if (dstk) + emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); + + /* Do shift operation */ + switch (op) { + case BPF_LSH: + emit(ARM_LSL_I(rd, rd, val), ctx); + break; + case BPF_RSH: + emit(ARM_LSR_I(rd, rd, val), ctx); + break; + case BPF_NEG: + emit(ARM_RSB_I(rd, rd, val), ctx); + break; + } + if (dstk) + emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); +} + +/* dst = ~dst (64 bit) */ +static inline void emit_a32_neg64(const u8 dst[], bool dstk, + struct jit_ctx *ctx){ + const u8 *tmp = bpf2a32[TMP_REG_1]; + u8 rd = dstk ? tmp[1] : dst[1]; + u8 rm = dstk ? tmp[0] : dst[0]; + + /* Setup Operand */ + if (dstk) { + emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); + emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); + } + + /* Do Negate Operation */ + emit(ARM_RSBS_I(rd, rd, 0), ctx); + emit(ARM_RSC_I(rm, rm, 0), ctx); + + if (dstk) { + emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); + emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); + } +} + +/* dst = dst << src */ +static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk, + bool sstk, struct jit_ctx *ctx) { + const u8 *tmp = bpf2a32[TMP_REG_1]; + const u8 *tmp2 = bpf2a32[TMP_REG_2]; + + /* Setup Operands */ + u8 rt = sstk ? tmp2[1] : src_lo; + u8 rd = dstk ? tmp[1] : dst_lo; + u8 rm = dstk ? tmp[0] : dst_hi; + + if (sstk) + emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); + if (dstk) { + emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); + emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); + } + + /* Do LSH operation */ + emit(ARM_SUB_I(ARM_IP, rt, 32), ctx); + emit(ARM_RSB_I(tmp2[0], rt, 32), ctx); + /* As we are using ARM_LR */ ctx->seen |= SEEN_CALL; - emit_mov_i(ARM_R3, bpf_op == BPF_DIV ? (u32)jit_udiv : (u32)jit_mod, - ctx); - emit_blx_r(ARM_R3, ctx); + emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx); + emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx); + emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_ASL, rt), ctx); + + if (dstk) { + emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx); + emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx); + } else { + emit(ARM_MOV_R(rd, ARM_LR), ctx); + emit(ARM_MOV_R(rm, ARM_IP), ctx); + } +} - if (rd != ARM_R0) - emit(ARM_MOV_R(rd, ARM_R0), ctx); +/* dst = dst >> src (signed)*/ +static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk, + bool sstk, struct jit_ctx *ctx) { + const u8 *tmp = bpf2a32[TMP_REG_1]; + const u8 *tmp2 = bpf2a32[TMP_REG_2]; + /* Setup Operands */ + u8 rt = sstk ? tmp2[1] : src_lo; + u8 rd = dstk ? tmp[1] : dst_lo; + u8 rm = dstk ? tmp[0] : dst_hi; + + if (sstk) + emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); + if (dstk) { + emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); + emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); + } + + /* Do the ARSH operation */ + emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); + emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); + /* As we are using ARM_LR */ + ctx->seen |= SEEN_CALL; + emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); + _emit(ARM_COND_MI, ARM_B(0), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASR, tmp2[0]), ctx); + emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_ASR, rt), ctx); + if (dstk) { + emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx); + emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx); + } else { + emit(ARM_MOV_R(rd, ARM_LR), ctx); + emit(ARM_MOV_R(rm, ARM_IP), ctx); + } +} + +/* dst = dst >> src */ +static inline void emit_a32_lsr_r64(const u8 dst[], const u8 src[], bool dstk, + bool sstk, struct jit_ctx *ctx) { + const u8 *tmp = bpf2a32[TMP_REG_1]; + const u8 *tmp2 = bpf2a32[TMP_REG_2]; + /* Setup Operands */ + u8 rt = sstk ? tmp2[1] : src_lo; + u8 rd = dstk ? tmp[1] : dst_lo; + u8 rm = dstk ? tmp[0] : dst_hi; + + if (sstk) + emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx); + if (dstk) { + emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); + emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); + } + + /* Do LSH operation */ + emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); + emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); + /* As we are using ARM_LR */ + ctx->seen |= SEEN_CALL; + emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); + emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx); + emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_LSR, rt), ctx); + if (dstk) { + emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx); + emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx); + } else { + emit(ARM_MOV_R(rd, ARM_LR), ctx); + emit(ARM_MOV_R(rm, ARM_IP), ctx); + } } -static inline void update_on_xread(struct jit_ctx *ctx) +/* dst = dst << val */ +static inline void emit_a32_lsh_i64(const u8 dst[], bool dstk, + const u32 val, struct jit_ctx *ctx){ + const u8 *tmp = bpf2a32[TMP_REG_1]; + const u8 *tmp2 = bpf2a32[TMP_REG_2]; + /* Setup operands */ + u8 rd = dstk ? tmp[1] : dst_lo; + u8 rm = dstk ? tmp[0] : dst_hi; + + if (dstk) { + emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); + emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); + } + + /* Do LSH operation */ + if (val < 32) { + emit(ARM_MOV_SI(tmp2[0], rm, SRTYPE_ASL, val), ctx); + emit(ARM_ORR_SI(rm, tmp2[0], rd, SRTYPE_LSR, 32 - val), ctx); + emit(ARM_MOV_SI(rd, rd, SRTYPE_ASL, val), ctx); + } else { + if (val == 32) + emit(ARM_MOV_R(rm, rd), ctx); + else + emit(ARM_MOV_SI(rm, rd, SRTYPE_ASL, val - 32), ctx); + emit(ARM_EOR_R(rd, rd, rd), ctx); + } + + if (dstk) { + emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); + emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); + } +} + +/* dst = dst >> val */ +static inline void emit_a32_lsr_i64(const u8 dst[], bool dstk, + const u32 val, struct jit_ctx *ctx) { + const u8 *tmp = bpf2a32[TMP_REG_1]; + const u8 *tmp2 = bpf2a32[TMP_REG_2]; + /* Setup operands */ + u8 rd = dstk ? tmp[1] : dst_lo; + u8 rm = dstk ? tmp[0] : dst_hi; + + if (dstk) { + emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); + emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); + } + + /* Do LSR operation */ + if (val < 32) { + emit(ARM_MOV_SI(tmp2[1], rd, SRTYPE_LSR, val), ctx); + emit(ARM_ORR_SI(rd, tmp2[1], rm, SRTYPE_ASL, 32 - val), ctx); + emit(ARM_MOV_SI(rm, rm, SRTYPE_LSR, val), ctx); + } else if (val == 32) { + emit(ARM_MOV_R(rd, rm), ctx); + emit(ARM_MOV_I(rm, 0), ctx); + } else { + emit(ARM_MOV_SI(rd, rm, SRTYPE_LSR, val - 32), ctx); + emit(ARM_MOV_I(rm, 0), ctx); + } + + if (dstk) { + emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx); + emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx); + } +} + +/* dst = dst >> val (signed) */ +static inline void emit_a32_arsh_i64(const u8 dst[], bool dstk, + const u32 val, struct jit_ctx *ctx){ + const u8 *tmp = bpf2a32[TMP_REG_1]; + const u8 *tmp2 = bpf2a32[TMP_REG_2]; + /* Setup op |