summaryrefslogtreecommitdiffstats
path: root/arch/arm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-09-06 14:45:08 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-06 14:45:08 -0700
commitaae3dbb4776e7916b6cd442d00159bea27a695c1 (patch)
treed074c5d783a81e7e2e084b1eba77f57459da7e37 /arch/arm
parentec3604c7a5aae8953545b0d05495357009a960e5 (diff)
parent66bed8465a808400eb14562510e26c8818082cb8 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Support ipv6 checksum offload in sunvnet driver, from Shannon Nelson. 2) Move to RB-tree instead of custom AVL code in inetpeer, from Eric Dumazet. 3) Allow generic XDP to work on virtual devices, from John Fastabend. 4) Add bpf device maps and XDP_REDIRECT, which can be used to build arbitrary switching frameworks using XDP. From John Fastabend. 5) Remove UFO offloads from the tree, gave us little other than bugs. 6) Remove the IPSEC flow cache, from Florian Westphal. 7) Support ipv6 route offload in mlxsw driver. 8) Support VF representors in bnxt_en, from Sathya Perla. 9) Add support for forward error correction modes to ethtool, from Vidya Sagar Ravipati. 10) Add time filter for packet scheduler action dumping, from Jamal Hadi Salim. 11) Extend the zerocopy sendmsg() used by virtio and tap to regular sockets via MSG_ZEROCOPY. From Willem de Bruijn. 12) Significantly rework value tracking in the BPF verifier, from Edward Cree. 13) Add new jump instructions to eBPF, from Daniel Borkmann. 14) Rework rtnetlink plumbing so that operations can be run without taking the RTNL semaphore. From Florian Westphal. 15) Support XDP in tap driver, from Jason Wang. 16) Add 32-bit eBPF JIT for ARM, from Shubham Bansal. 17) Add Huawei hinic ethernet driver. 18) Allow to report MD5 keys in TCP inet_diag dumps, from Ivan Delalande. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1780 commits) i40e: point wb_desc at the nvm_wb_desc during i40e_read_nvm_aq i40e: avoid NVM acquire deadlock during NVM update drivers: net: xgene: Remove return statement from void function drivers: net: xgene: Configure tx/rx delay for ACPI drivers: net: xgene: Read tx/rx delay for ACPI rocker: fix kcalloc parameter order rds: Fix non-atomic operation on shared flag variable net: sched: don't use GFP_KERNEL under spin lock vhost_net: correctly check tx avail during rx busy polling net: mdio-mux: add mdio_mux parameter to mdio_mux_init() rxrpc: Make service connection lookup always check for retry net: stmmac: Delete dead code for MDIO registration gianfar: Fix Tx flow control deactivation cxgb4: Ignore MPS_TX_INT_CAUSE[Bubble] for T6 cxgb4: Fix pause frame count in t4_get_port_stats cxgb4: fix memory leak tun: rename generic_xdp to skb_xdp tun: reserve extra headroom only when XDP is set net: dsa: bcm_sf2: Configure IMP port TC2QOS mapping net: dsa: bcm_sf2: Advertise number of egress queues ...
Diffstat (limited to 'arch/arm')
-rw-r--r--arch/arm/Kconfig2
-rw-r--r--arch/arm/boot/dts/rk3228-evb.dts34
-rw-r--r--arch/arm/configs/multi_v7_defconfig1
-rw-r--r--arch/arm/net/bpf_jit_32.c2448
-rw-r--r--arch/arm/net/bpf_jit_32.h108
5 files changed, 1782 insertions, 811 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 61a0cb15067e..f1b3f1d575d4 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -50,7 +50,7 @@ config ARM
select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT)
select HAVE_ARCH_TRACEHOOK
select HAVE_ARM_SMCCC if CPU_V7
- select HAVE_CBPF_JIT
+ select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32
select HAVE_CC_STACKPROTECTOR
select HAVE_CONTEXT_TRACKING
select HAVE_C_RECORDMCOUNT
diff --git a/arch/arm/boot/dts/rk3228-evb.dts b/arch/arm/boot/dts/rk3228-evb.dts
index 58834330a5ba..1be9daacc4f9 100644
--- a/arch/arm/boot/dts/rk3228-evb.dts
+++ b/arch/arm/boot/dts/rk3228-evb.dts
@@ -50,6 +50,16 @@
device_type = "memory";
reg = <0x60000000 0x40000000>;
};
+
+ vcc_phy: vcc-phy-regulator {
+ compatible = "regulator-fixed";
+ enable-active-high;
+ regulator-name = "vcc_phy";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
};
&emmc {
@@ -60,6 +70,30 @@
status = "okay";
};
+&gmac {
+ assigned-clocks = <&cru SCLK_MAC_SRC>;
+ assigned-clock-rates = <50000000>;
+ clock_in_out = "output";
+ phy-supply = <&vcc_phy>;
+ phy-mode = "rmii";
+ phy-handle = <&phy>;
+ status = "okay";
+
+ mdio {
+ compatible = "snps,dwmac-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy: phy@0 {
+ compatible = "ethernet-phy-id1234.d400", "ethernet-phy-ieee802.3-c22";
+ reg = <0>;
+ clocks = <&cru SCLK_MAC_PHY>;
+ resets = <&cru SRST_MACPHY>;
+ phy-is-integrated;
+ };
+ };
+};
+
&tsadc {
status = "okay";
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 4d19c1b4b8e7..94d7e71c69c4 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -270,6 +270,7 @@ CONFIG_ICPLUS_PHY=y
CONFIG_REALTEK_PHY=y
CONFIG_MICREL_PHY=y
CONFIG_FIXED_PHY=y
+CONFIG_ROCKCHIP_PHY=y
CONFIG_USB_PEGASUS=y
CONFIG_USB_RTL8152=m
CONFIG_USB_USBNET=y
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index d5b9fa19b684..c199990e12b6 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1,6 +1,7 @@
/*
- * Just-In-Time compiler for BPF filters on 32bit ARM
+ * Just-In-Time compiler for eBPF filters on 32bit ARM
*
+ * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
* Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
*
* This program is free software; you can redistribute it and/or modify it
@@ -8,6 +9,7 @@
* Free Software Foundation; version 2 of the License.
*/
+#include <linux/bpf.h>
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/errno.h>
@@ -18,54 +20,101 @@
#include <linux/if_vlan.h>
#include <asm/cacheflush.h>
-#include <asm/set_memory.h>
#include <asm/hwcap.h>
#include <asm/opcodes.h>
#include "bpf_jit_32.h"
+int bpf_jit_enable __read_mostly;
+
+#define STACK_OFFSET(k) (k)
+#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */
+#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */
+#define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */
+
+/* Flags used for JIT optimization */
+#define SEEN_CALL (1 << 0)
+
+#define FLAG_IMM_OVERFLOW (1 << 0)
+
/*
- * ABI:
+ * Map eBPF registers to ARM 32bit registers or stack scratch space.
+ *
+ * 1. First argument is passed using the arm 32bit registers and rest of the
+ * arguments are passed on stack scratch space.
+ * 2. First callee-saved arugument is mapped to arm 32 bit registers and rest
+ * arguments are mapped to scratch space on stack.
+ * 3. We need two 64 bit temp registers to do complex operations on eBPF
+ * registers.
+ *
+ * As the eBPF registers are all 64 bit registers and arm has only 32 bit
+ * registers, we have to map each eBPF registers with two arm 32 bit regs or
+ * scratch memory space and we have to build eBPF 64 bit register from those.
*
- * r0 scratch register
- * r4 BPF register A
- * r5 BPF register X
- * r6 pointer to the skb
- * r7 skb->data
- * r8 skb_headlen(skb)
*/
+static const u8 bpf2a32[][2] = {
+ /* return value from in-kernel function, and exit value from eBPF */
+ [BPF_REG_0] = {ARM_R1, ARM_R0},
+ /* arguments from eBPF program to in-kernel function */
+ [BPF_REG_1] = {ARM_R3, ARM_R2},
+ /* Stored on stack scratch space */
+ [BPF_REG_2] = {STACK_OFFSET(0), STACK_OFFSET(4)},
+ [BPF_REG_3] = {STACK_OFFSET(8), STACK_OFFSET(12)},
+ [BPF_REG_4] = {STACK_OFFSET(16), STACK_OFFSET(20)},
+ [BPF_REG_5] = {STACK_OFFSET(24), STACK_OFFSET(28)},
+ /* callee saved registers that in-kernel function will preserve */
+ [BPF_REG_6] = {ARM_R5, ARM_R4},
+ /* Stored on stack scratch space */
+ [BPF_REG_7] = {STACK_OFFSET(32), STACK_OFFSET(36)},
+ [BPF_REG_8] = {STACK_OFFSET(40), STACK_OFFSET(44)},
+ [BPF_REG_9] = {STACK_OFFSET(48), STACK_OFFSET(52)},
+ /* Read only Frame Pointer to access Stack */
+ [BPF_REG_FP] = {STACK_OFFSET(56), STACK_OFFSET(60)},
+ /* Temporary Register for internal BPF JIT, can be used
+ * for constant blindings and others.
+ */
+ [TMP_REG_1] = {ARM_R7, ARM_R6},
+ [TMP_REG_2] = {ARM_R10, ARM_R8},
+ /* Tail call count. Stored on stack scratch space. */
+ [TCALL_CNT] = {STACK_OFFSET(64), STACK_OFFSET(68)},
+ /* temporary register for blinding constants.
+ * Stored on stack scratch space.
+ */
+ [BPF_REG_AX] = {STACK_OFFSET(72), STACK_OFFSET(76)},
+};
-#define r_scratch ARM_R0
-/* r1-r3 are (also) used for the unaligned loads on the non-ARMv7 slowpath */
-#define r_off ARM_R1
-#define r_A ARM_R4
-#define r_X ARM_R5
-#define r_skb ARM_R6
-#define r_skb_data ARM_R7
-#define r_skb_hl ARM_R8
-
-#define SCRATCH_SP_OFFSET 0
-#define SCRATCH_OFF(k) (SCRATCH_SP_OFFSET + 4 * (k))
-
-#define SEEN_MEM ((1 << BPF_MEMWORDS) - 1)
-#define SEEN_MEM_WORD(k) (1 << (k))
-#define SEEN_X (1 << BPF_MEMWORDS)
-#define SEEN_CALL (1 << (BPF_MEMWORDS + 1))
-#define SEEN_SKB (1 << (BPF_MEMWORDS + 2))
-#define SEEN_DATA (1 << (BPF_MEMWORDS + 3))
+#define dst_lo dst[1]
+#define dst_hi dst[0]
+#define src_lo src[1]
+#define src_hi src[0]
-#define FLAG_NEED_X_RESET (1 << 0)
-#define FLAG_IMM_OVERFLOW (1 << 1)
+/*
+ * JIT Context:
+ *
+ * prog : bpf_prog
+ * idx : index of current last JITed instruction.
+ * prologue_bytes : bytes used in prologue.
+ * epilogue_offset : offset of epilogue starting.
+ * seen : bit mask used for JIT optimization.
+ * offsets : array of eBPF instruction offsets in
+ * JITed code.
+ * target : final JITed code.
+ * epilogue_bytes : no of bytes used in epilogue.
+ * imm_count : no of immediate counts used for global
+ * variables.
+ * imms : array of global variable addresses.
+ */
struct jit_ctx {
- const struct bpf_prog *skf;
- unsigned idx;
- unsigned prologue_bytes;
- int ret0_fp_idx;
+ const struct bpf_prog *prog;
+ unsigned int idx;
+ unsigned int prologue_bytes;
+ unsigned int epilogue_offset;
u32 seen;
u32 flags;
u32 *offsets;
u32 *target;
+ u32 stack_size;
#if __LINUX_ARM_ARCH__ < 7
u16 epilogue_bytes;
u16 imm_count;
@@ -73,68 +122,16 @@ struct jit_ctx {
#endif
};
-int bpf_jit_enable __read_mostly;
-
-static inline int call_neg_helper(struct sk_buff *skb, int offset, void *ret,
- unsigned int size)
-{
- void *ptr = bpf_internal_load_pointer_neg_helper(skb, offset, size);
-
- if (!ptr)
- return -EFAULT;
- memcpy(ret, ptr, size);
- return 0;
-}
-
-static u64 jit_get_skb_b(struct sk_buff *skb, int offset)
-{
- u8 ret;
- int err;
-
- if (offset < 0)
- err = call_neg_helper(skb, offset, &ret, 1);
- else
- err = skb_copy_bits(skb, offset, &ret, 1);
-
- return (u64)err << 32 | ret;
-}
-
-static u64 jit_get_skb_h(struct sk_buff *skb, int offset)
-{
- u16 ret;
- int err;
-
- if (offset < 0)
- err = call_neg_helper(skb, offset, &ret, 2);
- else
- err = skb_copy_bits(skb, offset, &ret, 2);
-
- return (u64)err << 32 | ntohs(ret);
-}
-
-static u64 jit_get_skb_w(struct sk_buff *skb, int offset)
-{
- u32 ret;
- int err;
-
- if (offset < 0)
- err = call_neg_helper(skb, offset, &ret, 4);
- else
- err = skb_copy_bits(skb, offset, &ret, 4);
-
- return (u64)err << 32 | ntohl(ret);
-}
-
/*
* Wrappers which handle both OABI and EABI and assures Thumb2 interworking
* (where the assembly routines like __aeabi_uidiv could cause problems).
*/
-static u32 jit_udiv(u32 dividend, u32 divisor)
+static u32 jit_udiv32(u32 dividend, u32 divisor)
{
return dividend / divisor;
}
-static u32 jit_mod(u32 dividend, u32 divisor)
+static u32 jit_mod32(u32 dividend, u32 divisor)
{
return dividend % divisor;
}
@@ -158,36 +155,22 @@ static inline void emit(u32 inst, struct jit_ctx *ctx)
_emit(ARM_COND_AL, inst, ctx);
}
-static u16 saved_regs(struct jit_ctx *ctx)
+/*
+ * Checks if immediate value can be converted to imm12(12 bits) value.
+ */
+static int16_t imm8m(u32 x)
{
- u16 ret = 0;
-
- if ((ctx->skf->len > 1) ||
- (ctx->skf->insns[0].code == (BPF_RET | BPF_A)))
- ret |= 1 << r_A;
-
-#ifdef CONFIG_FRAME_POINTER
- ret |= (1 << ARM_FP) | (1 << ARM_IP) | (1 << ARM_LR) | (1 << ARM_PC);
-#else
- if (ctx->seen & SEEN_CALL)
- ret |= 1 << ARM_LR;
-#endif
- if (ctx->seen & (SEEN_DATA | SEEN_SKB))
- ret |= 1 << r_skb;
- if (ctx->seen & SEEN_DATA)
- ret |= (1 << r_skb_data) | (1 << r_skb_hl);
- if (ctx->seen & SEEN_X)
- ret |= 1 << r_X;
-
- return ret;
-}
+ u32 rot;
-static inline int mem_words_used(struct jit_ctx *ctx)
-{
- /* yes, we do waste some stack space IF there are "holes" in the set" */
- return fls(ctx->seen & SEEN_MEM);
+ for (rot = 0; rot < 16; rot++)
+ if ((x & ~ror32(0xff, 2 * rot)) == 0)
+ return rol32(x, 2 * rot) | (rot << 8);
+ return -1;
}
+/*
+ * Initializes the JIT space with undefined instructions.
+ */
static void jit_fill_hole(void *area, unsigned int size)
{
u32 *ptr;
@@ -196,88 +179,34 @@ static void jit_fill_hole(void *area, unsigned int size)
*ptr++ = __opcode_to_mem_arm(ARM_INST_UDF);
}
-static void build_prologue(struct jit_ctx *ctx)
-{
- u16 reg_set = saved_regs(ctx);
- u16 off;
-
-#ifdef CONFIG_FRAME_POINTER
- emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx);
- emit(ARM_PUSH(reg_set), ctx);
- emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx);
-#else
- if (reg_set)
- emit(ARM_PUSH(reg_set), ctx);
-#endif
-
- if (ctx->seen & (SEEN_DATA | SEEN_SKB))
- emit(ARM_MOV_R(r_skb, ARM_R0), ctx);
-
- if (ctx->seen & SEEN_DATA) {
- off = offsetof(struct sk_buff, data);
- emit(ARM_LDR_I(r_skb_data, r_skb, off), ctx);
- /* headlen = len - data_len */
- off = offsetof(struct sk_buff, len);
- emit(ARM_LDR_I(r_skb_hl, r_skb, off), ctx);
- off = offsetof(struct sk_buff, data_len);
- emit(ARM_LDR_I(r_scratch, r_skb, off), ctx);
- emit(ARM_SUB_R(r_skb_hl, r_skb_hl, r_scratch), ctx);
- }
-
- if (ctx->flags & FLAG_NEED_X_RESET)
- emit(ARM_MOV_I(r_X, 0), ctx);
-
- /* do not leak kernel data to userspace */
- if (bpf_needs_clear_a(&ctx->skf->insns[0]))
- emit(ARM_MOV_I(r_A, 0), ctx);
-
- /* stack space for the BPF_MEM words */
- if (ctx->seen & SEEN_MEM)
- emit(ARM_SUB_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx);
-}
-
-static void build_epilogue(struct jit_ctx *ctx)
-{
- u16 reg_set = saved_regs(ctx);
-
- if (ctx->seen & SEEN_MEM)
- emit(ARM_ADD_I(ARM_SP, ARM_SP, mem_words_used(ctx) * 4), ctx);
-
- reg_set &= ~(1 << ARM_LR);
+/* Stack must be multiples of 16 Bytes */
+#define STACK_ALIGN(sz) (((sz) + 3) & ~3)
-#ifdef CONFIG_FRAME_POINTER
- /* the first instruction of the prologue was: mov ip, sp */
- reg_set &= ~(1 << ARM_IP);
- reg_set |= (1 << ARM_SP);
- emit(ARM_LDM(ARM_SP, reg_set), ctx);
-#else
- if (reg_set) {
- if (ctx->seen & SEEN_CALL)
- reg_set |= 1 << ARM_PC;
- emit(ARM_POP(reg_set), ctx);
- }
+/* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4,
+ * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9,
+ * BPF_REG_FP and Tail call counts.
+ */
+#define SCRATCH_SIZE 80
- if (!(ctx->seen & SEEN_CALL))
- emit(ARM_BX(ARM_LR), ctx);
-#endif
-}
+/* total stack size used in JITed code */
+#define _STACK_SIZE \
+ (ctx->prog->aux->stack_depth + \
+ + SCRATCH_SIZE + \
+ + 4 /* extra for skb_copy_bits buffer */)
-static int16_t imm8m(u32 x)
-{
- u32 rot;
+#define STACK_SIZE STACK_ALIGN(_STACK_SIZE)
- for (rot = 0; rot < 16; rot++)
- if ((x & ~ror32(0xff, 2 * rot)) == 0)
- return rol32(x, 2 * rot) | (rot << 8);
+/* Get the offset of eBPF REGISTERs stored on scratch space. */
+#define STACK_VAR(off) (STACK_SIZE-off-4)
- return -1;
-}
+/* Offset of skb_copy_bits buffer */
+#define SKB_BUFFER STACK_VAR(SCRATCH_SIZE)
#if __LINUX_ARM_ARCH__ < 7
static u16 imm_offset(u32 k, struct jit_ctx *ctx)
{
- unsigned i = 0, offset;
+ unsigned int i = 0, offset;
u16 imm;
/* on the "fake" run we just count them (duplicates included) */
@@ -296,7 +225,7 @@ static u16 imm_offset(u32 k, struct jit_ctx *ctx)
ctx->imms[i] = k;
/* constants go just after the epilogue */
- offset = ctx->offsets[ctx->skf->len];
+ offset = ctx->offsets[ctx->prog->len - 1] * 4;
offset += ctx->prologue_bytes;
offset += ctx->epilogue_bytes;
offset += i * 4;
@@ -320,10 +249,22 @@ static u16 imm_offset(u32 k, struct jit_ctx *ctx)
#endif /* __LINUX_ARM_ARCH__ */
+static inline int bpf2a32_offset(int bpf_to, int bpf_from,
+ const struct jit_ctx *ctx) {
+ int to, from;
+
+ if (ctx->target == NULL)
+ return 0;
+ to = ctx->offsets[bpf_to];
+ from = ctx->offsets[bpf_from];
+
+ return to - from - 1;
+}
+
/*
* Move an immediate that's not an imm8m to a core register.
*/
-static inline void emit_mov_i_no8m(int rd, u32 val, struct jit_ctx *ctx)
+static inline void emit_mov_i_no8m(const u8 rd, u32 val, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ < 7
emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx);
@@ -334,7 +275,7 @@ static inline void emit_mov_i_no8m(int rd, u32 val, struct jit_ctx *ctx)
#endif
}
-static inline void emit_mov_i(int rd, u32 val, struct jit_ctx *ctx)
+static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx)
{
int imm12 = imm8m(val);
@@ -344,676 +285,1594 @@ static inline void emit_mov_i(int rd, u32 val, struct jit_ctx *ctx)
emit_mov_i_no8m(rd, val, ctx);
}
-#if __LINUX_ARM_ARCH__ < 6
-
-static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
+static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
{
- _emit(cond, ARM_LDRB_I(ARM_R3, r_addr, 1), ctx);
- _emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx);
- _emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 3), ctx);
- _emit(cond, ARM_LSL_I(ARM_R3, ARM_R3, 16), ctx);
- _emit(cond, ARM_LDRB_I(ARM_R0, r_addr, 2), ctx);
- _emit(cond, ARM_ORR_S(ARM_R3, ARM_R3, ARM_R1, SRTYPE_LSL, 24), ctx);
- _emit(cond, ARM_ORR_R(ARM_R3, ARM_R3, ARM_R2), ctx);
- _emit(cond, ARM_ORR_S(r_res, ARM_R3, ARM_R0, SRTYPE_LSL, 8), ctx);
+ ctx->seen |= SEEN_CALL;
+#if __LINUX_ARM_ARCH__ < 5
+ emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
+
+ if (elf_hwcap & HWCAP_THUMB)
+ emit(ARM_BX(tgt_reg), ctx);
+ else
+ emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
+#else
+ emit(ARM_BLX_R(tgt_reg), ctx);
+#endif
}
-static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
+static inline int epilogue_offset(const struct jit_ctx *ctx)
{
- _emit(cond, ARM_LDRB_I(ARM_R1, r_addr, 0), ctx);
- _emit(cond, ARM_LDRB_I(ARM_R2, r_addr, 1), ctx);
- _emit(cond, ARM_ORR_S(r_res, ARM_R2, ARM_R1, SRTYPE_LSL, 8), ctx);
+ int to, from;
+ /* No need for 1st dummy run */
+ if (ctx->target == NULL)
+ return 0;
+ to = ctx->epilogue_offset;
+ from = ctx->idx;
+
+ return to - from - 2;
}
-static inline void emit_swap16(u8 r_dst, u8 r_src, struct jit_ctx *ctx)
+static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
{
- /* r_dst = (r_src << 8) | (r_src >> 8) */
- emit(ARM_LSL_I(ARM_R1, r_src, 8), ctx);
- emit(ARM_ORR_S(r_dst, ARM_R1, r_src, SRTYPE_LSR, 8), ctx);
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ s32 jmp_offset;
+
+ /* checks if divisor is zero or not. If it is, then
+ * exit directly.
+ */
+ emit(ARM_CMP_I(rn, 0), ctx);
+ _emit(ARM_COND_EQ, ARM_MOV_I(ARM_R0, 0), ctx);
+ jmp_offset = epilogue_offset(ctx);
+ _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
+#if __LINUX_ARM_ARCH__ == 7
+ if (elf_hwcap & HWCAP_IDIVA) {
+ if (op == BPF_DIV)
+ emit(ARM_UDIV(rd, rm, rn), ctx);
+ else {
+ emit(ARM_UDIV(ARM_IP, rm, rn), ctx);
+ emit(ARM_MLS(rd, rn, ARM_IP, rm), ctx);
+ }
+ return;
+ }
+#endif
/*
- * we need to mask out the bits set in r_dst[23:16] due to
- * the first shift instruction.
- *
- * note that 0x8ff is the encoded immediate 0x00ff0000.
+ * For BPF_ALU | BPF_DIV | BPF_K instructions
+ * As ARM_R1 and ARM_R0 contains 1st argument of bpf
+ * function, we need to save it on caller side to save
+ * it from getting destroyed within callee.
+ * After the return from the callee, we restore ARM_R0
+ * ARM_R1.
*/
- emit(ARM_BIC_I(r_dst, r_dst, 0x8ff), ctx);
-}
+ if (rn != ARM_R1) {
+ emit(ARM_MOV_R(tmp[0], ARM_R1), ctx);
+ emit(ARM_MOV_R(ARM_R1, rn), ctx);
+ }
+ if (rm != ARM_R0) {
+ emit(ARM_MOV_R(tmp[1], ARM_R0), ctx);
+ emit(ARM_MOV_R(ARM_R0, rm), ctx);
+ }
-#else /* ARMv6+ */
+ /* Call appropriate function */
+ ctx->seen |= SEEN_CALL;
+ emit_mov_i(ARM_IP, op == BPF_DIV ?
+ (u32)jit_udiv32 : (u32)jit_mod32, ctx);
+ emit_blx_r(ARM_IP, ctx);
-static void emit_load_be32(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
-{
- _emit(cond, ARM_LDR_I(r_res, r_addr, 0), ctx);
-#ifdef __LITTLE_ENDIAN
- _emit(cond, ARM_REV(r_res, r_res), ctx);
-#endif
+ /* Save return value */
+ if (rd != ARM_R0)
+ emit(ARM_MOV_R(rd, ARM_R0), ctx);
+
+ /* Restore ARM_R0 and ARM_R1 */
+ if (rn != ARM_R1)
+ emit(ARM_MOV_R(ARM_R1, tmp[0]), ctx);
+ if (rm != ARM_R0)
+ emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx);
}
-static void emit_load_be16(u8 cond, u8 r_res, u8 r_addr, struct jit_ctx *ctx)
+/* Checks whether BPF register is on scratch stack space or not. */
+static inline bool is_on_stack(u8 bpf_reg)
{
- _emit(cond, ARM_LDRH_I(r_res, r_addr, 0), ctx);
-#ifdef __LITTLE_ENDIAN
- _emit(cond, ARM_REV16(r_res, r_res), ctx);
-#endif
+ static u8 stack_regs[] = {BPF_REG_AX, BPF_REG_3, BPF_REG_4, BPF_REG_5,
+ BPF_REG_7, BPF_REG_8, BPF_REG_9, TCALL_CNT,
+ BPF_REG_2, BPF_REG_FP};
+ int i, reg_len = sizeof(stack_regs);
+
+ for (i = 0 ; i < reg_len ; i++) {
+ if (bpf_reg == stack_regs[i])
+ return true;
+ }
+ return false;
}
-static inline void emit_swap16(u8 r_dst __maybe_unused,
- u8 r_src __maybe_unused,
- struct jit_ctx *ctx __maybe_unused)
+static inline void emit_a32_mov_i(const u8 dst, const u32 val,
+ bool dstk, struct jit_ctx *ctx)
{
-#ifdef __LITTLE_ENDIAN
- emit(ARM_REV16(r_dst, r_src), ctx);
-#endif
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+
+ if (dstk) {
+ emit_mov_i(tmp[1], val, ctx);
+ emit(ARM_STR_I(tmp[1], ARM_SP, STACK_VAR(dst)), ctx);
+ } else {
+ emit_mov_i(dst, val, ctx);
+ }
}
-#endif /* __LINUX_ARM_ARCH__ < 6 */
+/* Sign extended move */
+static inline void emit_a32_mov_i64(const bool is64, const u8 dst[],
+ const u32 val, bool dstk,
+ struct jit_ctx *ctx) {
+ u32 hi = 0;
+ if (is64 && (val & (1<<31)))
+ hi = (u32)~0;
+ emit_a32_mov_i(dst_lo, val, dstk, ctx);
+ emit_a32_mov_i(dst_hi, hi, dstk, ctx);
+}
-/* Compute the immediate value for a PC-relative branch. */
-static inline u32 b_imm(unsigned tgt, struct jit_ctx *ctx)
-{
- u32 imm;
+static inline void emit_a32_add_r(const u8 dst, const u8 src,
+ const bool is64, const bool hi,
+ struct jit_ctx *ctx) {
+ /* 64 bit :
+ * adds dst_lo, dst_lo, src_lo
+ * adc dst_hi, dst_hi, src_hi
+ * 32 bit :
+ * add dst_lo, dst_lo, src_lo
+ */
+ if (!hi && is64)
+ emit(ARM_ADDS_R(dst, dst, src), ctx);
+ else if (hi && is64)
+ emit(ARM_ADC_R(dst, dst, src), ctx);
+ else
+ emit(ARM_ADD_R(dst, dst, src), ctx);
+}
- if (ctx->target == NULL)
- return 0;
- /*
- * BPF allows only forward jumps and the offset of the target is
- * still the one computed during the first pass.
+static inline void emit_a32_sub_r(const u8 dst, const u8 src,
+ const bool is64, const bool hi,
+ struct jit_ctx *ctx) {
+ /* 64 bit :
+ * subs dst_lo, dst_lo, src_lo
+ * sbc dst_hi, dst_hi, src_hi
+ * 32 bit :
+ * sub dst_lo, dst_lo, src_lo
*/
- imm = ctx->offsets[tgt] + ctx->prologue_bytes - (ctx->idx * 4 + 8);
+ if (!hi && is64)
+ emit(ARM_SUBS_R(dst, dst, src), ctx);
+ else if (hi && is64)
+ emit(ARM_SBC_R(dst, dst, src), ctx);
+ else
+ emit(ARM_SUB_R(dst, dst, src), ctx);
+}
- return imm >> 2;
+static inline void emit_alu_r(const u8 dst, const u8 src, const bool is64,
+ const bool hi, const u8 op, struct jit_ctx *ctx){
+ switch (BPF_OP(op)) {
+ /* dst = dst + src */
+ case BPF_ADD:
+ emit_a32_add_r(dst, src, is64, hi, ctx);
+ break;
+ /* dst = dst - src */
+ case BPF_SUB:
+ emit_a32_sub_r(dst, src, is64, hi, ctx);
+ break;
+ /* dst = dst | src */
+ case BPF_OR:
+ emit(ARM_ORR_R(dst, dst, src), ctx);
+ break;
+ /* dst = dst & src */
+ case BPF_AND:
+ emit(ARM_AND_R(dst, dst, src), ctx);
+ break;
+ /* dst = dst ^ src */
+ case BPF_XOR:
+ emit(ARM_EOR_R(dst, dst, src), ctx);
+ break;
+ /* dst = dst * src */
+ case BPF_MUL:
+ emit(ARM_MUL(dst, dst, src), ctx);
+ break;
+ /* dst = dst << src */
+ case BPF_LSH:
+ emit(ARM_LSL_R(dst, dst, src), ctx);
+ break;
+ /* dst = dst >> src */
+ case BPF_RSH:
+ emit(ARM_LSR_R(dst, dst, src), ctx);
+ break;
+ /* dst = dst >> src (signed)*/
+ case BPF_ARSH:
+ emit(ARM_MOV_SR(dst, dst, SRTYPE_ASR, src), ctx);
+ break;
+ }
}
-#define OP_IMM3(op, r1, r2, imm_val, ctx) \
- do { \
- imm12 = imm8m(imm_val); \
- if (imm12 < 0) { \
- emit_mov_i_no8m(r_scratch, imm_val, ctx); \
- emit(op ## _R((r1), (r2), r_scratch), ctx); \
- } else { \
- emit(op ## _I((r1), (r2), imm12), ctx); \
- } \
- } while (0)
-
-static inline void emit_err_ret(u8 cond, struct jit_ctx *ctx)
-{
- if (ctx->ret0_fp_idx >= 0) {
- _emit(cond, ARM_B(b_imm(ctx->ret0_fp_idx, ctx)), ctx);
- /* NOP to keep the size constant between passes */
- emit(ARM_MOV_R(ARM_R0, ARM_R0), ctx);
+/* ALU operation (32 bit)
+ * dst = dst (op) src
+ */
+static inline void emit_a32_alu_r(const u8 dst, const u8 src,
+ bool dstk, bool sstk,
+ struct jit_ctx *ctx, const bool is64,
+ const bool hi, const u8 op) {
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ u8 rn = sstk ? tmp[1] : src;
+
+ if (sstk)
+ emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src)), ctx);
+
+ /* ALU operation */
+ if (dstk) {
+ emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx);
+ emit_alu_r(tmp[0], rn, is64, hi, op, ctx);
+ emit(ARM_STR_I(tmp[0], ARM_SP, STACK_VAR(dst)), ctx);
} else {
- _emit(cond, ARM_MOV_I(ARM_R0, 0), ctx);
- _emit(cond, ARM_B(b_imm(ctx->skf->len, ctx)), ctx);
+ emit_alu_r(dst, rn, is64, hi, op, ctx);
}
}
-static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx)
-{
-#if __LINUX_ARM_ARCH__ < 5
- emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx);
+/* ALU operation (64 bit) */
+static inline void emit_a32_alu_r64(const bool is64, const u8 dst[],
+ const u8 src[], bool dstk,
+ bool sstk, struct jit_ctx *ctx,
+ const u8 op) {
+ emit_a32_alu_r(dst_lo, src_lo, dstk, sstk, ctx, is64, false, op);
+ if (is64)
+ emit_a32_alu_r(dst_hi, src_hi, dstk, sstk, ctx, is64, true, op);
+ else
+ emit_a32_mov_i(dst_hi, 0, dstk, ctx);
+}
- if (elf_hwcap & HWCAP_THUMB)
- emit(ARM_BX(tgt_reg), ctx);
+/* dst = imm (4 bytes)*/
+static inline void emit_a32_mov_r(const u8 dst, const u8 src,
+ bool dstk, bool sstk,
+ struct jit_ctx *ctx) {
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ u8 rt = sstk ? tmp[0] : src;
+
+ if (sstk)
+ emit(ARM_LDR_I(tmp[0], ARM_SP, STACK_VAR(src)), ctx);
+ if (dstk)
+ emit(ARM_STR_I(rt, ARM_SP, STACK_VAR(dst)), ctx);
else
- emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx);
-#else
- emit(ARM_BLX_R(tgt_reg), ctx);
-#endif
+ emit(ARM_MOV_R(dst, rt), ctx);
}
-static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx,
- int bpf_op)
-{
-#if __LINUX_ARM_ARCH__ == 7
- if (elf_hwcap & HWCAP_IDIVA) {
- if (bpf_op == BPF_DIV)
- emit(ARM_UDIV(rd, rm, rn), ctx);
- else {
- emit(ARM_UDIV(ARM_R3, rm, rn), ctx);
- emit(ARM_MLS(rd, rn, ARM_R3, rm), ctx);
- }
- return;
+/* dst = src */
+static inline void emit_a32_mov_r64(const bool is64, const u8 dst[],
+ const u8 src[], bool dstk,
+ bool sstk, struct jit_ctx *ctx) {
+ emit_a32_mov_r(dst_lo, src_lo, dstk, sstk, ctx);
+ if (is64) {
+ /* complete 8 byte move */
+ emit_a32_mov_r(dst_hi, src_hi, dstk, sstk, ctx);
+ } else {
+ /* Zero out high 4 bytes */
+ emit_a32_mov_i(dst_hi, 0, dstk, ctx);
}
-#endif
+}
- /*
- * For BPF_ALU | BPF_DIV | BPF_K instructions, rm is ARM_R4
- * (r_A) and rn is ARM_R0 (r_scratch) so load rn first into
- * ARM_R1 to avoid accidentally overwriting ARM_R0 with rm
- * before using it as a source for ARM_R1.
- *
- * For BPF_ALU | BPF_DIV | BPF_X rm is ARM_R4 (r_A) and rn is
- * ARM_R5 (r_X) so there is no particular register overlap
- * issues.
- */
- if (rn != ARM_R1)
- emit(ARM_MOV_R(ARM_R1, rn), ctx);
- if (rm != ARM_R0)
- emit(ARM_MOV_R(ARM_R0, rm), ctx);
+/* Shift operations */
+static inline void emit_a32_alu_i(const u8 dst, const u32 val, bool dstk,
+ struct jit_ctx *ctx, const u8 op) {
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ u8 rd = dstk ? tmp[0] : dst;
+
+ if (dstk)
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
+
+ /* Do shift operation */
+ switch (op) {
+ case BPF_LSH:
+ emit(ARM_LSL_I(rd, rd, val), ctx);
+ break;
+ case BPF_RSH:
+ emit(ARM_LSR_I(rd, rd, val), ctx);
+ break;
+ case BPF_NEG:
+ emit(ARM_RSB_I(rd, rd, val), ctx);
+ break;
+ }
+ if (dstk)
+ emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx);
+}
+
+/* dst = ~dst (64 bit) */
+static inline void emit_a32_neg64(const u8 dst[], bool dstk,
+ struct jit_ctx *ctx){
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ u8 rd = dstk ? tmp[1] : dst[1];
+ u8 rm = dstk ? tmp[0] : dst[0];
+
+ /* Setup Operand */
+ if (dstk) {
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+
+ /* Do Negate Operation */
+ emit(ARM_RSBS_I(rd, rd, 0), ctx);
+ emit(ARM_RSC_I(rm, rm, 0), ctx);
+
+ if (dstk) {
+ emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_STR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+}
+
+/* dst = dst << src */
+static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk,
+ bool sstk, struct jit_ctx *ctx) {
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+
+ /* Setup Operands */
+ u8 rt = sstk ? tmp2[1] : src_lo;
+ u8 rd = dstk ? tmp[1] : dst_lo;
+ u8 rm = dstk ? tmp[0] : dst_hi;
+
+ if (sstk)
+ emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
+ if (dstk) {
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+
+ /* Do LSH operation */
+ emit(ARM_SUB_I(ARM_IP, rt, 32), ctx);
+ emit(ARM_RSB_I(tmp2[0], rt, 32), ctx);
+ /* As we are using ARM_LR */
ctx->seen |= SEEN_CALL;
- emit_mov_i(ARM_R3, bpf_op == BPF_DIV ? (u32)jit_udiv : (u32)jit_mod,
- ctx);
- emit_blx_r(ARM_R3, ctx);
+ emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx);
+ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx);
+ emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx);
+ emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_ASL, rt), ctx);
+
+ if (dstk) {
+ emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ } else {
+ emit(ARM_MOV_R(rd, ARM_LR), ctx);
+ emit(ARM_MOV_R(rm, ARM_IP), ctx);
+ }
+}
- if (rd != ARM_R0)
- emit(ARM_MOV_R(rd, ARM_R0), ctx);
+/* dst = dst >> src (signed)*/
+static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk,
+ bool sstk, struct jit_ctx *ctx) {
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+ /* Setup Operands */
+ u8 rt = sstk ? tmp2[1] : src_lo;
+ u8 rd = dstk ? tmp[1] : dst_lo;
+ u8 rm = dstk ? tmp[0] : dst_hi;
+
+ if (sstk)
+ emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
+ if (dstk) {
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+
+ /* Do the ARSH operation */
+ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
+ emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
+ /* As we are using ARM_LR */
+ ctx->seen |= SEEN_CALL;
+ emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
+ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
+ _emit(ARM_COND_MI, ARM_B(0), ctx);
+ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASR, tmp2[0]), ctx);
+ emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_ASR, rt), ctx);
+ if (dstk) {
+ emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ } else {
+ emit(ARM_MOV_R(rd, ARM_LR), ctx);
+ emit(ARM_MOV_R(rm, ARM_IP), ctx);
+ }
+}
+
+/* dst = dst >> src */
+static inline void emit_a32_lsr_r64(const u8 dst[], const u8 src[], bool dstk,
+ bool sstk, struct jit_ctx *ctx) {
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+ /* Setup Operands */
+ u8 rt = sstk ? tmp2[1] : src_lo;
+ u8 rd = dstk ? tmp[1] : dst_lo;
+ u8 rm = dstk ? tmp[0] : dst_hi;
+
+ if (sstk)
+ emit(ARM_LDR_I(rt, ARM_SP, STACK_VAR(src_lo)), ctx);
+ if (dstk) {
+ emit(ARM_LDR_I(rd, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_LDR_I(rm, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ }
+
+ /* Do LSH operation */
+ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx);
+ emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
+ /* As we are using ARM_LR */
+ ctx->seen |= SEEN_CALL;
+ emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx);
+ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx);
+ emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx);
+ emit(ARM_MOV_SR(ARM_IP, rm, SRTYPE_LSR, rt), ctx);
+ if (dstk) {
+ emit(ARM_STR_I(ARM_LR, ARM_SP, STACK_VAR(dst_lo)), ctx);
+ emit(ARM_STR_I(ARM_IP, ARM_SP, STACK_VAR(dst_hi)), ctx);
+ } else {
+ emit(ARM_MOV_R(rd, ARM_LR), ctx);
+ emit(ARM_MOV_R(rm, ARM_IP), ctx);
+ }
}
-static inline void update_on_xread(struct jit_ctx *ctx)
+/* dst = dst << val */
+static inline void emit_a32_lsh_i64(const u8 dst[], bool dstk,
+ const u32 val, struct jit_ctx *ctx){
+ const u8 *tmp = bpf2a32[TMP_REG_1];
+ const u8 *tmp2 = bpf2a32[TMP_REG_2];
+ /* Setup operands */