summaryrefslogtreecommitdiffstats
path: root/arch/sparc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/sparc')
-rw-r--r--arch/sparc/Kconfig3
-rw-r--r--arch/sparc/include/uapi/asm/socket.h6
-rw-r--r--arch/sparc/net/Makefile2
-rw-r--r--arch/sparc/net/bpf_jit_32.h (renamed from arch/sparc/net/bpf_jit.h)2
-rw-r--r--arch/sparc/net/bpf_jit_64.h66
-rw-r--r--arch/sparc/net/bpf_jit_asm_32.S (renamed from arch/sparc/net/bpf_jit_asm.S)9
-rw-r--r--arch/sparc/net/bpf_jit_asm_64.S161
-rw-r--r--arch/sparc/net/bpf_jit_comp_32.c (renamed from arch/sparc/net/bpf_jit_comp.c)51
-rw-r--r--arch/sparc/net/bpf_jit_comp_64.c1565
9 files changed, 1804 insertions, 61 deletions
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 3db2543733a5..d8c7736d2d59 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -31,7 +31,8 @@ config SPARC
select ARCH_WANT_IPC_PARSE_VERSION
select GENERIC_PCI_IOMAP
select HAVE_NMI_WATCHDOG if SPARC64
- select HAVE_CBPF_JIT
+ select HAVE_CBPF_JIT if SPARC32
+ select HAVE_EBPF_JIT if SPARC64
select HAVE_DEBUG_BUGVERBOSE
select GENERIC_SMP_IDLE_THREAD
select GENERIC_CLOCKEVENTS
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index a25dc32f5d6a..3f4ad19d9ec7 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -88,6 +88,12 @@
#define SCM_TIMESTAMPING_OPT_STATS 0x0038
+#define SO_MEMINFO 0x0039
+
+#define SO_INCOMING_NAPI_ID 0x003a
+
+#define SO_COOKIE 0x003b
+
/* Security levels - as per NRL IPv6 - don't actually do anything */
#define SO_SECURITY_AUTHENTICATION 0x5001
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
diff --git a/arch/sparc/net/Makefile b/arch/sparc/net/Makefile
index 1306a58ac541..76fa8e95b721 100644
--- a/arch/sparc/net/Makefile
+++ b/arch/sparc/net/Makefile
@@ -1,4 +1,4 @@
#
# Arch-specific network modules
#
-obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_asm_$(BITS).o bpf_jit_comp_$(BITS).o
diff --git a/arch/sparc/net/bpf_jit.h b/arch/sparc/net/bpf_jit_32.h
index 33d6b375ff12..d5c069bff5f9 100644
--- a/arch/sparc/net/bpf_jit.h
+++ b/arch/sparc/net/bpf_jit_32.h
@@ -39,7 +39,7 @@
#define r_TMP2 G2
#define r_OFF G3
-/* assembly code in arch/sparc/net/bpf_jit_asm.S */
+/* assembly code in arch/sparc/net/bpf_jit_asm_32.S */
extern u32 bpf_jit_load_word[];
extern u32 bpf_jit_load_half[];
extern u32 bpf_jit_load_byte[];
diff --git a/arch/sparc/net/bpf_jit_64.h b/arch/sparc/net/bpf_jit_64.h
new file mode 100644
index 000000000000..74abd45796ea
--- /dev/null
+++ b/arch/sparc/net/bpf_jit_64.h
@@ -0,0 +1,66 @@
+#ifndef _BPF_JIT_H
+#define _BPF_JIT_H
+
+#ifndef __ASSEMBLER__
+#define G0 0x00
+#define G1 0x01
+#define G2 0x02
+#define G3 0x03
+#define G6 0x06
+#define G7 0x07
+#define O0 0x08
+#define O1 0x09
+#define O2 0x0a
+#define O3 0x0b
+#define O4 0x0c
+#define O5 0x0d
+#define SP 0x0e
+#define O7 0x0f
+#define L0 0x10
+#define L1 0x11
+#define L2 0x12
+#define L3 0x13
+#define L4 0x14
+#define L5 0x15
+#define L6 0x16
+#define L7 0x17
+#define I0 0x18
+#define I1 0x19
+#define I2 0x1a
+#define I3 0x1b
+#define I4 0x1c
+#define I5 0x1d
+#define FP 0x1e
+#define I7 0x1f
+
+#define r_SKB L0
+#define r_HEADLEN L4
+#define r_SKB_DATA L5
+#define r_TMP G1
+#define r_TMP2 G3
+
+/* assembly code in arch/sparc/net/bpf_jit_asm_64.S */
+extern u32 bpf_jit_load_word[];
+extern u32 bpf_jit_load_half[];
+extern u32 bpf_jit_load_byte[];
+extern u32 bpf_jit_load_byte_msh[];
+extern u32 bpf_jit_load_word_positive_offset[];
+extern u32 bpf_jit_load_half_positive_offset[];
+extern u32 bpf_jit_load_byte_positive_offset[];
+extern u32 bpf_jit_load_byte_msh_positive_offset[];
+extern u32 bpf_jit_load_word_negative_offset[];
+extern u32 bpf_jit_load_half_negative_offset[];
+extern u32 bpf_jit_load_byte_negative_offset[];
+extern u32 bpf_jit_load_byte_msh_negative_offset[];
+
+#else
+#define r_RESULT %o0
+#define r_SKB %o0
+#define r_OFF %o1
+#define r_HEADLEN %l4
+#define r_SKB_DATA %l5
+#define r_TMP %g1
+#define r_TMP2 %g3
+#endif
+
+#endif /* _BPF_JIT_H */
diff --git a/arch/sparc/net/bpf_jit_asm.S b/arch/sparc/net/bpf_jit_asm_32.S
index 8c83f4b8eb15..dcc402f5738a 100644
--- a/arch/sparc/net/bpf_jit_asm.S
+++ b/arch/sparc/net/bpf_jit_asm_32.S
@@ -1,18 +1,11 @@
#include <asm/ptrace.h>
-#include "bpf_jit.h"
+#include "bpf_jit_32.h"
-#ifdef CONFIG_SPARC64
-#define SAVE_SZ 176
-#define SCRATCH_OFF STACK_BIAS + 128
-#define BE_PTR(label) be,pn %xcc, label
-#define SIGN_EXTEND(reg) sra reg, 0, reg
-#else
#define SAVE_SZ 96
#define SCRATCH_OFF 72
#define BE_PTR(label) be label
#define SIGN_EXTEND(reg)
-#endif
#define SKF_MAX_NEG_OFF (-0x200000) /* SKF_LL_OFF from filter.h */
diff --git a/arch/sparc/net/bpf_jit_asm_64.S b/arch/sparc/net/bpf_jit_asm_64.S
new file mode 100644
index 000000000000..3b3f14655f81
--- /dev/null
+++ b/arch/sparc/net/bpf_jit_asm_64.S
@@ -0,0 +1,161 @@
+#include <asm/ptrace.h>
+
+#include "bpf_jit_64.h"
+
+#define SAVE_SZ 176
+#define SCRATCH_OFF STACK_BIAS + 128
+#define BE_PTR(label) be,pn %xcc, label
+#define SIGN_EXTEND(reg) sra reg, 0, reg
+
+#define SKF_MAX_NEG_OFF (-0x200000) /* SKF_LL_OFF from filter.h */
+
+ .text
+ .globl bpf_jit_load_word
+bpf_jit_load_word:
+ cmp r_OFF, 0
+ bl bpf_slow_path_word_neg
+ nop
+ .globl bpf_jit_load_word_positive_offset
+bpf_jit_load_word_positive_offset:
+ sub r_HEADLEN, r_OFF, r_TMP
+ cmp r_TMP, 3
+ ble bpf_slow_path_word
+ add r_SKB_DATA, r_OFF, r_TMP
+ andcc r_TMP, 3, %g0
+ bne load_word_unaligned
+ nop
+ retl
+ ld [r_TMP], r_RESULT
+load_word_unaligned:
+ ldub [r_TMP + 0x0], r_OFF
+ ldub [r_TMP + 0x1], r_TMP2
+ sll r_OFF, 8, r_OFF
+ or r_OFF, r_TMP2, r_OFF
+ ldub [r_TMP + 0x2], r_TMP2
+ sll r_OFF, 8, r_OFF
+ or r_OFF, r_TMP2, r_OFF
+ ldub [r_TMP + 0x3], r_TMP2
+ sll r_OFF, 8, r_OFF
+ retl
+ or r_OFF, r_TMP2, r_RESULT
+
+ .globl bpf_jit_load_half
+bpf_jit_load_half:
+ cmp r_OFF, 0
+ bl bpf_slow_path_half_neg
+ nop
+ .globl bpf_jit_load_half_positive_offset
+bpf_jit_load_half_positive_offset:
+ sub r_HEADLEN, r_OFF, r_TMP
+ cmp r_TMP, 1
+ ble bpf_slow_path_half
+ add r_SKB_DATA, r_OFF, r_TMP
+ andcc r_TMP, 1, %g0
+ bne load_half_unaligned
+ nop
+ retl
+ lduh [r_TMP], r_RESULT
+load_half_unaligned:
+ ldub [r_TMP + 0x0], r_OFF
+ ldub [r_TMP + 0x1], r_TMP2
+ sll r_OFF, 8, r_OFF
+ retl
+ or r_OFF, r_TMP2, r_RESULT
+
+ .globl bpf_jit_load_byte
+bpf_jit_load_byte:
+ cmp r_OFF, 0
+ bl bpf_slow_path_byte_neg
+ nop
+ .globl bpf_jit_load_byte_positive_offset
+bpf_jit_load_byte_positive_offset:
+ cmp r_OFF, r_HEADLEN
+ bge bpf_slow_path_byte
+ nop
+ retl
+ ldub [r_SKB_DATA + r_OFF], r_RESULT
+
+#define bpf_slow_path_common(LEN) \
+ save %sp, -SAVE_SZ, %sp; \
+ mov %i0, %o0; \
+ mov %i1, %o1; \
+ add %fp, SCRATCH_OFF, %o2; \
+ call skb_copy_bits; \
+ mov (LEN), %o3; \
+ cmp %o0, 0; \
+ restore;
+
+bpf_slow_path_word:
+ bpf_slow_path_common(4)
+ bl bpf_error
+ ld [%sp + SCRATCH_OFF], r_RESULT
+ retl
+ nop
+bpf_slow_path_half:
+ bpf_slow_path_common(2)
+ bl bpf_error
+ lduh [%sp + SCRATCH_OFF], r_RESULT
+ retl
+ nop
+bpf_slow_path_byte:
+ bpf_slow_path_common(1)
+ bl bpf_error
+ ldub [%sp + SCRATCH_OFF], r_RESULT
+ retl
+ nop
+
+#define bpf_negative_common(LEN) \
+ save %sp, -SAVE_SZ, %sp; \
+ mov %i0, %o0; \
+ mov %i1, %o1; \
+ SIGN_EXTEND(%o1); \
+ call bpf_internal_load_pointer_neg_helper; \
+ mov (LEN), %o2; \
+ mov %o0, r_TMP; \
+ cmp %o0, 0; \
+ BE_PTR(bpf_error); \
+ restore;
+
+bpf_slow_path_word_neg:
+ sethi %hi(SKF_MAX_NEG_OFF), r_TMP
+ cmp r_OFF, r_TMP
+ bl bpf_error
+ nop
+ .globl bpf_jit_load_word_negative_offset
+bpf_jit_load_word_negative_offset:
+ bpf_negative_common(4)
+ andcc r_TMP, 3, %g0
+ bne load_word_unaligned
+ nop
+ retl
+ ld [r_TMP], r_RESULT
+
+bpf_slow_path_half_neg:
+ sethi %hi(SKF_MAX_NEG_OFF), r_TMP
+ cmp r_OFF, r_TMP
+ bl bpf_error
+ nop
+ .globl bpf_jit_load_half_negative_offset
+bpf_jit_load_half_negative_offset:
+ bpf_negative_common(2)
+ andcc r_TMP, 1, %g0
+ bne load_half_unaligned
+ nop
+ retl
+ lduh [r_TMP], r_RESULT
+
+bpf_slow_path_byte_neg:
+ sethi %hi(SKF_MAX_NEG_OFF), r_TMP
+ cmp r_OFF, r_TMP
+ bl bpf_error
+ nop
+ .globl bpf_jit_load_byte_negative_offset
+bpf_jit_load_byte_negative_offset:
+ bpf_negative_common(1)
+ retl
+ ldub [r_TMP], r_RESULT
+
+bpf_error:
+ /* Make the JIT program itself return zero. */
+ ret
+ restore %g0, %g0, %o0
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp_32.c
index a6d9204a6a0b..d193748548e2 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp_32.c
@@ -8,7 +8,7 @@
#include <asm/cacheflush.h>
#include <asm/ptrace.h>
-#include "bpf_jit.h"
+#include "bpf_jit_32.h"
int bpf_jit_enable __read_mostly;
@@ -17,24 +17,6 @@ static inline bool is_simm13(unsigned int value)
return value + 0x1000 < 0x2000;
}
-static void bpf_flush_icache(void *start_, void *end_)
-{
-#ifdef CONFIG_SPARC64
- /* Cheetah's I-cache is fully coherent. */
- if (tlb_type == spitfire) {
- unsigned long start = (unsigned long) start_;
- unsigned long end = (unsigned long) end_;
-
- start &= ~7UL;
- end = (end + 7UL) & ~7UL;
- while (start < end) {
- flushi(start);
- start += 32;
- }
- }
-#endif
-}
-
#define SEEN_DATAREF 1 /* might call external helpers */
#define SEEN_XREG 2 /* ebx is used */
#define SEEN_MEM 4 /* use mem[] for temporary storage */
@@ -82,11 +64,7 @@ static void bpf_flush_icache(void *start_, void *end_)
#define BE (F2(0, 2) | CONDE)
#define BNE (F2(0, 2) | CONDNE)
-#ifdef CONFIG_SPARC64
-#define BE_PTR (F2(0, 1) | CONDE | (2 << 20))
-#else
#define BE_PTR BE
-#endif
#define SETHI(K, REG) \
(F2(0, 0x4) | RD(REG) | (((K) >> 10) & 0x3fffff))
@@ -116,13 +94,8 @@ static void bpf_flush_icache(void *start_, void *end_)
#define LD64 F3(3, 0x0b)
#define ST32 F3(3, 0x04)
-#ifdef CONFIG_SPARC64
-#define LDPTR LD64
-#define BASE_STACKFRAME 176
-#else
#define LDPTR LD32
#define BASE_STACKFRAME 96
-#endif
#define LD32I (LD32 | IMMED)
#define LD8I (LD8 | IMMED)
@@ -234,11 +207,7 @@ do { BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u8)); \
__emit_load8(BASE, STRUCT, FIELD, DEST); \
} while (0)
-#ifdef CONFIG_SPARC64
-#define BIAS (STACK_BIAS - 4)
-#else
#define BIAS (-4)
-#endif
#define emit_ldmem(OFF, DEST) \
do { *prog++ = LD32I | RS1(SP) | S13(BIAS - (OFF)) | RD(DEST); \
@@ -249,13 +218,8 @@ do { *prog++ = ST32I | RS1(SP) | S13(BIAS - (OFF)) | RD(SRC); \
} while (0)
#ifdef CONFIG_SMP
-#ifdef CONFIG_SPARC64
-#define emit_load_cpu(REG) \
- emit_load16(G6, struct thread_info, cpu, REG)
-#else
#define emit_load_cpu(REG) \
emit_load32(G6, struct thread_info, cpu, REG)
-#endif
#else
#define emit_load_cpu(REG) emit_clear(REG)
#endif
@@ -486,7 +450,6 @@ void bpf_jit_compile(struct bpf_prog *fp)
if (K == 1)
break;
emit_write_y(G0);
-#ifdef CONFIG_SPARC32
/* The Sparc v8 architecture requires
* three instructions between a %y
* register write and the first use.
@@ -494,31 +457,21 @@ void bpf_jit_compile(struct bpf_prog *fp)
emit_nop();
emit_nop();
emit_nop();
-#endif
emit_alu_K(DIV, K);
break;
case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */
emit_cmpi(r_X, 0);
if (pc_ret0 > 0) {
t_offset = addrs[pc_ret0 - 1];
-#ifdef CONFIG_SPARC32
emit_branch(BE, t_offset + 20);
-#else
- emit_branch(BE, t_offset + 8);
-#endif
emit_nop(); /* delay slot */
} else {
emit_branch_off(BNE, 16);
emit_nop();
-#ifdef CONFIG_SPARC32
emit_jump(cleanup_addr + 20);
-#else
- emit_jump(cleanup_addr + 8);
-#endif
emit_clear(r_A);
}
emit_write_y(G0);
-#ifdef CONFIG_SPARC32
/* The Sparc v8 architecture requires
* three instructions between a %y
* register write and the first use.
@@ -526,7 +479,6 @@ void bpf_jit_compile(struct bpf_prog *fp)
emit_nop();
emit_nop();
emit_nop();
-#endif
emit_alu_X(DIV);
break;
case BPF_ALU | BPF_NEG:
@@ -797,7 +749,6 @@ cond_branch: f_offset = addrs[i + filter[i].jf];
bpf_jit_dump(flen, proglen, pass + 1, image);
if (image) {
- bpf_flush_icache(image, image + proglen);
fp->bpf_func = (void *)image;
fp->jited = 1;
}
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
new file mode 100644
index 000000000000..ec7d10da94f0
--- /dev/null
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -0,0 +1,1565 @@
+#include <linux/moduleloader.h>
+#include <linux/workqueue.h>
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/cache.h>
+#include <linux/if_vlan.h>
+
+#include <asm/cacheflush.h>
+#include <asm/ptrace.h>
+
+#include "bpf_jit_64.h"
+
+int bpf_jit_enable __read_mostly;
+
+static inline bool is_simm13(unsigned int value)
+{
+ return value + 0x1000 < 0x2000;
+}
+
+static inline bool is_simm10(unsigned int value)
+{
+ return value + 0x200 < 0x400;
+}
+
+static inline bool is_simm5(unsigned int value)
+{
+ return value + 0x10 < 0x20;
+}
+
+static inline bool is_sethi(unsigned int value)
+{
+ return (value & ~0x3fffff) == 0;
+}
+
+static void bpf_flush_icache(void *start_, void *end_)
+{
+ /* Cheetah's I-cache is fully coherent. */
+ if (tlb_type == spitfire) {
+ unsigned long start = (unsigned long) start_;
+ unsigned long end = (unsigned long) end_;
+
+ start &= ~7UL;
+ end = (end + 7UL) & ~7UL;
+ while (start < end) {
+ flushi(start);
+ start += 32;
+ }
+ }
+}
+
+#define SEEN_DATAREF 1 /* might call external helpers */
+#define SEEN_XREG 2 /* ebx is used */
+#define SEEN_MEM 4 /* use mem[] for temporary storage */
+
+#define S13(X) ((X) & 0x1fff)
+#define S5(X) ((X) & 0x1f)
+#define IMMED 0x00002000
+#define RD(X) ((X) << 25)
+#define RS1(X) ((X) << 14)
+#define RS2(X) ((X))
+#define OP(X) ((X) << 30)
+#define OP2(X) ((X) << 22)
+#define OP3(X) ((X) << 19)
+#define COND(X) (((X) & 0xf) << 25)
+#define CBCOND(X) (((X) & 0x1f) << 25)
+#define F1(X) OP(X)
+#define F2(X, Y) (OP(X) | OP2(Y))
+#define F3(X, Y) (OP(X) | OP3(Y))
+#define ASI(X) (((X) & 0xff) << 5)
+
+#define CONDN COND(0x0)
+#define CONDE COND(0x1)
+#define CONDLE COND(0x2)
+#define CONDL COND(0x3)
+#define CONDLEU COND(0x4)
+#define CONDCS COND(0x5)
+#define CONDNEG COND(0x6)
+#define CONDVC COND(0x7)
+#define CONDA COND(0x8)
+#define CONDNE COND(0x9)
+#define CONDG COND(0xa)
+#define CONDGE COND(0xb)
+#define CONDGU COND(0xc)
+#define CONDCC COND(0xd)
+#define CONDPOS COND(0xe)
+#define CONDVS COND(0xf)
+
+#define CONDGEU CONDCC
+#define CONDLU CONDCS
+
+#define WDISP22(X) (((X) >> 2) & 0x3fffff)
+#define WDISP19(X) (((X) >> 2) & 0x7ffff)
+
+/* The 10-bit branch displacement for CBCOND is split into two fields */
+static u32 WDISP10(u32 off)
+{
+ u32 ret = ((off >> 2) & 0xff) << 5;
+
+ ret |= ((off >> (2 + 8)) & 0x03) << 19;
+
+ return ret;
+}
+
+#define CBCONDE CBCOND(0x09)
+#define CBCONDLE CBCOND(0x0a)
+#define CBCONDL CBCOND(0x0b)
+#define CBCONDLEU CBCOND(0x0c)
+#define CBCONDCS CBCOND(0x0d)
+#define CBCONDN CBCOND(0x0e)
+#define CBCONDVS CBCOND(0x0f)
+#define CBCONDNE CBCOND(0x19)
+#define CBCONDG CBCOND(0x1a)
+#define CBCONDGE CBCOND(0x1b)
+#define CBCONDGU CBCOND(0x1c)
+#define CBCONDCC CBCOND(0x1d)
+#define CBCONDPOS CBCOND(0x1e)
+#define CBCONDVC CBCOND(0x1f)
+
+#define CBCONDGEU CBCONDCC
+#define CBCONDLU CBCONDCS
+
+#define ANNUL (1 << 29)
+#define XCC (1 << 21)
+
+#define BRANCH (F2(0, 1) | XCC)
+#define CBCOND_OP (F2(0, 3) | XCC)
+
+#define BA (BRANCH | CONDA)
+#define BG (BRANCH | CONDG)
+#define BGU (BRANCH | CONDGU)
+#define BLEU (BRANCH | CONDLEU)
+#define BGE (BRANCH | CONDGE)
+#define BGEU (BRANCH | CONDGEU)
+#define BLU (BRANCH | CONDLU)
+#define BE (BRANCH | CONDE)
+#define BNE (BRANCH | CONDNE)
+
+#define SETHI(K, REG) \
+ (F2(0, 0x4) | RD(REG) | (((K) >> 10) & 0x3fffff))
+#define OR_LO(K, REG) \
+ (F3(2, 0x02) | IMMED | RS1(REG) | ((K) & 0x3ff) | RD(REG))
+
+#define ADD F3(2, 0x00)
+#define AND F3(2, 0x01)
+#define ANDCC F3(2, 0x11)
+#define OR F3(2, 0x02)
+#define XOR F3(2, 0x03)
+#define SUB F3(2, 0x04)
+#define SUBCC F3(2, 0x14)
+#define MUL F3(2, 0x0a)
+#define MULX F3(2, 0x09)
+#define UDIVX F3(2, 0x0d)
+#define DIV F3(2, 0x0e)
+#define SLL F3(2, 0x25)
+#define SLLX (F3(2, 0x25)|(1<<12))
+#define SRA F3(2, 0x27)
+#define SRAX (F3(2, 0x27)|(1<<12))
+#define SRL F3(2, 0x26)
+#define SRLX (F3(2, 0x26)|(1<<12))
+#define JMPL F3(2, 0x38)
+#define SAVE F3(2, 0x3c)
+#define RESTORE F3(2, 0x3d)
+#define CALL F1(1)
+#define BR F2(0, 0x01)
+#define RD_Y F3(2, 0x28)
+#define WR_Y F3(2, 0x30)
+
+#define LD32 F3(3, 0x00)
+#define LD8 F3(3, 0x01)
+#define LD16 F3(3, 0x02)
+#define LD64 F3(3, 0x0b)
+#define LD64A F3(3, 0x1b)
+#define ST8 F3(3, 0x05)
+#define ST16 F3(3, 0x06)
+#define ST32 F3(3, 0x04)
+#define ST64 F3(3, 0x0e)
+
+#define CAS F3(3, 0x3c)
+#define CASX F3(3, 0x3e)
+
+#define LDPTR LD64
+#define BASE_STACKFRAME 176
+
+#define LD32I (LD32 | IMMED)
+#define LD8I (LD8 | IMMED)
+#define LD16I (LD16 | IMMED)
+#define LD64I (LD64 | IMMED)
+#define LDPTRI (LDPTR | IMMED)
+#define ST32I (ST32 | IMMED)
+
+struct jit_ctx {
+ struct bpf_prog *prog;
+ unsigned int *offset;
+ int idx;
+ int epilogue_offset;
+ bool tmp_1_used;
+ bool tmp_2_used;
+ bool tmp_3_used;
+ bool saw_ld_abs_ind;
+ bool saw_frame_pointer;
+ bool saw_call;
+ bool saw_tail_call;
+ u32 *image;
+};
+
+#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
+#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
+#define SKB_HLEN_REG (MAX_BPF_JIT_REG + 2)
+#define SKB_DATA_REG (MAX_BPF_JIT_REG + 3)
+#define TMP_REG_3 (MAX_BPF_JIT_REG + 4)
+
+/* Map BPF registers to SPARC registers */
+static const int bpf2sparc[] = {
+ /* return value from in-kernel function, and exit value from eBPF */
+ [BPF_REG_0] = O5,
+
+ /* arguments from eBPF program to in-kernel function */
+ [BPF_REG_1] = O0,
+ [BPF_REG_2] = O1,
+ [BPF_REG_3] = O2,
+ [BPF_REG_4] = O3,
+ [BPF_REG_5] = O4,
+
+ /* callee saved registers that in-kernel function will preserve */
+ [BPF_REG_6] = L0,
+ [BPF_REG_7] = L1,
+ [BPF_REG_8] = L2,
+ [BPF_REG_9] = L3,
+
+ /* read-only frame pointer to access stack */
+ [BPF_REG_FP] = L6,
+
+ [BPF_REG_AX] = G7,
+
+ /* temporary register for internal BPF JIT */
+ [TMP_REG_1] = G1,
+ [TMP_REG_2] = G2,
+ [TMP_REG_3] = G3,
+
+ [SKB_HLEN_REG] = L4,
+ [SKB_DATA_REG] = L5,
+};
+
+static void emit(const u32 insn, struct jit_ctx *ctx)
+{
+ if (ctx->image != NULL)
+ ctx->image[ctx->idx] = insn;
+
+ ctx->idx++;
+}
+
+static void emit_call(u32 *func, struct jit_ctx *ctx)
+{
+ if (ctx->image != NULL) {
+ void *here = &ctx->image[ctx->idx];
+ unsigned int off;
+
+ off = (void *)func - here;
+ ctx->image[ctx->idx] = CALL | ((off >> 2) & 0x3fffffff);
+ }
+ ctx->idx++;
+}
+
+static void emit_nop(struct jit_ctx *ctx)
+{
+ emit(SETHI(0, G0), ctx);
+}
+
+static void emit_reg_move(u32 from, u32 to, struct jit_ctx *ctx)
+{
+ emit(OR | RS1(G0) | RS2(from) | RD(to), ctx);
+}
+
+/* Emit 32-bit constant, zero extended. */
+static void emit_set_const(s32 K, u32 reg, struct jit_ctx *ctx)
+{
+ emit(SETHI(K, reg), ctx);
+ emit(OR_LO(K, reg), ctx);
+}
+
+/* Emit 32-bit constant, sign extended. */
+static void emit_set_const_sext(s32 K, u32 reg, struct jit_ctx *ctx)
+{
+ if (K >= 0) {
+ emit(SETHI(K, reg), ctx);
+ emit(OR_LO(K, reg), ctx);
+ } else {
+ u32 hbits = ~(u32) K;
+ u32 lbits = -0x400 | (u32) K;
+
+ emit(SETHI(hbits, reg), ctx);
+ emit(XOR | IMMED | RS1(reg) | S13(lbits) | RD(reg), ctx);
+ }
+}
+
+static void emit_alu(u32 opcode, u32 src, u32 dst, struct jit_ctx *ctx)
+{
+ emit(opcode | RS1(dst) | RS2(src) | RD(dst), ctx);
+}
+
+static void emit_alu3(u32 opcode, u32 a, u32 b, u32 c, struct jit_ctx *ctx)
+{
+ emit(opcode | RS1(a) | RS2(b) | RD(c), ctx);
+}
+
+static void emit_alu_K(unsigned int opcode, unsigned int dst, unsigned int imm,
+ struct jit_ctx *ctx)
+{
+ bool small_immed = is_simm13(imm);
+ unsigned int insn = opcode;
+
+ insn |= RS1(dst) | RD(dst);
+ if (small_immed) {
+ emit(insn | IMMED | S13(imm), ctx);
+ } else {
+ unsigned int tmp = bpf2sparc[TMP_REG_1];
+
+ ctx->tmp_1_used = true;
+
+ emit_set_const_sext(imm, tmp, ctx);
+ emit(insn | RS2(tmp), ctx);
+ }
+}
+
+static void emit_alu3_K(unsigned int opcode, unsigned int src, unsigned int imm,
+ unsigned int dst, struct jit_ctx *ctx)
+{
+ bool small_immed = is_simm13(imm);
+ unsigned int insn = opcode;
+
+ insn |= RS1(src) | RD(dst);
+ if (small_immed) {
+ emit(insn | IMMED | S13(imm), ctx);
+ } else {
+ unsigned int tmp = bpf2sparc[TMP_REG_1];
+
+ ctx->tmp_1_used = true;
+
+ emit_set_const_sext(imm, tmp, ctx);
+ emit(insn | RS2(tmp), ctx);
+ }
+}
+
+static void emit_loadimm32(s32 K, unsigned int dest, struct jit_ctx *ctx)
+{
+ if (K >= 0 && is_simm13(K)) {
+ /* or %g0, K, DEST */
+ emit(OR | IMMED | RS1(G0) | S13(K) | RD(dest), ctx);
+ } else {
+ emit_set_const(K, dest, ctx);
+ }
+}
+
+static void emit_loadimm(s32 K, unsigned int dest, struct jit_ctx *ctx)
+{
+ if (is_simm13(K)) {
+ /* or %g0, K, DEST */
+ emit(OR | IMMED | RS1(G0) | S13(K) | RD(dest), ctx);
+ } else {
+ emit_set_const(K, dest, ctx);
+ }
+}
+
+static void emit_loadimm_sext(s32 K, unsigned int dest, struct jit_ctx *ctx)
+{
+ if (is_simm13(K)) {
+ /* or %g0, K, DEST */
+ emit(OR | IMMED | RS1(G0) | S13(K) | RD(dest), ctx);
+ } else {
+ emit_set_const_sext(K, dest, ctx);
+ }
+}
+
+static void analyze_64bit_constant(u32 high_bits, u32 low_bits,
+ int *hbsp, int *lbsp, int *abbasp)
+{
+ int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
+ int i;
+
+ lowest_bit_set = highest_bit_set = -1;
+ i = 0;
+ do {
+ if ((lowest_bit_set == -1) && ((low_bits >> i) & 1))
+ lowest_bit_set = i;
+ if ((highest_bit_set == -1) && ((high_bits >> (32 - i - 1)) & 1))
+ highest_bit_set = (64 - i - 1);
+ } while (++i < 32 && (highest_bit_set == -1 ||
+ lowest_bit_set == -1));
+ if (i == 32) {
+ i = 0;
+ do {
+ if (lowest_bit_set == -1 && ((high_bits >> i) & 1))
+ lowest_bit_set = i + 32;
+ if (highest_bit_set == -1 &&
+ ((low_bits >> (32 - i - 1)) & 1))
+ highest_bit_set = 32 - i - 1;
+ } while (++i < 32 && (highest_bit_set == -1 ||
+ lowest_bit_set == -1));
+ }
+
+ all_bits_between_are_set = 1;
+ for (i = lowest_bit_set; i <= highest_bit_set; i++) {
+ if (i < 32) {
+ if ((low_bits & (1 << i)) != 0)
+ continue;
+ } else {
+ if ((high_bits & (1 << (i - 32))) != 0)
+ continue;
+ }
+ all_bits_between_are_set = 0;
+ break;
+ }
+ *hbsp = highest_bit_set;
+ *lbsp = lowest_bit_set;
+ *abbasp = all_bits_between_are_set;
+}
+
+static unsigned long create_simple_focus_bits(unsigned long high_bits,
+ unsigned long low_bits,
+ int lowest_bit_set, int shift)
+{
+ long hi, lo;
+
+ if (lowest_bit_set < 32) {
+ lo = (low_bits >> lowest_bit_set) << shift;
+ hi = ((high_bits << (32 - lowest_bit_set)) << shift);
+ } else {
+ lo = 0;
+ hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
+ }
+ return hi | lo;
+}
+
+static bool const64_is_2insns(unsigned long high_bits,
+ unsigned long low_bits)
+{
+ int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
+
+ if (high_bits == 0 || high_bits == 0xffffffff)
+ return true;
+
+ analyze_64bit_constant(high_bits, low_bits,
+ &highest_bit_set, &lowest_bit_set,
+ &all_bits_between_are_set);
+
+ if ((highest_bit_set == 63 || lowest_bit_set == 0) &&
+ all_bits_between_are_set != 0)
+ return true;
+
+ if (highest_bit_set - lowest_bit_set < 21)
+ return true;
+
+ return false;
+}
+
+static void sparc_emit_set_const64_quick2(unsigned long high_bits,
+ unsigned long low_imm,
+ unsigned int dest,
+ int shift_count, struct jit_ctx *ctx)
+{
+ emit_loadimm32(high_bits, dest, ctx);
+
+ /* Now shift it up into place. */
+ emit_alu_K(SLLX, dest, shift_count, ctx);
+
+ /* If there is a low immediate part piece, finish up by
+ * putting that in as well.
+ */
+ if (low_imm != 0)
+ emit(OR | IMMED | RS1(dest) | S13(low_imm) | RD(dest), ctx);
+}
+
+static void emit_loadimm64(u64 K, unsigned int dest, struct jit_ctx *ctx)
+{
+ int all_bits_between_are_set, lowest_bit_set, highest_bit_set;
+ unsigned int tmp = bpf2sparc[TMP_REG_1];
+ u32 low_bits = (K & 0xffffffff);
+ u32 high_bits = (K >> 32);
+
+ /* These two tests also take care of all of the one
+ * instruction cases.
+ */
+ if (high_bits == 0xffffffff && (low_bits & 0x80000000))
+ return emit_loadimm_sext(K, dest, ctx);
+ if (high_bits == 0x00000000)
+ return emit_loadimm32(K, dest, ctx);
+
+ analyze_64bit_constant(high_bits, low_bits, &highest_bit_set,
+ &lowest_bit_set, &all_bits_between_are_set);
+
+ /* 1) mov -1, %reg
+ * sllx %reg, shift, %reg
+ * 2) mov -1, %reg
+ * srlx %reg, shift, %reg
+ * 3) mov some_small_const, %reg
+ * sllx %reg, shift, %reg
+ */
+ if (((highest_bit_set == 63 || lowest_bit_set == 0) &&
+ all_bits_between_are_set != 0) ||
+ ((highest_bit_set - lowest_bit_set) < 12)) {
+ int shift = lowest_bit_set;
+ long the_const = -1;
+
+ if ((highest_bit_set != 63 && lowest_bit_set != 0) ||
+ all_bits_between_are_set == 0) {
+ the_const =
+ create_simple_focus_bits(high_bits, low_bits,
+ lowest_bit_set, 0);
+ } else if (lowest_bit_set == 0)
+ shift = -(63 - highest_bit_set);
+
+ emit(OR | IMMED | RS1(G0) | S13(the_const) | RD(dest), ctx);
+ if (shift > 0)
+ emit_alu_K(SLLX, dest, shift, ctx);
+ else if (shift < 0)
+ emit_alu_K(SRLX, dest, -shift, ctx);
+
+ return;
+ }
+
+ /* Now a range of 22 or less bits set somewhere.
+ * 1) sethi %hi(focus_bits), %reg
+ * sllx %reg, shift, %reg
+ * 2) sethi %hi(focus_bits), %reg
+ * srlx %reg, shift, %reg
+ */
+ if ((highest_bit_set - lowest_bit_set) < 21) {
+ unsigned long focus_bits =
+ create_simple_focus_bits(high_bits, low_bits,
+ lowest_bit_set, 10);
+
+ emit(SETHI(focus_bits, dest), ctx);
+
+ /* If lowest_bit_set == 10 then a sethi alone could
+ * have done it.
+ */
+ if (lowest_bit_set < 10)
+ emit_alu_K(SRLX, dest, 10 - lowest_bit_set, ctx);
+ else if (lowest_bit_set > 10)
+ emit_alu_K(SLLX, dest, lowest_bit_set - 10, ctx);
+ return;
+ }
+
+ /* Ok, now 3 instruction sequences. */
+ if (low_bits == 0) {
+ emit_loadimm32(high_bits, dest, ctx);
+ emit_alu_K(SLLX, dest, 32, ctx);
+ return;
+ }
+
+ /* We may be able to do something quick
+ * when the constant is negated, so try that.
+ */
+ if (const64_is_2insns((~high_bits) & 0xffffffff,
+ (~low_bits) & 0xfffffc00)) {
+ /* NOTE: The trailing bits get XOR'd so we need the
+ * non-negated bits, not the negated ones.
+ */
+ unsigned long trailing_bits = low_bits & 0x3ff;
+
+ if ((((~high_bits) & 0xffffffff) == 0 &&
+ ((~low_bits) & 0x80000000) == 0) ||
+ (((~high_bits) & 0xffffffff) == 0xffffffff &&
+ ((~low_bits) & 0x80000000) != 0)) {
+ unsigned long fast_int = (~low_bits & 0xffffffff);
+
+ if ((is_sethi(fast_int) &&
+ (~high_bits & 0xffffffff) == 0)) {
+ emit(SETHI(fast_int, dest), ctx);
+ } else if (is_simm13(fast_int)) {
+ emit(OR | IMMED | RS1(G0) | S13(fast_int) | RD(dest), ctx);
+ } else {
+ emit_loadimm64(fast_int, dest, ctx);
+ }
+ } else {
+ u64 n = ((~low_bits) & 0xfffffc00) |
+ (((unsigned long)((~high_bits) & 0xffffffff))<<32);
+ emit_loadimm64(n, dest, ctx);
+ }
+
+ low_bits = -0x400 | trailing_bits;
+
+ emit(XOR | IMMED | RS1(dest) | S13(low_bits) | RD(dest), ctx);
+ return;
+ }
+
+ /* 1) sethi %hi(xxx), %reg
+ * or %reg, %lo(xxx), %reg
+ * sllx %reg, yyy, %reg
+ */
+ if ((highest_bit_set - lowest_bit_set) < 32) {
+ unsigned long focus_bits =
+ create_simple_focus_bits(high_bits, low_bits,
+ lowest_bit_set, 0);
+
+ /* So what we know is that the set bits straddle the
+ * middle of the 64-bit word.
+ */
+ sparc_emit_set_const64_quick2(focus_bits, 0, dest,
+ lowest_bit_set, ctx);
+ return;
+ }
+
+ /* 1) sethi %hi(high_bits), %reg
+ * or %reg, %lo(high_bits), %reg
+ * sllx %reg, 32, %reg
+ * or %reg, low_bits, %reg
+ */
+ if (is_simm13(low_bits) && ((int)low_bits > 0)) {
+ sparc_emit_set_const64_quick2(high_bits, low_bits,
+ dest, 32, ctx);
+ return;
+ }
+
+ /* Oh well, we tried... Do a full 64-bit decomposition. */
+ ctx->tmp_1_used = true;
+
+ emit_loadimm32(high_bits, tmp, ctx);
+ emit_loadimm32(low_bits, dest, ctx);
+ emit_alu_K(SLLX, tmp, 32, ctx);
+ emit(OR | RS1(dest) | RS2(tmp) | RD(dest), ctx);
+}
+
+static void emit_branch(unsigned int br_opc, unsigned int from_idx, unsigned int to_idx,
+ struct jit_ctx *ctx)
+{
+ unsigned int off = to_idx - from_idx;
+
+ if (br_opc & XCC)
+ emit(br_opc | WDISP19(off << 2), ctx);
+ else
+ emit(br_opc | WDISP22(off << 2), ctx);
+}
+
+static void emit_cbcond(unsigned int cb_opc, unsigned int from_idx, unsigned int to_idx,
+ const u8 dst, const u8 src, struct jit_ctx *ctx)
+{
+ unsigned int off = to_idx - from_idx;
+
+ emit(cb_opc | WDISP10(off << 2) | RS1(dst) | RS2(src), ctx);
+}
+
+static void emit_cbcondi(unsigned int cb_opc, unsigned int from_idx, unsigned int to_idx,
+ const u8 dst, s32 imm, struct jit_ctx *ctx)
+{
+ unsigned int off = to_idx - from_idx;
+
+ emit(cb_opc | IMMED | WDISP10(off << 2) | RS1(dst) | S5(imm), ctx);
+}
+
+#define emit_read_y(REG, CTX) emit(RD_Y | RD(REG), CTX)
+#define emit_write_y(REG, CTX) emit(WR_Y | IMMED | RS1(REG) | S13(0), CTX)
+
+#define emit_cmp(R1, R2, CTX) \
+ emit(SUBCC | RS1(R1) | RS2(R2) | RD(G0), CTX)
+
+#define emit_cmpi(R1, IMM, CTX) \
+ emit(SUBCC | IMMED | RS1(R1) | S13(IMM) | RD(G0), CTX)
+
+#define emit_btst(R1, R2, CTX) \
+ emit(ANDCC | RS1(R1) | RS2(R2) | RD(G0), CTX)
+
+#define emit_btsti(R1, IMM, CTX) \
+ emit(ANDCC | IMMED | RS1(R1) | S13(IMM) | RD(G0), CTX)
+
+static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src,
+ const s32 imm, bool is_imm, int branch_dst,