From 15324652f61287e2e640ca47f2a044df6cccf4b3 Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Tue, 23 Jun 2020 16:30:14 +0200 Subject: MIPS: dts: ocelot: describe the load/save GPIO This patch adds a description of the load/save GPIO pin, used in the VSC8584 PHY for timestamping operations. The related pinctrl description is also added. Signed-off-by: Quentin Schulz Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- arch/mips/boot/dts/mscc/ocelot_pcb120.dts | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/boot/dts/mscc/ocelot_pcb120.dts b/arch/mips/boot/dts/mscc/ocelot_pcb120.dts index 33991fd209f5..897de5025d7f 100644 --- a/arch/mips/boot/dts/mscc/ocelot_pcb120.dts +++ b/arch/mips/boot/dts/mscc/ocelot_pcb120.dts @@ -3,6 +3,7 @@ /dts-v1/; +#include <dt-bindings/gpio/gpio.h> #include <dt-bindings/interrupt-controller/irq.h> #include <dt-bindings/phy/phy-ocelot-serdes.h> #include "ocelot.dtsi" @@ -25,6 +26,11 @@ pins = "GPIO_4"; function = "gpio"; }; + + phy_load_save_pins: phy_load_save_pins { + pins = "GPIO_10"; + function = "ptp2"; + }; }; &mdio0 { @@ -34,27 +40,31 @@ &mdio1 { status = "okay"; pinctrl-names = "default"; - pinctrl-0 = <&miim1>, <&phy_int_pins>; + pinctrl-0 = <&miim1>, <&phy_int_pins>, <&phy_load_save_pins>; phy7: ethernet-phy@0 { reg = <0>; interrupts = <4 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&gpio>; + load-save-gpios = <&gpio 10 GPIO_ACTIVE_HIGH>; }; phy6: ethernet-phy@1 { reg = <1>; interrupts = <4 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&gpio>; + load-save-gpios = <&gpio 10 GPIO_ACTIVE_HIGH>; }; phy5: ethernet-phy@2 { reg = <2>; interrupts = <4 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&gpio>; + load-save-gpios = <&gpio 10 GPIO_ACTIVE_HIGH>; }; phy4: ethernet-phy@3 { reg = <3>; interrupts = <4 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&gpio>; + load-save-gpios = <&gpio 10 GPIO_ACTIVE_HIGH>; }; }; -- cgit v1.2.3 From 55db9c0e853421fa71cac5e6855898601f78a1f5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 17 Jul 2020 08:23:15 +0200 Subject: net: remove compat_sys_{get,set}sockopt Now that the ->compat_{get,set}sockopt proto_ops methods are gone there is no good reason left to keep the compat syscalls separate. This fixes the odd use of unsigned int for the compat_setsockopt optlen and the missing sock_use_custom_sol_socket. It would also easily allow running the eBPF hooks for the compat syscalls, but such a large change in behavior does not belong in a consolidation patch like this one. Signed-off-by: Christoph Hellwig Signed-off-by: David S.
Miller --- arch/arm64/include/asm/unistd32.h | 4 ++-- arch/mips/kernel/syscalls/syscall_n32.tbl | 4 ++-- arch/mips/kernel/syscalls/syscall_o32.tbl | 4 ++-- arch/parisc/kernel/syscalls/syscall.tbl | 4 ++-- arch/powerpc/kernel/syscalls/syscall.tbl | 4 ++-- arch/s390/kernel/syscalls/syscall.tbl | 4 ++-- arch/sparc/kernel/sys32.S | 12 ++++++------ arch/sparc/kernel/syscalls/syscall.tbl | 4 ++-- arch/x86/entry/syscall_x32.c | 7 +++++++ arch/x86/entry/syscalls/syscall_32.tbl | 4 ++-- arch/x86/entry/syscalls/syscall_64.tbl | 4 ++-- 11 files changed, 31 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 6d95d0c8bf2f..166e36903110 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -599,9 +599,9 @@ __SYSCALL(__NR_recvfrom, compat_sys_recvfrom) #define __NR_shutdown 293 __SYSCALL(__NR_shutdown, sys_shutdown) #define __NR_setsockopt 294 -__SYSCALL(__NR_setsockopt, compat_sys_setsockopt) +__SYSCALL(__NR_setsockopt, sys_setsockopt) #define __NR_getsockopt 295 -__SYSCALL(__NR_getsockopt, compat_sys_getsockopt) +__SYSCALL(__NR_getsockopt, sys_getsockopt) #define __NR_sendmsg 296 __SYSCALL(__NR_sendmsg, compat_sys_sendmsg) #define __NR_recvmsg 297 diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index f777141f5256..8488b0d0a99e 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -60,8 +60,8 @@ 50 n32 getsockname sys_getsockname 51 n32 getpeername sys_getpeername 52 n32 socketpair sys_socketpair -53 n32 setsockopt compat_sys_setsockopt -54 n32 getsockopt compat_sys_getsockopt +53 n32 setsockopt sys_setsockopt +54 n32 getsockopt sys_getsockopt 55 n32 clone __sys_clone 56 n32 fork __sys_fork 57 n32 execve compat_sys_execve diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 13280625d312..b20522f813f9 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -184,7 +184,7 @@ 170 o32 connect sys_connect 171 o32 getpeername sys_getpeername 172 o32 getsockname sys_getsockname -173 o32 getsockopt sys_getsockopt compat_sys_getsockopt +173 o32 getsockopt sys_getsockopt sys_getsockopt 174 o32 listen sys_listen 175 o32 recv sys_recv compat_sys_recv 176 o32 recvfrom sys_recvfrom compat_sys_recvfrom @@ -192,7 +192,7 @@ 178 o32 send sys_send 179 o32 sendmsg sys_sendmsg compat_sys_sendmsg 180 o32 sendto sys_sendto -181 o32 setsockopt sys_setsockopt compat_sys_setsockopt +181 o32 setsockopt sys_setsockopt sys_setsockopt 182 o32 shutdown sys_shutdown 183 o32 socket sys_socket 184 o32 socketpair sys_socketpair diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 5a758fa6ec52..3494e4fa1a17 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -198,8 +198,8 @@ 178 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 180 common chown sys_chown -181 common setsockopt sys_setsockopt compat_sys_setsockopt -182 common getsockopt sys_getsockopt compat_sys_getsockopt +181 common setsockopt sys_setsockopt sys_setsockopt +182 common getsockopt sys_getsockopt sys_getsockopt 183 common sendmsg sys_sendmsg compat_sys_sendmsg 184 common recvmsg sys_recvmsg compat_sys_recvmsg 185 common semop sys_semop diff --git 
a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index f833a3190822..94eb5b27ef65 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -433,8 +433,8 @@ 336 common recv sys_recv compat_sys_recv 337 common recvfrom sys_recvfrom compat_sys_recvfrom 338 common shutdown sys_shutdown -339 common setsockopt sys_setsockopt compat_sys_setsockopt -340 common getsockopt sys_getsockopt compat_sys_getsockopt +339 common setsockopt sys_setsockopt sys_setsockopt +340 common getsockopt sys_getsockopt sys_getsockopt 341 common sendmsg sys_sendmsg compat_sys_sendmsg 342 common recvmsg sys_recvmsg compat_sys_recvmsg 343 32 recvmmsg sys_recvmmsg_time32 compat_sys_recvmmsg_time32 diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index bfdcb7633957..0d63c71fc544 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -372,8 +372,8 @@ 362 common connect sys_connect sys_connect 363 common listen sys_listen sys_listen 364 common accept4 sys_accept4 sys_accept4 -365 common getsockopt sys_getsockopt compat_sys_getsockopt -366 common setsockopt sys_setsockopt compat_sys_setsockopt +365 common getsockopt sys_getsockopt sys_getsockopt +366 common setsockopt sys_setsockopt sys_setsockopt 367 common getsockname sys_getsockname sys_getsockname 368 common getpeername sys_getpeername sys_getpeername 369 common sendto sys_sendto sys_sendto diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index 489ffab918a8..a45f0f31fe51 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S @@ -157,22 +157,22 @@ do_sys_shutdown: /* sys_shutdown(int, int) */ nop nop nop -do_sys_setsockopt: /* compat_sys_setsockopt(int, int, int, char *, int) */ +do_sys_setsockopt: /* sys_setsockopt(int, int, int, char *, int) */ 47: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(compat_sys_setsockopt), %g1 + sethi %hi(sys_setsockopt), %g1 48: ldswa [%o1 + 0x8] %asi, %o2 49: lduwa [%o1 + 0xc] %asi, %o3 50: ldswa [%o1 + 0x10] %asi, %o4 - jmpl %g1 + %lo(compat_sys_setsockopt), %g0 + jmpl %g1 + %lo(sys_setsockopt), %g0 51: ldswa [%o1 + 0x4] %asi, %o1 nop -do_sys_getsockopt: /* compat_sys_getsockopt(int, int, int, u32, u32) */ +do_sys_getsockopt: /* sys_getsockopt(int, int, int, u32, u32) */ 52: ldswa [%o1 + 0x0] %asi, %o0 - sethi %hi(compat_sys_getsockopt), %g1 + sethi %hi(sys_getsockopt), %g1 53: ldswa [%o1 + 0x8] %asi, %o2 54: lduwa [%o1 + 0xc] %asi, %o3 55: lduwa [%o1 + 0x10] %asi, %o4 - jmpl %g1 + %lo(compat_sys_getsockopt), %g0 + jmpl %g1 + %lo(sys_getsockopt), %g0 56: ldswa [%o1 + 0x4] %asi, %o1 nop do_sys_sendmsg: /* compat_sys_sendmsg(int, struct compat_msghdr *, unsigned int) */ diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 8004a276cb74..c59b37965add 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -147,7 +147,7 @@ 115 32 getgroups32 sys_getgroups 116 common gettimeofday sys_gettimeofday compat_sys_gettimeofday 117 common getrusage sys_getrusage compat_sys_getrusage -118 common getsockopt sys_getsockopt compat_sys_getsockopt +118 common getsockopt sys_getsockopt sys_getsockopt 119 common getcwd sys_getcwd 120 common readv sys_readv compat_sys_readv 121 common writev sys_writev compat_sys_writev @@ -425,7 +425,7 @@ 352 common userfaultfd sys_userfaultfd 353 common bind sys_bind 354 common listen sys_listen -355 common setsockopt sys_setsockopt 
compat_sys_setsockopt +355 common setsockopt sys_setsockopt sys_setsockopt 356 common mlock2 sys_mlock2 357 common copy_file_range sys_copy_file_range 358 common preadv2 sys_preadv2 compat_sys_preadv2 diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c index 3d8d70d3896c..1583831f61a9 100644 --- a/arch/x86/entry/syscall_x32.c +++ b/arch/x86/entry/syscall_x32.c @@ -8,6 +8,13 @@ #include #include +/* + * Reuse the 64-bit entry points for the x32 versions that occupy different + * slots in the syscall table. + */ +#define __x32_sys_getsockopt __x64_sys_getsockopt +#define __x32_sys_setsockopt __x64_sys_setsockopt + #define __SYSCALL_64(nr, sym) #define __SYSCALL_X32(nr, sym) extern long __x32_##sym(const struct pt_regs *); diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index d8f8a1a69ed1..43742a69dba1 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -376,8 +376,8 @@ 362 i386 connect sys_connect 363 i386 listen sys_listen 364 i386 accept4 sys_accept4 -365 i386 getsockopt sys_getsockopt compat_sys_getsockopt -366 i386 setsockopt sys_setsockopt compat_sys_setsockopt +365 i386 getsockopt sys_getsockopt sys_getsockopt +366 i386 setsockopt sys_setsockopt sys_setsockopt 367 i386 getsockname sys_getsockname 368 i386 getpeername sys_getpeername 369 i386 sendto sys_sendto diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 78847b32e137..e008d638e641 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -396,8 +396,8 @@ 538 x32 sendmmsg compat_sys_sendmmsg 539 x32 process_vm_readv compat_sys_process_vm_readv 540 x32 process_vm_writev compat_sys_process_vm_writev -541 x32 setsockopt compat_sys_setsockopt -542 x32 getsockopt compat_sys_getsockopt +541 x32 setsockopt sys_setsockopt +542 x32 getsockopt sys_getsockopt 543 x32 io_setup compat_sys_io_setup 544 x32 io_submit compat_sys_io_submit 545 x32 execveat compat_sys_execveat -- cgit v1.2.3 From f1bfd71c8662f20d53e71ef4e18bfb0e5677c27f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Jul 2020 13:36:09 +0200 Subject: arch, net: remove the last csum_partial_copy() leftovers Most of the tree only uses and implements csum_partial_copy_nocheck, but the c6x and lib/checksum.c implement a csum_partial_copy that isn't used anywhere except to define csum_partial_copy_nocheck. Get rid of this pointless alias. Signed-off-by: Christoph Hellwig Signed-off-by: David S.
Miller --- arch/c6x/lib/checksum.c | 2 +- arch/c6x/lib/csum_64plus.S | 8 ++++---- arch/nios2/include/asm/checksum.h | 5 ++--- 3 files changed, 7 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/c6x/lib/checksum.c b/arch/c6x/lib/checksum.c index 335ca4900808..dff2e2ec6e64 100644 --- a/arch/c6x/lib/checksum.c +++ b/arch/c6x/lib/checksum.c @@ -6,6 +6,6 @@ /* These are from csum_64plus.S */ EXPORT_SYMBOL(csum_partial); -EXPORT_SYMBOL(csum_partial_copy); +EXPORT_SYMBOL(csum_partial_copy_nocheck); EXPORT_SYMBOL(ip_compute_csum); EXPORT_SYMBOL(ip_fast_csum); diff --git a/arch/c6x/lib/csum_64plus.S b/arch/c6x/lib/csum_64plus.S index 8e625a30fd43..9c07127485d1 100644 --- a/arch/c6x/lib/csum_64plus.S +++ b/arch/c6x/lib/csum_64plus.S @@ -10,8 +10,8 @@ #include ; -;unsigned int csum_partial_copy(const char *src, char * dst, -; int len, int sum) +;unsigned int csum_partial_copy_nocheck(const char *src, char * dst, +; int len, int sum) ; ; A4: src ; B4: dst @@ -21,7 +21,7 @@ ; .text -ENTRY(csum_partial_copy) +ENTRY(csum_partial_copy_nocheck) MVC .S2 ILC,B30 MV .D1X B6,A31 ; given csum @@ -149,7 +149,7 @@ L10: ADD .D1 A31,A9,A9 BNOP .S2 B3,4 MVC .S2 B30,ILC -ENDPROC(csum_partial_copy) +ENDPROC(csum_partial_copy_nocheck) ; ;unsigned short diff --git a/arch/nios2/include/asm/checksum.h b/arch/nios2/include/asm/checksum.h index ec39698d3bea..b4316c361729 100644 --- a/arch/nios2/include/asm/checksum.h +++ b/arch/nios2/include/asm/checksum.h @@ -12,10 +12,9 @@ /* Take these from lib/checksum.c */ extern __wsum csum_partial(const void *buff, int len, __wsum sum); -extern __wsum csum_partial_copy(const void *src, void *dst, int len, +__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum); -#define csum_partial_copy_nocheck(src, dst, len, sum) \ - csum_partial_copy((src), (dst), (len), (sum)) +#define csum_partial_copy_nocheck csum_partial_copy_nocheck extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl); extern __sum16 ip_compute_csum(const void *buff, int len); -- cgit v1.2.3 From 7477d43be5b1448bc0d4c85cb185a0144cc080e1 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 17 Jul 2020 18:53:23 +0200 Subject: s390/bpf: Fix sign extension in branch_ku Both signed and unsigned variants of BPF_JMP | BPF_K require sign-extending the immediate. JIT emits cgfi for the signed case, which is correct, and clgfi for the unsigned case, which is not correct: clgfi zero-extends the immediate. s390 does not provide an instruction that does sign-extension and unsigned comparison at the same time. Therefore, fix by first loading the sign-extended immediate into work register REG_1 and proceeding as if it's BPF_X. Fixes: 4e9b4a6883dd ("s390/bpf: Use relative long branches") Reported-by: Seth Forshee Signed-off-by: Ilya Leoshkevich Signed-off-by: Alexei Starovoitov Tested-by: Seth Forshee Link: https://lore.kernel.org/bpf/20200717165326.6786-3-iii@linux.ibm.com --- arch/s390/net/bpf_jit_comp.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index f4242b894cf2..6b3d612948fb 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1417,21 +1417,10 @@ branch_ks: } break; branch_ku: - is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; - /* clfi or clgfi %dst,imm */ - EMIT6_IMM(is_jmp32 ? 
0xc20f0000 : 0xc20e0000, dst_reg, imm); - if (!is_first_pass(jit) && - can_use_rel(jit, addrs[i + off + 1])) { - /* brc mask,off */ - EMIT4_PCREL_RIC(0xa7040000, - mask >> 12, addrs[i + off + 1]); - } else { - /* brcl mask,off */ - EMIT6_PCREL_RILC(0xc0040000, - mask >> 12, addrs[i + off + 1]); - } - break; + /* lgfi %w1,imm (load sign extend imm) */ + src_reg = REG_1; + EMIT6_IMM(0xc0010000, src_reg, imm); + goto branch_xu; branch_xs: is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; if (!is_first_pass(jit) && -- cgit v1.2.3 From 5fa6974471c5518a50bdd814067508dbcb477251 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 17 Jul 2020 18:53:24 +0200 Subject: s390/bpf: Use brcl for jumping to exit_ip if necessary "BPF_MAXINSNS: Maximum possible literals" test causes panic with bpf_jit_harden = 2. The reason is that BPF_JMP | BPF_EXIT is always emitted as brc, however, after removal of JITed image size limitations, brcl might be required. Fix by using brcl when necessary. Fixes: 4e9b4a6883dd ("s390/bpf: Use relative long branches") Signed-off-by: Ilya Leoshkevich Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200717165326.6786-4-iii@linux.ibm.com --- arch/s390/net/bpf_jit_comp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 6b3d612948fb..6b8968f6e207 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1268,8 +1268,12 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, last = (i == fp->len - 1) ? 1 : 0; if (last) break; - /* j */ - EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg); + if (!is_first_pass(jit) && can_use_rel(jit, jit->exit_ip)) + /* brc 0xf, */ + EMIT4_PCREL_RIC(0xa7040000, 0xf, jit->exit_ip); + else + /* brcl 0xf, */ + EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->exit_ip); break; /* * Branch relative (number of skipped instructions) to offset on -- cgit v1.2.3 From 1491b73311a15bb5beeab5d30e03bff761ef6c18 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 17 Jul 2020 18:53:25 +0200 Subject: s390/bpf: Tolerate not converging code shrinking "BPF_MAXINSNS: Maximum possible literals" unnecessarily falls back to the interpreter because of a failing sanity check in bpf_set_addr. The problem is that there are a lot of branches that can be shrunk, and doing so opens up the possibility to shrink even more. This process does not converge after 3 passes, causing code offsets to change during the codegen pass, which must never happen. Fix by inserting nops during the codegen pass in order to preserve code offsets.
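To make the fix concrete, here is a hedged, standalone sketch of the padding idea (illustrative only: emit_with_padding and its parameters are invented for this example; the real implementation is the bpf_skip() helper added in the diff below). When a later pass produces a shorter encoding than the offset recorded by an earlier pass allows, the encoder pads with 2-byte nops ("bcr 0,%r0" on s390) so that every previously recorded offset stays valid:

  #include <assert.h>

  /*
   * Illustrative only, not the kernel code. recorded_next is the offset of
   * the next instruction as recorded by an earlier pass; cur + new_size is
   * where the shrunken encoding actually ends. Padding with 2-byte nops
   * keeps recorded_next valid during the codegen pass.
   */
  static int emit_with_padding(int recorded_next, int cur, int new_size)
  {
          int end = cur + new_size;

          while (end < recorded_next)
                  end += 2;       /* emit "bcr 0,%r0", a 2-byte nop */
          return end;
  }

  int main(void)
  {
          /* earlier pass emitted brcl (6 bytes); this pass shrinks to brc (4) */
          assert(emit_with_padding(6, 0, 4) == 6);
          return 0;
  }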
Fixes: 4e9b4a6883dd ("s390/bpf: Use relative long branches") Signed-off-by: Ilya Leoshkevich Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200717165326.6786-5-iii@linux.ibm.com --- arch/s390/net/bpf_jit_comp.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 6b8968f6e207..a78c5b59e1ab 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -489,6 +489,24 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth) } while (re <= last); } +static void bpf_skip(struct bpf_jit *jit, int size) +{ + if (size >= 6 && !is_valid_rel(size)) { + /* brcl 0xf,size */ + EMIT6_PCREL_RIL(0xc0f4000000, size); + size -= 6; + } else if (size >= 4 && is_valid_rel(size)) { + /* brc 0xf,size */ + EMIT4_PCREL(0xa7f40000, size); + size -= 4; + } + while (size >= 2) { + /* bcr 0,%0 */ + _EMIT2(0x0700); + size -= 2; + } +} + /* * Emit function prologue * @@ -1503,7 +1521,14 @@ static bool bpf_is_new_addr_sane(struct bpf_jit *jit, int i) */ static int bpf_set_addr(struct bpf_jit *jit, int i) { - if (!bpf_is_new_addr_sane(jit, i)) + int delta; + + if (is_codegen_pass(jit)) { + delta = jit->prg - jit->addrs[i]; + if (delta < 0) + bpf_skip(jit, -delta); + } + if (WARN_ON_ONCE(!bpf_is_new_addr_sane(jit, i))) return -1; jit->addrs[i] = jit->prg; return 0; -- cgit v1.2.3 From 94ad428df53688a1f80ab55f2b1186101510a778 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 17 Jul 2020 18:53:26 +0200 Subject: s390/bpf: Use bpf_skip() in bpf_jit_prologue() Now that we have bpf_skip() for emitting nops, use it in bpf_jit_prologue() in order to reduce code duplication. Signed-off-by: Ilya Leoshkevich Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200717165326.6786-6-iii@linux.ibm.com --- arch/s390/net/bpf_jit_comp.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index a78c5b59e1ab..26f97a10e793 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -519,10 +519,11 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth) /* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */ _EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT); } else { - /* j tail_call_start: NOP if no tail calls are used */ - EMIT4_PCREL(0xa7f40000, 6); - /* bcr 0,%0 */ - EMIT2(0x0700, 0, REG_0); + /* + * There are no tail calls. Insert nops in order to have + * tail_call_start at a predictable offset. + */ + bpf_skip(jit, 6); } /* Tail calls have to skip above initialization */ jit->tail_call_start = jit->prg; -- cgit v1.2.3 From bfabff3cb0fef366086c64f24be8ab316a355b99 Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Mon, 20 Jul 2020 19:52:38 -0700 Subject: bpf, riscv: Modify JIT ctx to support compressed instructions This patch makes the necessary changes to struct rv_jit_context and to bpf_int_jit_compile to support compressed riscv (RVC) instructions in the BPF JIT. It changes the JIT image to be u16 instead of u32, since RVC instructions are 2 bytes as opposed to 4. It also changes ctx->offset and ctx->ninsns to refer to 2-byte instructions rather than 4-byte ones. The riscv PC is required to be 16-bit aligned with or without RVC, so this is sufficient to refer to any valid riscv offset. 
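As a minimal sketch of the new bookkeeping (a toy, self-contained model: toy_ctx, emit32 and emit16 are hypothetical stand-ins for the real rv_jit_context and the emit/emitc helpers in the diff below), a 4-byte instruction occupies two u16 slots and advances ninsns by 2, a 2-byte RVC instruction occupies one slot, so a byte offset is always ninsns << 1:

  #include <assert.h>
  #include <stdint.h>

  struct toy_ctx { uint16_t insns[64]; int ninsns; };

  /* A 4-byte instruction fills two 2-byte slots, low half first. */
  static void emit32(struct toy_ctx *c, uint32_t insn)
  {
          c->insns[c->ninsns] = (uint16_t)insn;
          c->insns[c->ninsns + 1] = (uint16_t)(insn >> 16);
          c->ninsns += 2;
  }

  /* A 2-byte RVC instruction fills a single slot. */
  static void emit16(struct toy_ctx *c, uint16_t insn)
  {
          c->insns[c->ninsns++] = insn;
  }

  int main(void)
  {
          struct toy_ctx c = { .ninsns = 0 };

          emit32(&c, 0x00000013);         /* addi x0,x0,0 (nop) */
          emit16(&c, 0x0001);             /* c.nop */
          assert((c.ninsns << 1) == 6);   /* 4 + 2 bytes emitted so far */
          return 0;
  }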
The code for computing jump offsets in bytes is updated accordingly, and factored into a new "ninsns_rvoff" function to simplify the code. Signed-off-by: Luke Nelson Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200721025241.8077-2-luke.r.nels@gmail.com --- arch/riscv/net/bpf_jit.h | 31 ++++++++++++++++++++++++++++--- arch/riscv/net/bpf_jit_comp32.c | 14 +++++++------- arch/riscv/net/bpf_jit_comp64.c | 12 ++++++------ arch/riscv/net/bpf_jit_core.c | 6 +++--- 4 files changed, 44 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h index 20e235d06f66..e90d336a9e5f 100644 --- a/arch/riscv/net/bpf_jit.h +++ b/arch/riscv/net/bpf_jit.h @@ -13,6 +13,11 @@ #include #include +static inline bool rvc_enabled(void) +{ + return IS_ENABLED(CONFIG_RISCV_ISA_C); +} + enum { RV_REG_ZERO = 0, /* The constant value 0 */ RV_REG_RA = 1, /* Return address */ @@ -50,7 +55,7 @@ enum { struct rv_jit_context { struct bpf_prog *prog; - u32 *insns; /* RV insns */ + u16 *insns; /* RV insns */ int ninsns; int epilogue_offset; int *offset; /* BPF to RV */ @@ -58,6 +63,12 @@ struct rv_jit_context { int stack_size; }; +/* Convert from ninsns to bytes. */ +static inline int ninsns_rvoff(int ninsns) +{ + return ninsns << 1; +} + struct rv_jit_data { struct bpf_binary_header *header; u8 *image; @@ -74,8 +85,22 @@ static inline void bpf_flush_icache(void *start, void *end) flush_icache_range((unsigned long)start, (unsigned long)end); } +/* Emit a 4-byte riscv instruction. */ static inline void emit(const u32 insn, struct rv_jit_context *ctx) { + if (ctx->insns) { + ctx->insns[ctx->ninsns] = insn; + ctx->insns[ctx->ninsns + 1] = (insn >> 16); + } + + ctx->ninsns += 2; +} + +/* Emit a 2-byte riscv compressed instruction. */ +static inline void emitc(const u16 insn, struct rv_jit_context *ctx) +{ + BUILD_BUG_ON(!rvc_enabled()); + if (ctx->insns) ctx->insns[ctx->ninsns] = insn; @@ -86,7 +111,7 @@ static inline int epilogue_offset(struct rv_jit_context *ctx) { int to = ctx->epilogue_offset, from = ctx->ninsns; - return (to - from) << 2; + return ninsns_rvoff(to - from); } /* Return -1 or inverted cond. */ @@ -149,7 +174,7 @@ static inline int rv_offset(int insn, int off, struct rv_jit_context *ctx) off++; /* BPF branch is from PC+1, RV is from PC */ from = (insn > 0) ? ctx->offset[insn - 1] : 0; to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0; - return (to - from) << 2; + return ninsns_rvoff(to - from); } /* Instruction formats. */ diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c index b198eaa74456..bc5f2204693f 100644 --- a/arch/riscv/net/bpf_jit_comp32.c +++ b/arch/riscv/net/bpf_jit_comp32.c @@ -644,7 +644,7 @@ static int emit_branch_r64(const s8 *src1, const s8 *src2, s32 rvoff, e = ctx->ninsns; /* Adjust for extra insns. */ - rvoff -= (e - s) << 2; + rvoff -= ninsns_rvoff(e - s); emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx); return 0; } @@ -713,7 +713,7 @@ static int emit_bcc(u8 op, u8 rd, u8 rs, int rvoff, struct rv_jit_context *ctx) if (far) { e = ctx->ninsns; /* Adjust for extra insns. */ - rvoff -= (e - s) << 2; + rvoff -= ninsns_rvoff(e - s); emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx); } return 0; @@ -731,7 +731,7 @@ static int emit_branch_r32(const s8 *src1, const s8 *src2, s32 rvoff, e = ctx->ninsns; /* Adjust for extra insns. 
*/ - rvoff -= (e - s) << 2; + rvoff -= ninsns_rvoff(e - s); if (emit_bcc(op, lo(rs1), lo(rs2), rvoff, ctx)) return -1; @@ -795,7 +795,7 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) * if (index >= max_entries) * goto out; */ - off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; + off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); emit_bcc(BPF_JGE, lo(idx_reg), RV_REG_T1, off, ctx); /* @@ -804,7 +804,7 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) * goto out; */ emit(rv_addi(RV_REG_T1, RV_REG_TCC, -1), ctx); - off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; + off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); emit_bcc(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx); /* @@ -818,7 +818,7 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) if (is_12b_check(off, insn)) return -1; emit(rv_lw(RV_REG_T0, off, RV_REG_T0), ctx); - off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; + off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); emit_bcc(BPF_JEQ, RV_REG_T0, RV_REG_ZERO, off, ctx); /* @@ -1214,7 +1214,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, emit_imm32(tmp2, imm, ctx); src = tmp2; e = ctx->ninsns; - rvoff -= (e - s) << 2; + rvoff -= ninsns_rvoff(e - s); } if (is64) diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 6cfd164cbe88..55861269da2a 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -304,14 +304,14 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) if (is_12b_check(off, insn)) return -1; emit(rv_lwu(RV_REG_T1, off, RV_REG_A1), ctx); - off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; + off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx); /* if (TCC-- < 0) * goto out; */ emit(rv_addi(RV_REG_T1, tcc, -1), ctx); - off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; + off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx); /* prog = array->ptrs[index]; @@ -324,7 +324,7 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) if (is_12b_check(off, insn)) return -1; emit(rv_ld(RV_REG_T2, off, RV_REG_T2), ctx); - off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2; + off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); emit_branch(BPF_JEQ, RV_REG_T2, RV_REG_ZERO, off, ctx); /* goto *(prog->bpf_func + 4); */ @@ -757,7 +757,7 @@ out_be: e = ctx->ninsns; /* Adjust for extra insns */ - rvoff -= (e - s) << 2; + rvoff -= ninsns_rvoff(e - s); } if (BPF_OP(code) == BPF_JSET) { @@ -810,7 +810,7 @@ out_be: e = ctx->ninsns; /* Adjust for extra insns */ - rvoff -= (e - s) << 2; + rvoff -= ninsns_rvoff(e - s); emit_branch(BPF_OP(code), rd, rs, rvoff, ctx); break; @@ -831,7 +831,7 @@ out_be: if (!is64 && imm < 0) emit(rv_addiw(RV_REG_T1, RV_REG_T1, 0), ctx); e = ctx->ninsns; - rvoff -= (e - s) << 2; + rvoff -= ninsns_rvoff(e - s); emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx); break; diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index 709b94ece3ed..3630d447352c 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -73,7 +73,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) if (ctx->offset) { extra_pass = true; - image_size = sizeof(u32) * ctx->ninsns; + image_size = sizeof(*ctx->insns) * ctx->ninsns; goto skip_init_ctx; } @@ -103,7 +103,7 @@ struct bpf_prog 
*bpf_int_jit_compile(struct bpf_prog *prog) if (jit_data->header) break; - image_size = sizeof(u32) * ctx->ninsns; + image_size = sizeof(*ctx->insns) * ctx->ninsns; jit_data->header = bpf_jit_binary_alloc(image_size, &jit_data->image, @@ -114,7 +114,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) goto out_offset; } - ctx->insns = (u32 *)jit_data->image; + ctx->insns = (u16 *)jit_data->image; /* * Now, when the image is allocated, the image can * potentially shrink more (auipc/jalr -> jal). -- cgit v1.2.3 From 804ec72c68c8477b8713a1e8f8eda120d3471031 Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Mon, 20 Jul 2020 19:52:39 -0700 Subject: bpf, riscv: Add encodings for compressed instructions This patch adds functions for encoding and emitting compressed riscv (RVC) instructions to the BPF JIT. Some regular riscv instructions can be compressed into an RVC instruction if the instruction fields meet some requirements. For example, "add rd, rs1, rs2" can be compressed into "c.add rd, rs2" when rd == rs1. To make using RVC encodings simpler, this patch also adds helper functions that selectively emit either a regular instruction or a compressed instruction if possible. For example, emit_add will produce a "c.add" if possible and regular "add" otherwise. Signed-off-by: Luke Nelson Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200721025241.8077-3-luke.r.nels@gmail.com --- arch/riscv/net/bpf_jit.h | 452 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 452 insertions(+) (limited to 'arch') diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h index e90d336a9e5f..75c1e9996867 100644 --- a/arch/riscv/net/bpf_jit.h +++ b/arch/riscv/net/bpf_jit.h @@ -53,6 +53,18 @@ enum { RV_REG_T6 = 31, }; +static inline bool is_creg(u8 reg) +{ + return (1 << reg) & (BIT(RV_REG_FP) | + BIT(RV_REG_S1) | + BIT(RV_REG_A0) | + BIT(RV_REG_A1) | + BIT(RV_REG_A2) | + BIT(RV_REG_A3) | + BIT(RV_REG_A4) | + BIT(RV_REG_A5)); +} + struct rv_jit_context { struct bpf_prog *prog; u16 *insns; /* RV insns */ @@ -142,6 +154,36 @@ static inline int invert_bpf_cond(u8 cond) return -1; } +static inline bool is_6b_int(long val) +{ + return -(1L << 5) <= val && val < (1L << 5); +} + +static inline bool is_7b_uint(unsigned long val) +{ + return val < (1UL << 7); +} + +static inline bool is_8b_uint(unsigned long val) +{ + return val < (1UL << 8); +} + +static inline bool is_9b_uint(unsigned long val) +{ + return val < (1UL << 9); +} + +static inline bool is_10b_int(long val) +{ + return -(1L << 9) <= val && val < (1L << 9); +} + +static inline bool is_10b_uint(unsigned long val) +{ + return val < (1UL << 10); +} + static inline bool is_12b_int(long val) { return -(1L << 11) <= val && val < (1L << 11); @@ -232,6 +274,59 @@ static inline u32 rv_amo_insn(u8 funct5, u8 aq, u8 rl, u8 rs2, u8 rs1, return rv_r_insn(funct7, rs2, rs1, funct3, rd, opcode); } +/* RISC-V compressed instruction formats. 
*/ + +static inline u16 rv_cr_insn(u8 funct4, u8 rd, u8 rs2, u8 op) +{ + return (funct4 << 12) | (rd << 7) | (rs2 << 2) | op; +} + +static inline u16 rv_ci_insn(u8 funct3, u32 imm6, u8 rd, u8 op) +{ + u32 imm; + + imm = ((imm6 & 0x20) << 7) | ((imm6 & 0x1f) << 2); + return (funct3 << 13) | (rd << 7) | op | imm; +} + +static inline u16 rv_css_insn(u8 funct3, u32 uimm, u8 rs2, u8 op) +{ + return (funct3 << 13) | (uimm << 7) | (rs2 << 2) | op; +} + +static inline u16 rv_ciw_insn(u8 funct3, u32 uimm, u8 rd, u8 op) +{ + return (funct3 << 13) | (uimm << 5) | ((rd & 0x7) << 2) | op; +} + +static inline u16 rv_cl_insn(u8 funct3, u32 imm_hi, u8 rs1, u32 imm_lo, u8 rd, + u8 op) +{ + return (funct3 << 13) | (imm_hi << 10) | ((rs1 & 0x7) << 7) | + (imm_lo << 5) | ((rd & 0x7) << 2) | op; +} + +static inline u16 rv_cs_insn(u8 funct3, u32 imm_hi, u8 rs1, u32 imm_lo, u8 rs2, + u8 op) +{ + return (funct3 << 13) | (imm_hi << 10) | ((rs1 & 0x7) << 7) | + (imm_lo << 5) | ((rs2 & 0x7) << 2) | op; +} + +static inline u16 rv_ca_insn(u8 funct6, u8 rd, u8 funct2, u8 rs2, u8 op) +{ + return (funct6 << 10) | ((rd & 0x7) << 7) | (funct2 << 5) | + ((rs2 & 0x7) << 2) | op; +} + +static inline u16 rv_cb_insn(u8 funct3, u32 imm6, u8 funct2, u8 rd, u8 op) +{ + u32 imm; + + imm = ((imm6 & 0x20) << 7) | ((imm6 & 0x1f) << 2); + return (funct3 << 13) | (funct2 << 10) | ((rd & 0x7) << 7) | op | imm; +} + /* Instructions shared by both RV32 and RV64. */ static inline u32 rv_addi(u8 rd, u8 rs1, u16 imm11_0) @@ -439,6 +534,135 @@ static inline u32 rv_amoadd_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) { return rv_amo_insn(0, aq, rl, rs2, rs1, 2, rd, 0x2f); } +/* RVC instructions. */ + +static inline u16 rvc_addi4spn(u8 rd, u32 imm10) +{ + u32 imm; + + imm = ((imm10 & 0x30) << 2) | ((imm10 & 0x3c0) >> 4) | + ((imm10 & 0x4) >> 1) | ((imm10 & 0x8) >> 3); + return rv_ciw_insn(0x0, imm, rd, 0x0); +} + +static inline u16 rvc_lw(u8 rd, u32 imm7, u8 rs1) +{ + u32 imm_hi, imm_lo; + + imm_hi = (imm7 & 0x38) >> 3; + imm_lo = ((imm7 & 0x4) >> 1) | ((imm7 & 0x40) >> 6); + return rv_cl_insn(0x2, imm_hi, rs1, imm_lo, rd, 0x0); +} + +static inline u16 rvc_sw(u8 rs1, u32 imm7, u8 rs2) +{ + u32 imm_hi, imm_lo; + + imm_hi = (imm7 & 0x38) >> 3; + imm_lo = ((imm7 & 0x4) >> 1) | ((imm7 & 0x40) >> 6); + return rv_cs_insn(0x6, imm_hi, rs1, imm_lo, rs2, 0x0); +} + +static inline u16 rvc_addi(u8 rd, u32 imm6) +{ + return rv_ci_insn(0, imm6, rd, 0x1); +} + +static inline u16 rvc_li(u8 rd, u32 imm6) +{ + return rv_ci_insn(0x2, imm6, rd, 0x1); +} + +static inline u16 rvc_addi16sp(u32 imm10) +{ + u32 imm; + + imm = ((imm10 & 0x200) >> 4) | (imm10 & 0x10) | ((imm10 & 0x40) >> 3) | + ((imm10 & 0x180) >> 6) | ((imm10 & 0x20) >> 5); + return rv_ci_insn(0x3, imm, RV_REG_SP, 0x1); +} + +static inline u16 rvc_lui(u8 rd, u32 imm6) +{ + return rv_ci_insn(0x3, imm6, rd, 0x1); +} + +static inline u16 rvc_srli(u8 rd, u32 imm6) +{ + return rv_cb_insn(0x4, imm6, 0, rd, 0x1); +} + +static inline u16 rvc_srai(u8 rd, u32 imm6) +{ + return rv_cb_insn(0x4, imm6, 0x1, rd, 0x1); +} + +static inline u16 rvc_andi(u8 rd, u32 imm6) +{ + return rv_cb_insn(0x4, imm6, 0x2, rd, 0x1); +} + +static inline u16 rvc_sub(u8 rd, u8 rs) +{ + return rv_ca_insn(0x23, rd, 0, rs, 0x1); +} + +static inline u16 rvc_xor(u8 rd, u8 rs) +{ + return rv_ca_insn(0x23, rd, 0x1, rs, 0x1); +} + +static inline u16 rvc_or(u8 rd, u8 rs) +{ + return rv_ca_insn(0x23, rd, 0x2, rs, 0x1); +} + +static inline u16 rvc_and(u8 rd, u8 rs) +{ + return rv_ca_insn(0x23, rd, 0x3, rs, 0x1); +} + +static inline u16 rvc_slli(u8 rd, u32
imm6) +{ + return rv_ci_insn(0, imm6, rd, 0x2); +} + +static inline u16 rvc_lwsp(u8 rd, u32 imm8) +{ + u32 imm; + + imm = ((imm8 & 0xc0) >> 6) | (imm8 & 0x3c); + return rv_ci_insn(0x2, imm, rd, 0x2); +} + +static inline u16 rvc_jr(u8 rs1) +{ + return rv_cr_insn(0x8, rs1, RV_REG_ZERO, 0x2); +} + +static inline u16 rvc_mv(u8 rd, u8 rs) +{ + return rv_cr_insn(0x8, rd, rs, 0x2); +} + +static inline u16 rvc_jalr(u8 rs1) +{ + return rv_cr_insn(0x9, rs1, RV_REG_ZERO, 0x2); +} + +static inline u16 rvc_add(u8 rd, u8 rs) +{ + return rv_cr_insn(0x9, rd, rs, 0x2); +} + +static inline u16 rvc_swsp(u32 imm8, u8 rs2) +{ + u32 imm; + + imm = (imm8 & 0x3c) | ((imm8 & 0xc0) >> 6); + return rv_css_insn(0x6, imm, rs2, 0x2); +} + /* * RV64-only instructions. * @@ -528,6 +752,234 @@ static inline u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f); } +/* RV64-only RVC instructions. */ + +static inline u16 rvc_ld(u8 rd, u32 imm8, u8 rs1) +{ + u32 imm_hi, imm_lo; + + imm_hi = (imm8 & 0x38) >> 3; + imm_lo = (imm8 & 0xc0) >> 6; + return rv_cl_insn(0x3, imm_hi, rs1, imm_lo, rd, 0x0); +} + +static inline u16 rvc_sd(u8 rs1, u32 imm8, u8 rs2) +{ + u32 imm_hi, imm_lo; + + imm_hi = (imm8 & 0x38) >> 3; + imm_lo = (imm8 & 0xc0) >> 6; + return rv_cs_insn(0x7, imm_hi, rs1, imm_lo, rs2, 0x0); +} + +static inline u16 rvc_subw(u8 rd, u8 rs) +{ + return rv_ca_insn(0x27, rd, 0, rs, 0x1); +} + +static inline u16 rvc_addiw(u8 rd, u32 imm6) +{ + return rv_ci_insn(0x1, imm6, rd, 0x1); +} + +static inline u16 rvc_ldsp(u8 rd, u32 imm9) +{ + u32 imm; + + imm = ((imm9 & 0x1c0) >> 6) | (imm9 & 0x38); + return rv_ci_insn(0x3, imm, rd, 0x2); +} + +static inline u16 rvc_sdsp(u32 imm9, u8 rs2) +{ + u32 imm; + + imm = (imm9 & 0x38) | ((imm9 & 0x1c0) >> 6); + return rv_css_insn(0x7, imm, rs2, 0x2); +} + +#endif /* __riscv_xlen == 64 */ + +/* Helper functions that emit RVC instructions when possible. 
*/ + +static inline void emit_jalr(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx) +{ + if (rvc_enabled() && rd == RV_REG_RA && rs && !imm) + emitc(rvc_jalr(rs), ctx); + else if (rvc_enabled() && !rd && rs && !imm) + emitc(rvc_jr(rs), ctx); + else + emit(rv_jalr(rd, rs, imm), ctx); +} + +static inline void emit_mv(u8 rd, u8 rs, struct rv_jit_context *ctx) +{ + if (rvc_enabled() && rd && rs) + emitc(rvc_mv(rd, rs), ctx); + else + emit(rv_addi(rd, rs, 0), ctx); +} + +static inline void emit_add(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) +{ + if (rvc_enabled() && rd && rd == rs1 && rs2) + emitc(rvc_add(rd, rs2), ctx); + else + emit(rv_add(rd, rs1, rs2), ctx); +} + +static inline void emit_addi(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx) +{ + if (rvc_enabled() && rd == RV_REG_SP && rd == rs && is_10b_int(imm) && imm && !(imm & 0xf)) + emitc(rvc_addi16sp(imm), ctx); + else if (rvc_enabled() && is_creg(rd) && rs == RV_REG_SP && is_10b_uint(imm) && + !(imm & 0x3) && imm) + emitc(rvc_addi4spn(rd, imm), ctx); + else if (rvc_enabled() && rd && rd == rs && imm && is_6b_int(imm)) + emitc(rvc_addi(rd, imm), ctx); + else + emit(rv_addi(rd, rs, imm), ctx); +} + +static inline void emit_li(u8 rd, s32 imm, struct rv_jit_context *ctx) +{ + if (rvc_enabled() && rd && is_6b_int(imm)) + emitc(rvc_li(rd, imm), ctx); + else + emit(rv_addi(rd, RV_REG_ZERO, imm), ctx); +} + +static inline void emit_lui(u8 rd, s32 imm, struct rv_jit_context *ctx) +{ + if (rvc_enabled() && rd && rd != RV_REG_SP && is_6b_int(imm) && imm) + emitc(rvc_lui(rd, imm), ctx); + else + emit(rv_lui(rd, imm), ctx); +} + +static inline void emit_slli(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx) +{ + if (rvc_enabled() && rd && rd == rs && imm && (u32)imm < __riscv_xlen) + emitc(rvc_slli(rd, imm), ctx); + else + emit(rv_slli(rd, rs, imm), ctx); +} + +static inline void emit_andi(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx) +{ + if (rvc_enabled() && is_creg(rd) && rd == rs && is_6b_int(imm)) + emitc(rvc_andi(rd, imm), ctx); + else + emit(rv_andi(rd, rs, imm), ctx); +} + +static inline void emit_srli(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx) +{ + if (rvc_enabled() && is_creg(rd) && rd == rs && imm && (u32)imm < __riscv_xlen) + emitc(rvc_srli(rd, imm), ctx); + else + emit(rv_srli(rd, rs, imm), ctx); +} + +static inline void emit_srai(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx) +{ + if (rvc_enabled() && is_creg(rd) && rd == rs && imm && (u32)imm < __riscv_xlen) + emitc(rvc_srai(rd, imm), ctx); + else + emit(rv_srai(rd, rs, imm), ctx); +} + +static inline void emit_sub(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) +{ + if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2)) + emitc(rvc_sub(rd, rs2), ctx); + else + emit(rv_sub(rd, rs1, rs2), ctx); +} + +static inline void emit_or(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) +{ + if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2)) + emitc(rvc_or(rd, rs2), ctx); + else + emit(rv_or(rd, rs1, rs2), ctx); +} + +static inline void emit_and(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) +{ + if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2)) + emitc(rvc_and(rd, rs2), ctx); + else + emit(rv_and(rd, rs1, rs2), ctx); +} + +static inline void emit_xor(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) +{ + if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2)) + emitc(rvc_xor(rd, rs2), ctx); + else + emit(rv_xor(rd, rs1, rs2), ctx); +} + +static inline void emit_lw(u8 rd, s32 off, u8 rs1, struct rv_jit_context *ctx) 
+{ + if (rvc_enabled() && rs1 == RV_REG_SP && rd && is_8b_uint(off) && !(off & 0x3)) + emitc(rvc_lwsp(rd, off), ctx); + else if (rvc_enabled() && is_creg(rd) && is_creg(rs1) && is_7b_uint(off) && !(off & 0x3)) + emitc(rvc_lw(rd, off, rs1), ctx); + else + emit(rv_lw(rd, off, rs1), ctx); +} + +static inline void emit_sw(u8 rs1, s32 off, u8 rs2, struct rv_jit_context *ctx) +{ + if (rvc_enabled() && rs1 == RV_REG_SP && is_8b_uint(off) && !(off & 0x3)) + emitc(rvc_swsp(off, rs2), ctx); + else if (rvc_enabled() && is_creg(rs1) && is_creg(rs2) && is_7b_uint(off) && !(off & 0x3)) + emitc(rvc_sw(rs1, off, rs2), ctx); + else + emit(rv_sw(rs1, off, rs2), ctx); +} + +/* RV64-only helper functions. */ +#if __riscv_xlen == 64 + +static inline void emit_addiw(u8 rd, u8 rs, s32 imm, struct rv_jit_context *ctx) +{ + if (rvc_enabled() && rd && rd == rs && is_6b_int(imm)) + emitc(rvc_addiw(rd, imm), ctx); + else + emit(rv_addiw(rd, rs, imm), ctx); +} + +static inline void emit_ld(u8 rd, s32 off, u8 rs1, struct rv_jit_context *ctx) +{ + if (rvc_enabled() && rs1 == RV_REG_SP && rd && is_9b_uint(off) && !(off & 0x7)) + emitc(rvc_ldsp(rd, off), ctx); + else if (rvc_enabled() && is_creg(rd) && is_creg(rs1) && is_8b_uint(off) && !(off & 0x7)) + emitc(rvc_ld(rd, off, rs1), ctx); + else + emit(rv_ld(rd, off, rs1), ctx); +} + +static inline void emit_sd(u8 rs1, s32 off, u8 rs2, struct rv_jit_context *ctx) +{ + if (rvc_enabled() && rs1 == RV_REG_SP && is_9b_uint(off) && !(off & 0x7)) + emitc(rvc_sdsp(off, rs2), ctx); + else if (rvc_enabled() && is_creg(rs1) && is_creg(rs2) && is_8b_uint(off) && !(off & 0x7)) + emitc(rvc_sd(rs1, off, rs2), ctx); + else + emit(rv_sd(rs1, off, rs2), ctx); +} + +static inline void emit_subw(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) +{ + if (rvc_enabled() && is_creg(rd) && rd == rs1 && is_creg(rs2)) + emitc(rvc_subw(rd, rs2), ctx); + else + emit(rv_subw(rd, rs1, rs2), ctx); +} + #endif /* __riscv_xlen == 64 */ void bpf_jit_build_prologue(struct rv_jit_context *ctx); -- cgit v1.2.3 From 18a4d8c97b841632920c16a6fa9216d1214f3db7 Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Mon, 20 Jul 2020 19:52:40 -0700 Subject: bpf, riscv: Use compressed instructions in the rv64 JIT This patch uses the RVC support and encodings from bpf_jit.h to optimize the rv64 JIT. The optimizations work by replacing emit(rv_X(...)) with a call to a helper function emit_X, which will emit a compressed version of the instruction when possible, and when RVC is enabled. The JIT continues to pass all tests in lib/test_bpf.c, and introduces no new failures to test_verifier, both with and without RVC being enabled. Most changes are straightforward replacements of emit(rv_X(...), ctx) with emit_X(..., ctx), with the following exceptions bearing mention: * Change emit_imm to sign-extend the value in "lower", since the checks for RVC (and the instructions themselves) treat the value as signed. Otherwise, small negative immediates will not be recognized as encodable using an RVC instruction. For example, without this change, emit_imm(rd, -1, ctx) would cause lower to become 4095, which is not a 6b int even though a "c.li rd, -1" instruction suffices. * For {BPF_MOV,BPF_ADD} BPF_X, drop using addiw,addw in the 32-bit cases since the values are zero-extended into the upper 32 bits in the following instructions anyways, and the addition commutes with zero-extension.
(BPF_SUB BPF_X must still use subw since subtraction does not commute with zero-extension.) This patch avoids optimizing branches and jumps to use RVC instructions since surrounding code often makes assumptions about the sizes of emitted instructions. Optimizing these will require changing these functions (e.g., emit_branch) to dynamically compute jump offsets. The following are examples of the JITed code for the verifier selftest "direct packet read test#3 for CGROUP_SKB OK", without and with RVC enabled, respectively. The former uses 178 bytes, and the latter uses 112, for a ~37% reduction in code size for this example. Without RVC: 0: 02000813 addi a6,zero,32 4: fd010113 addi sp,sp,-48 8: 02813423 sd s0,40(sp) c: 02913023 sd s1,32(sp) 10: 01213c23 sd s2,24(sp) 14: 01313823 sd s3,16(sp) 18: 01413423 sd s4,8(sp) 1c: 03010413 addi s0,sp,48 20: 03056683 lwu a3,48(a0) 24: 02069693 slli a3,a3,0x20 28: 0206d693 srli a3,a3,0x20 2c: 03456703 lwu a4,52(a0) 30: 02071713 slli a4,a4,0x20 34: 02075713 srli a4,a4,0x20 38: 03856483 lwu s1,56(a0) 3c: 02049493 slli s1,s1,0x20 40: 0204d493 srli s1,s1,0x20 44: 03c56903 lwu s2,60(a0) 48: 02091913 slli s2,s2,0x20 4c: 02095913 srli s2,s2,0x20 50: 04056983 lwu s3,64(a0) 54: 02099993 slli s3,s3,0x20 58: 0209d993 srli s3,s3,0x20 5c: 09056a03 lwu s4,144(a0) 60: 020a1a13 slli s4,s4,0x20 64: 020a5a13 srli s4,s4,0x20 68: 00900313 addi t1,zero,9 6c: 006a7463 bgeu s4,t1,0x74 70: 00000a13 addi s4,zero,0 74: 02d52823 sw a3,48(a0) 78: 02e52a23 sw a4,52(a0) 7c: 02952c23 sw s1,56(a0) 80: 03252e23 sw s2,60(a0) 84: 05352023 sw s3,64(a0) 88: 00000793 addi a5,zero,0 8c: 02813403 ld s0,40(sp) 90: 02013483 ld s1,32(sp) 94: 01813903 ld s2,24(sp) 98: 01013983 ld s3,16(sp) 9c: 00813a03 ld s4,8(sp) a0: 03010113 addi sp,sp,48 a4: 00078513 addi a0,a5,0 a8: 00008067 jalr zero,0(ra) With RVC: 0: 02000813 addi a6,zero,32 4: 7179 c.addi16sp sp,-48 6: f422 c.sdsp s0,40(sp) 8: f026 c.sdsp s1,32(sp) a: ec4a c.sdsp s2,24(sp) c: e84e c.sdsp s3,16(sp) e: e452 c.sdsp s4,8(sp) 10: 1800 c.addi4spn s0,sp,48 12: 03056683 lwu a3,48(a0) 16: 1682 c.slli a3,0x20 18: 9281 c.srli a3,0x20 1a: 03456703 lwu a4,52(a0) 1e: 1702 c.slli a4,0x20 20: 9301 c.srli a4,0x20 22: 03856483 lwu s1,56(a0) 26: 1482 c.slli s1,0x20 28: 9081 c.srli s1,0x20 2a: 03c56903 lwu s2,60(a0) 2e: 1902 c.slli s2,0x20 30: 02095913 srli s2,s2,0x20 34: 04056983 lwu s3,64(a0) 38: 1982 c.slli s3,0x20 3a: 0209d993 srli s3,s3,0x20 3e: 09056a03 lwu s4,144(a0) 42: 1a02 c.slli s4,0x20 44: 020a5a13 srli s4,s4,0x20 48: 4325 c.li t1,9 4a: 006a7363 bgeu s4,t1,0x50 4e: 4a01 c.li s4,0 50: d914 c.sw a3,48(a0) 52: d958 c.sw a4,52(a0) 54: dd04 c.sw s1,56(a0) 56: 03252e23 sw s2,60(a0) 5a: 05352023 sw s3,64(a0) 5e: 4781 c.li a5,0 60: 7422 c.ldsp s0,40(sp) 62: 7482 c.ldsp s1,32(sp) 64: 6962 c.ldsp s2,24(sp) 66: 69c2 c.ldsp s3,16(sp) 68: 6a22 c.ldsp s4,8(sp) 6a: 6145 c.addi16sp sp,48 6c: 853e c.mv a0,a5 6e: 8082 c.jr ra Signed-off-by: Luke Nelson Signed-off-by: Alexei Starovoitov Cc: Björn Töpel Link: https://lore.kernel.org/bpf/20200721025241.8077-4-luke.r.nels@gmail.com --- arch/riscv/net/bpf_jit_comp64.c | 281 +++++++++++++++++++++------------------- 1 file changed, 147 insertions(+), 134 deletions(-) (limited to 'arch') diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 55861269da2a..8a56b5293117 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -132,19 +132,23 @@ static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx) * * This also means that we need to process LSB to MSB. 
*/ - s64 upper = (val + (1 << 11)) >> 12, lower = val & 0xfff; + s64 upper = (val + (1 << 11)) >> 12; + /* Sign-extend lower 12 bits to 64 bits since immediates for li, addiw, + * and addi are signed and RVC checks will perform signed comparisons. + */ + s64 lower = ((val & 0xfff) << 52) >> 52; int shift; if (is_32b_int(val)) { if (upper) - emit(rv_lui(rd, upper), ctx); + emit_lui(rd, upper, ctx); if (!upper) { - emit(rv_addi(rd, RV_REG_ZERO, lower), ctx); + emit_li(rd, lower, ctx); return; } - emit(rv_addiw(rd, rd, lower), ctx); + emit_addiw(rd, rd, lower, ctx); return; } @@ -154,9 +158,9 @@ static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx) emit_imm(rd, upper, ctx); - emit(rv_slli(rd, rd, shift), ctx); + emit_slli(rd, rd, shift, ctx); if (lower) - emit(rv_addi(rd, rd, lower), ctx); + emit_addi(rd, rd, lower, ctx); } static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) @@ -164,43 +168,43 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8; if (seen_reg(RV_REG_RA, ctx)) { - emit(rv_ld(RV_REG_RA, store_offset, RV_REG_SP), ctx); + emit_ld(RV_REG_RA, store_offset, RV_REG_SP, ctx); store_offset -= 8; } - emit(rv_ld(RV_REG_FP, store_offset, RV_REG_SP), ctx); + emit_ld(RV_REG_FP, store_offset, RV_REG_SP, ctx); store_offset -= 8; if (seen_reg(RV_REG_S1, ctx)) { - emit(rv_ld(RV_REG_S1, store_offset, RV_REG_SP), ctx); + emit_ld(RV_REG_S1, store_offset, RV_REG_SP, ctx); store_offset -= 8; } if (seen_reg(RV_REG_S2, ctx)) { - emit(rv_ld(RV_REG_S2, store_offset, RV_REG_SP), ctx); + emit_ld(RV_REG_S2, store_offset, RV_REG_SP, ctx); store_offset -= 8; } if (seen_reg(RV_REG_S3, ctx)) { - emit(rv_ld(RV_REG_S3, store_offset, RV_REG_SP), ctx); + emit_ld(RV_REG_S3, store_offset, RV_REG_SP, ctx); store_offset -= 8; } if (seen_reg(RV_REG_S4, ctx)) { - emit(rv_ld(RV_REG_S4, store_offset, RV_REG_SP), ctx); + emit_ld(RV_REG_S4, store_offset, RV_REG_SP, ctx); store_offset -= 8; } if (seen_reg(RV_REG_S5, ctx)) { - emit(rv_ld(RV_REG_S5, store_offset, RV_REG_SP), ctx); + emit_ld(RV_REG_S5, store_offset, RV_REG_SP, ctx); store_offset -= 8; } if (seen_reg(RV_REG_S6, ctx)) { - emit(rv_ld(RV_REG_S6, store_offset, RV_REG_SP), ctx); + emit_ld(RV_REG_S6, store_offset, RV_REG_SP, ctx); store_offset -= 8; } - emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx); + emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx); /* Set return value. */ if (!is_tail_call) - emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx); - emit(rv_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA, - is_tail_call ? 4 : 0), /* skip TCC init */ - ctx); + emit_mv(RV_REG_A0, RV_REG_A5, ctx); + emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA, + is_tail_call ? 
4 : 0, /* skip TCC init */ + ctx); } static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff, @@ -280,8 +284,8 @@ static void emit_branch(u8 cond, u8 rd, u8 rs, int rvoff, static void emit_zext_32(u8 reg, struct rv_jit_context *ctx) { - emit(rv_slli(reg, reg, 32), ctx); - emit(rv_srli(reg, reg, 32), ctx); + emit_slli(reg, reg, 32, ctx); + emit_srli(reg, reg, 32, ctx); } static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) @@ -310,7 +314,7 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) /* if (TCC-- < 0) * goto out; */ - emit(rv_addi(RV_REG_T1, tcc, -1), ctx); + emit_addi(RV_REG_T1, tcc, -1, ctx); off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx); @@ -318,12 +322,12 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) * if (!prog) * goto out; */ - emit(rv_slli(RV_REG_T2, RV_REG_A2, 3), ctx); - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_A1), ctx); + emit_slli(RV_REG_T2, RV_REG_A2, 3, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_A1, ctx); off = offsetof(struct bpf_array, ptrs); if (is_12b_check(off, insn)) return -1; - emit(rv_ld(RV_REG_T2, off, RV_REG_T2), ctx); + emit_ld(RV_REG_T2, off, RV_REG_T2, ctx); off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn)); emit_branch(BPF_JEQ, RV_REG_T2, RV_REG_ZERO, off, ctx); @@ -331,8 +335,8 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx) off = offsetof(struct bpf_prog, bpf_func); if (is_12b_check(off, insn)) return -1; - emit(rv_ld(RV_REG_T3, off, RV_REG_T2), ctx); - emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx); + emit_ld(RV_REG_T3, off, RV_REG_T2, ctx); + emit_mv(RV_REG_TCC, RV_REG_T1, ctx); __build_epilogue(true, ctx); return 0; } @@ -360,9 +364,9 @@ static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn, static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx) { - emit(rv_addi(RV_REG_T2, *rd, 0), ctx); + emit_mv(RV_REG_T2, *rd, ctx); emit_zext_32(RV_REG_T2, ctx); - emit(rv_addi(RV_REG_T1, *rs, 0), ctx); + emit_mv(RV_REG_T1, *rs, ctx); emit_zext_32(RV_REG_T1, ctx); *rd = RV_REG_T2; *rs = RV_REG_T1; @@ -370,15 +374,15 @@ static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx) static void emit_sext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx) { - emit(rv_addiw(RV_REG_T2, *rd, 0), ctx); - emit(rv_addiw(RV_REG_T1, *rs, 0), ctx); + emit_addiw(RV_REG_T2, *rd, 0, ctx); + emit_addiw(RV_REG_T1, *rs, 0, ctx); *rd = RV_REG_T2; *rs = RV_REG_T1; } static void emit_zext_32_rd_t1(u8 *rd, struct rv_jit_context *ctx) { - emit(rv_addi(RV_REG_T2, *rd, 0), ctx); + emit_mv(RV_REG_T2, *rd, ctx); emit_zext_32(RV_REG_T2, ctx); emit_zext_32(RV_REG_T1, ctx); *rd = RV_REG_T2; @@ -386,7 +390,7 @@ static void emit_zext_32_rd_t1(u8 *rd, struct rv_jit_context *ctx) static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx) { - emit(rv_addiw(RV_REG_T2, *rd, 0), ctx); + emit_addiw(RV_REG_T2, *rd, 0, ctx); *rd = RV_REG_T2; } @@ -432,7 +436,7 @@ static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx) if (ret) return ret; rd = bpf_to_rv_reg(BPF_REG_0, ctx); - emit(rv_addi(rd, RV_REG_A0, 0), ctx); + emit_mv(rd, RV_REG_A0, ctx); return 0; } @@ -458,7 +462,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, emit_zext_32(rd, ctx); break; } - emit(is64 ? 
rv_addi(rd, rs, 0) : rv_addiw(rd, rs, 0), ctx); + emit_mv(rd, rs, ctx); if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; @@ -466,31 +470,35 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, /* dst = dst OP src */ case BPF_ALU | BPF_ADD | BPF_X: case BPF_ALU64 | BPF_ADD | BPF_X: - emit(is64 ? rv_add(rd, rd, rs) : rv_addw(rd, rd, rs), ctx); + emit_add(rd, rd, rs, ctx); if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_SUB | BPF_X: case BPF_ALU64 | BPF_SUB | BPF_X: - emit(is64 ? rv_sub(rd, rd, rs) : rv_subw(rd, rd, rs), ctx); + if (is64) + emit_sub(rd, rd, rs, ctx); + else + emit_subw(rd, rd, rs, ctx); + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_AND | BPF_X: case BPF_ALU64 | BPF_AND | BPF_X: - emit(rv_and(rd, rd, rs), ctx); + emit_and(rd, rd, rs, ctx); if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_OR | BPF_X: case BPF_ALU64 | BPF_OR | BPF_X: - emit(rv_or(rd, rd, rs), ctx); + emit_or(rd, rd, rs, ctx); if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_XOR | BPF_X: case BPF_ALU64 | BPF_XOR | BPF_X: - emit(rv_xor(rd, rd, rs), ctx); + emit_xor(rd, rd, rs, ctx); if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; @@ -534,8 +542,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, /* dst = -dst */ case BPF_ALU | BPF_NEG: case BPF_ALU64 | BPF_NEG: - emit(is64 ? rv_sub(rd, RV_REG_ZERO, rd) : - rv_subw(rd, RV_REG_ZERO, rd), ctx); + emit_sub(rd, RV_REG_ZERO, rd, ctx); if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; @@ -544,8 +551,8 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, case BPF_ALU | BPF_END | BPF_FROM_LE: switch (imm) { case 16: - emit(rv_slli(rd, rd, 48), ctx); - emit(rv_srli(rd, rd, 48), ctx); + emit_slli(rd, rd, 48, ctx); + emit_srli(rd, rd, 48, ctx); break; case 32: if (!aux->verifier_zext) @@ -558,51 +565,51 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, break; case BPF_ALU | BPF_END | BPF_FROM_BE: - emit(rv_addi(RV_REG_T2, RV_REG_ZERO, 0), ctx); + emit_li(RV_REG_T2, 0, ctx); - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); - emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); - emit(rv_srli(rd, rd, 8), ctx); + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); + emit_srli(rd, rd, 8, ctx); if (imm == 16) goto out_be; - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); - emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); - emit(rv_srli(rd, rd, 8), ctx); + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); + emit_srli(rd, rd, 8, ctx); - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); - emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); - emit(rv_srli(rd, rd, 8), ctx); + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); + emit_srli(rd, rd, 8, ctx); if (imm == 32) goto out_be; - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); - emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); - emit(rv_srli(rd, rd, 8), ctx); - - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); - emit(rv_add(RV_REG_T2, 
RV_REG_T2, RV_REG_T1), ctx); - emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); - emit(rv_srli(rd, rd, 8), ctx); - - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); - emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); - emit(rv_srli(rd, rd, 8), ctx); - - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); - emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx); - emit(rv_srli(rd, rd, 8), ctx); + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); + emit_srli(rd, rd, 8, ctx); + + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); + emit_srli(rd, rd, 8, ctx); + + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); + emit_srli(rd, rd, 8, ctx); + + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); + emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx); + emit_srli(rd, rd, 8, ctx); out_be: - emit(rv_andi(RV_REG_T1, rd, 0xff), ctx); - emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx); + emit_andi(RV_REG_T1, rd, 0xff, ctx); + emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx); - emit(rv_addi(rd, RV_REG_T2, 0), ctx); + emit_mv(rd, RV_REG_T2, ctx); break; /* dst = imm */ @@ -617,12 +624,10 @@ out_be: case BPF_ALU | BPF_ADD | BPF_K: case BPF_ALU64 | BPF_ADD | BPF_K: if (is_12b_int(imm)) { - emit(is64 ? rv_addi(rd, rd, imm) : - rv_addiw(rd, rd, imm), ctx); + emit_addi(rd, rd, imm, ctx); } else { emit_imm(RV_REG_T1, imm, ctx); - emit(is64 ? rv_add(rd, rd, RV_REG_T1) : - rv_addw(rd, rd, RV_REG_T1), ctx); + emit_add(rd, rd, RV_REG_T1, ctx); } if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); @@ -630,12 +635,10 @@ out_be: case BPF_ALU | BPF_SUB | BPF_K: case BPF_ALU64 | BPF_SUB | BPF_K: if (is_12b_int(-imm)) { - emit(is64 ? rv_addi(rd, rd, -imm) : - rv_addiw(rd, rd, -imm), ctx); + emit_addi(rd, rd, -imm, ctx); } else { emit_imm(RV_REG_T1, imm, ctx); - emit(is64 ? rv_sub(rd, rd, RV_REG_T1) : - rv_subw(rd, rd, RV_REG_T1), ctx); + emit_sub(rd, rd, RV_REG_T1, ctx); } if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); @@ -643,10 +646,10 @@ out_be: case BPF_ALU | BPF_AND | BPF_K: case BPF_ALU64 | BPF_AND | BPF_K: if (is_12b_int(imm)) { - emit(rv_andi(rd, rd, imm), ctx); + emit_andi(rd, rd, imm, ctx); } else { emit_imm(RV_REG_T1, imm, ctx); - emit(rv_and(rd, rd, RV_REG_T1), ctx); + emit_and(rd, rd, RV_REG_T1, ctx); } if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); @@ -657,7 +660,7 @@ out_be: emit(rv_ori(rd, rd, imm), ctx); } else { emit_imm(RV_REG_T1, imm, ctx); - emit(rv_or(rd, rd, RV_REG_T1), ctx); + emit_or(rd, rd, RV_REG_T1, ctx); } if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); @@ -668,7 +671,7 @@ out_be: emit(rv_xori(rd, rd, imm), ctx); } else { emit_imm(RV_REG_T1, imm, ctx); - emit(rv_xor(rd, rd, RV_REG_T1), ctx); + emit_xor(rd, rd, RV_REG_T1, ctx); } if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); @@ -699,19 +702,28 @@ out_be: break; case BPF_ALU | BPF_LSH | BPF_K: case BPF_ALU64 | BPF_LSH | BPF_K: - emit(is64 ? rv_slli(rd, rd, imm) : rv_slliw(rd, rd, imm), ctx); + emit_slli(rd, rd, imm, ctx); + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_RSH | BPF_K: case BPF_ALU64 | BPF_RSH | BPF_K: - emit(is64 ? 
rv_srli(rd, rd, imm) : rv_srliw(rd, rd, imm), ctx); + if (is64) + emit_srli(rd, rd, imm, ctx); + else + emit(rv_srliw(rd, rd, imm), ctx); + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_ARSH | BPF_K: case BPF_ALU64 | BPF_ARSH | BPF_K: - emit(is64 ? rv_srai(rd, rd, imm) : rv_sraiw(rd, rd, imm), ctx); + if (is64) + emit_srai(rd, rd, imm, ctx); + else + emit(rv_sraiw(rd, rd, imm), ctx); + if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; @@ -763,7 +775,7 @@ out_be: if (BPF_OP(code) == BPF_JSET) { /* Adjust f