From 504641859e5c616210c0894149e09fb6928e398f Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 3 Aug 2017 17:23:21 +0100 Subject: arm64: fpsimd: Consistently use __this_cpu_ ops where appropriate __this_cpu_ ops are not used consistently with regard to this_cpu_ ops in a couple of places in fpsimd.c. Since preemption is explicitly disabled in fpsimd_restore_current_state() and fpsimd_update_current_state(), this patch converts this_cpu_ ops in those functions to __this_cpu_ ops. This doesn't save cost on arm64, but benefits from additional assertions in the core code. Signed-off-by: Dave Martin Reviewed-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 06da8ea16bbe..d7e5f8a2d4f5 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -194,7 +194,7 @@ void fpsimd_restore_current_state(void) struct fpsimd_state *st = ¤t->thread.fpsimd_state; fpsimd_load_state(st); - this_cpu_write(fpsimd_last_state, st); + __this_cpu_write(fpsimd_last_state, st); st->cpu = smp_processor_id(); } preempt_enable(); @@ -214,7 +214,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state) if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { struct fpsimd_state *st = ¤t->thread.fpsimd_state; - this_cpu_write(fpsimd_last_state, st); + __this_cpu_write(fpsimd_last_state, st); st->cpu = smp_processor_id(); } preempt_enable(); -- cgit v1.2.3 From 4328825d4fdc185d365d8e858cace8b324198a70 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 3 Aug 2017 17:23:22 +0100 Subject: arm64: neon: Allow EFI runtime services to use FPSIMD in irq context In order to be able to cope with kernel-mode NEON being unavailable in hardirq/nmi context and non-nestable, we need special handling for EFI runtime service calls that may be made during an interrupt that interrupted a kernel_neon_begin()..._end() block. This will occur if the kernel tries to write diagnostic data to EFI persistent storage during a panic triggered by an NMI for example. EFI runtime services specify an ABI that clobbers the FPSIMD state, rather than being able to use it optionally as an accelerator. This means that EFI is really a special case and can be handled specially. To enable EFI calls from interrupts, this patch creates dedicated __efi_fpsimd_{begin,end}() helpers solely for this purpose, which save/restore to a separate percpu buffer if called in a context where kernel_neon_begin() is not usable. Signed-off-by: Dave Martin Reviewed-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index d7e5f8a2d4f5..bcde88e2d981 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -21,12 +21,15 @@ #include #include #include +#include #include #include #include #include #include +#include +#include #define FPEXC_IOF (1 << 0) #define FPEXC_DZF (1 << 1) @@ -276,6 +279,55 @@ void kernel_neon_end(void) } EXPORT_SYMBOL(kernel_neon_end); +DEFINE_PER_CPU(struct fpsimd_state, efi_fpsimd_state); +DEFINE_PER_CPU(bool, efi_fpsimd_state_used); + +/* + * EFI runtime services support functions + * + * The ABI for EFI runtime services allows EFI to use FPSIMD during the call. 
+ * This means that for EFI (and only for EFI), we have to assume that FPSIMD + * is always used rather than being an optional accelerator. + * + * These functions provide the necessary support for ensuring FPSIMD + * save/restore in the contexts from which EFI is used. + * + * Do not use them for any other purpose -- if tempted to do so, you are + * either doing something wrong or you need to propose some refactoring. + */ + +/* + * __efi_fpsimd_begin(): prepare FPSIMD for making an EFI runtime services call + */ +void __efi_fpsimd_begin(void) +{ + if (!system_supports_fpsimd()) + return; + + WARN_ON(preemptible()); + + if (may_use_simd()) + kernel_neon_begin(); + else { + fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state)); + __this_cpu_write(efi_fpsimd_state_used, true); + } +} + +/* + * __efi_fpsimd_end(): clean up FPSIMD after an EFI runtime services call + */ +void __efi_fpsimd_end(void) +{ + if (!system_supports_fpsimd()) + return; + + if (__this_cpu_xchg(efi_fpsimd_state_used, false)) + fpsimd_load_state(this_cpu_ptr(&efi_fpsimd_state)); + else + kernel_neon_end(); +} + #endif /* CONFIG_KERNEL_MODE_NEON */ #ifdef CONFIG_CPU_PM -- cgit v1.2.3 From cb84d11e1625aa3a081d898ca2640bf3a9ca0e96 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 3 Aug 2017 17:23:23 +0100 Subject: arm64: neon: Remove support for nested or hardirq kernel-mode NEON Support for kernel-mode NEON to be nested and/or used in hardirq context adds significant complexity, and the benefits may be marginal. In practice, kernel-mode NEON is not used in hardirq context, and is rarely used in softirq context (by certain mac80211 drivers). This patch implements an arm64 may_use_simd() function to allow clients to check whether kernel-mode NEON is usable in the current context, and simplifies kernel_neon_{begin,end}() to handle only saving of the task FPSIMD state (if any). Without nesting, there is no other state to save. The partial fpsimd save/restore functions become redundant as a result of these changes, so they are removed too. The save/restore model is changed to operate directly on task_struct without additional percpu storage. This simplifies the code and saves a bit of memory, but means that softirqs must now be disabled when manipulating the task fpsimd state from task context: correspondingly, preempt_{en,dis}sable() calls are upgraded to local_bh_{en,dis}able() as appropriate. fpsimd_thread_switch() already runs with hardirqs disabled and so is already protected from softirqs. These changes should make it easier to support kernel-mode NEON in the presence of the Scalable Vector extension in the future. Signed-off-by: Dave Martin Reviewed-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry-fpsimd.S | 24 -------- arch/arm64/kernel/fpsimd.c | 115 ++++++++++++++++++++++++++------------- 2 files changed, 78 insertions(+), 61 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index c44a82f146b1..6a27cd6dbfa6 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -41,27 +41,3 @@ ENTRY(fpsimd_load_state) fpsimd_restore x0, 8 ret ENDPROC(fpsimd_load_state) - -#ifdef CONFIG_KERNEL_MODE_NEON - -/* - * Save the bottom n FP registers. - * - * x0 - pointer to struct fpsimd_partial_state - */ -ENTRY(fpsimd_save_partial_state) - fpsimd_save_partial x0, 1, 8, 9 - ret -ENDPROC(fpsimd_save_partial_state) - -/* - * Load the bottom n FP registers. 
- * - * x0 - pointer to struct fpsimd_partial_state - */ -ENTRY(fpsimd_load_partial_state) - fpsimd_restore_partial x0, 8, 9 - ret -ENDPROC(fpsimd_load_partial_state) - -#endif diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index bcde88e2d981..138fcfaeadc1 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -17,18 +17,18 @@ * along with this program. If not, see . */ +#include #include #include #include #include +#include #include #include #include -#include #include #include -#include #include #define FPEXC_IOF (1 << 0) @@ -65,6 +65,13 @@ * CPU currently contain the most recent userland FPSIMD state of the current * task. * + * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may + * save the task's FPSIMD context back to task_struct from softirq context. + * To prevent this from racing with the manipulation of the task's FPSIMD state + * from task context and thereby corrupting the state, it is necessary to + * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE + * flag with local_bh_disable() unless softirqs are already masked. + * * For a certain task, the sequence may look something like this: * - the task gets scheduled in; if both the task's fpsimd_state.cpu field * contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu @@ -164,9 +171,14 @@ void fpsimd_flush_thread(void) { if (!system_supports_fpsimd()) return; + + local_bh_disable(); + memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); fpsimd_flush_task_state(current); set_thread_flag(TIF_FOREIGN_FPSTATE); + + local_bh_enable(); } /* @@ -177,10 +189,13 @@ void fpsimd_preserve_current_state(void) { if (!system_supports_fpsimd()) return; - preempt_disable(); + + local_bh_disable(); + if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) fpsimd_save_state(¤t->thread.fpsimd_state); - preempt_enable(); + + local_bh_enable(); } /* @@ -192,7 +207,9 @@ void fpsimd_restore_current_state(void) { if (!system_supports_fpsimd()) return; - preempt_disable(); + + local_bh_disable(); + if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { struct fpsimd_state *st = ¤t->thread.fpsimd_state; @@ -200,7 +217,8 @@ void fpsimd_restore_current_state(void) __this_cpu_write(fpsimd_last_state, st); st->cpu = smp_processor_id(); } - preempt_enable(); + + local_bh_enable(); } /* @@ -212,7 +230,9 @@ void fpsimd_update_current_state(struct fpsimd_state *state) { if (!system_supports_fpsimd()) return; - preempt_disable(); + + local_bh_disable(); + fpsimd_load_state(state); if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { struct fpsimd_state *st = ¤t->thread.fpsimd_state; @@ -220,7 +240,8 @@ void fpsimd_update_current_state(struct fpsimd_state *state) __this_cpu_write(fpsimd_last_state, st); st->cpu = smp_processor_id(); } - preempt_enable(); + + local_bh_enable(); } /* @@ -233,49 +254,69 @@ void fpsimd_flush_task_state(struct task_struct *t) #ifdef CONFIG_KERNEL_MODE_NEON -static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate); -static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate); +DEFINE_PER_CPU(bool, kernel_neon_busy); /* * Kernel-side NEON support functions */ -void kernel_neon_begin_partial(u32 num_regs) + +/* + * kernel_neon_begin(): obtain the CPU FPSIMD registers for use by the calling + * context + * + * Must not be called unless may_use_simd() returns true. + * Task context in the FPSIMD registers is saved back to memory as necessary. 
+ * + * A matching call to kernel_neon_end() must be made before returning from the + * calling context. + * + * The caller may freely use the FPSIMD registers until kernel_neon_end() is + * called. + */ +void kernel_neon_begin(void) { if (WARN_ON(!system_supports_fpsimd())) return; - if (in_interrupt()) { - struct fpsimd_partial_state *s = this_cpu_ptr( - in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); - BUG_ON(num_regs > 32); - fpsimd_save_partial_state(s, roundup(num_regs, 2)); - } else { - /* - * Save the userland FPSIMD state if we have one and if we - * haven't done so already. Clear fpsimd_last_state to indicate - * that there is no longer userland FPSIMD state in the - * registers. - */ - preempt_disable(); - if (current->mm && - !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) - fpsimd_save_state(¤t->thread.fpsimd_state); - this_cpu_write(fpsimd_last_state, NULL); - } + BUG_ON(!may_use_simd()); + + local_bh_disable(); + + __this_cpu_write(kernel_neon_busy, true); + + /* Save unsaved task fpsimd state, if any: */ + if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) + fpsimd_save_state(¤t->thread.fpsimd_state); + + /* Invalidate any task state remaining in the fpsimd regs: */ + __this_cpu_write(fpsimd_last_state, NULL); + + preempt_disable(); + + local_bh_enable(); } -EXPORT_SYMBOL(kernel_neon_begin_partial); +EXPORT_SYMBOL(kernel_neon_begin); +/* + * kernel_neon_end(): give the CPU FPSIMD registers back to the current task + * + * Must be called from a context in which kernel_neon_begin() was previously + * called, with no call to kernel_neon_end() in the meantime. + * + * The caller must not use the FPSIMD registers after this function is called, + * unless kernel_neon_begin() is called again in the meantime. + */ void kernel_neon_end(void) { + bool busy; + if (!system_supports_fpsimd()) return; - if (in_interrupt()) { - struct fpsimd_partial_state *s = this_cpu_ptr( - in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); - fpsimd_load_partial_state(s); - } else { - preempt_enable(); - } + + busy = __this_cpu_xchg(kernel_neon_busy, false); + WARN_ON(!busy); /* No matching kernel_neon_begin()? */ + + preempt_enable(); } EXPORT_SYMBOL(kernel_neon_end); -- cgit v1.2.3 From 35d0e6fb4d219d64ab3b7cffef7a11a0662140f5 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 1 Aug 2017 15:35:53 +0100 Subject: arm64: syscallno is secretly an int, make it official The upper 32 bits of the syscallno field in thread_struct are handled inconsistently, being sometimes zero extended and sometimes sign-extended. In fact, only the lower 32 bits seem to have any real significance for the behaviour of the code: it's been OK to handle the upper bits inconsistently because they don't matter. Currently, the only place I can find where those bits are significant is in calling trace_sys_enter(), which may be unintentional: for example, if a compat tracer attempts to cancel a syscall by passing -1 to (COMPAT_)PTRACE_SET_SYSCALL at the syscall-enter-stop, it will be traced as syscall 4294967295 rather than -1 as might be expected (and as occurs for a native tracer doing the same thing). Elsewhere, reads of syscallno cast it to an int or truncate it. There's also a conspicuous amount of code and casting to bodge around the fact that although semantically an int, syscallno is stored as a u64. Let's not pretend any more. 
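As an aside, the zero- versus sign-extension mismatch described above can be shown with a small standalone userspace sketch (not kernel code; struct old_regs and its field are a hypothetical stand-in for the old u64 syscallno slot):

#include <stdio.h>
#include <stdint.h>

/* Hypothetical stand-in for the old layout: the syscall number lives in
 * a 64-bit slot although only the low 32 bits are meaningful. */
struct old_regs {
        uint64_t syscallno;
};

int main(void)
{
        struct old_regs regs;

        /* A native 64-bit tracer writing -1 stores the sign-extended value. */
        regs.syscallno = (uint64_t)(int64_t)-1;
        printf("native view: %lld\n", (long long)regs.syscallno);      /* -1 */

        /* A compat tracer writes -1 through a 32-bit view, so the value is
         * zero-extended and the "same" cancelled syscall looks different. */
        regs.syscallno = (uint32_t)-1;
        printf("compat view: %llu\n",
               (unsigned long long)regs.syscallno);             /* 4294967295 */

        /* Reading the field back as an int, as most of the code already
         * does, yields -1 in both cases, which is why making syscallno an
         * int removes the ambiguity. */
        printf("as an int:   %d\n", (int)regs.syscallno);               /* -1 */

        return 0;
}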
In order to preserve the stp x instruction that stores the syscall number in entry.S, this patch special-cases the layout of struct pt_regs for big endian so that the newly 32-bit syscallno field maps onto the low bits of the stored value. This is not beautiful, but benchmarking of the getpid syscall on Juno suggests indicates a minor slowdown if the stp is split into an stp x and stp w. Signed-off-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 34 +++++++++++++++++----------------- arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/signal.c | 6 +++--- arch/arm64/kernel/signal32.c | 2 +- arch/arm64/kernel/traps.c | 2 +- 5 files changed, 23 insertions(+), 23 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b738880350f9..3bf0bd7a2f29 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -142,8 +142,8 @@ alternative_else_nop_endif * Set syscallno to -1 by default (overridden later if real syscall). */ .if \el == 0 - mvn x21, xzr - str x21, [sp, #S_SYSCALLNO] + mvn w21, wzr + str w21, [sp, #S_SYSCALLNO] .endif /* @@ -290,8 +290,9 @@ alternative_else_nop_endif * * x7 is reserved for the system call number in 32-bit mode. */ -sc_nr .req x25 // number of system calls -scno .req x26 // syscall number +wsc_nr .req w25 // number of system calls +wscno .req w26 // syscall number +xscno .req x26 // syscall number (zero-extended) stbl .req x27 // syscall table pointer tsk .req x28 // current thread_info @@ -577,8 +578,8 @@ el0_svc_compat: * AArch32 syscall handling */ adrp stbl, compat_sys_call_table // load compat syscall table pointer - uxtw scno, w7 // syscall number in w7 (r7) - mov sc_nr, #__NR_compat_syscalls + mov wscno, w7 // syscall number in w7 (r7) + mov wsc_nr, #__NR_compat_syscalls b el0_svc_naked .align 6 @@ -798,19 +799,19 @@ ENDPROC(ret_from_fork) .align 6 el0_svc: adrp stbl, sys_call_table // load syscall table pointer - uxtw scno, w8 // syscall number in w8 - mov sc_nr, #__NR_syscalls + mov wscno, w8 // syscall number in w8 + mov wsc_nr, #__NR_syscalls el0_svc_naked: // compat entry point - stp x0, scno, [sp, #S_ORIG_X0] // save the original x0 and syscall number + stp x0, xscno, [sp, #S_ORIG_X0] // save the original x0 and syscall number enable_dbg_and_irq ct_user_exit 1 ldr x16, [tsk, #TSK_TI_FLAGS] // check for syscall hooks tst x16, #_TIF_SYSCALL_WORK b.ne __sys_trace - cmp scno, sc_nr // check upper syscall limit + cmp wscno, wsc_nr // check upper syscall limit b.hs ni_sys - ldr x16, [stbl, scno, lsl #3] // address in the syscall table + ldr x16, [stbl, xscno, lsl #3] // address in the syscall table blr x16 // call sys_* routine b ret_fast_syscall ni_sys: @@ -824,24 +825,23 @@ ENDPROC(el0_svc) * switches, and waiting for our parent to respond. */ __sys_trace: - mov w0, #-1 // set default errno for - cmp scno, x0 // user-issued syscall(-1) + cmp wscno, #-1 // user-issued syscall(-1)? b.ne 1f - mov x0, #-ENOSYS + mov x0, #-ENOSYS // set default errno if so str x0, [sp, #S_X0] 1: mov x0, sp bl syscall_trace_enter cmp w0, #-1 // skip the syscall? 
b.eq __sys_trace_return_skipped - uxtw scno, w0 // syscall number (possibly new) + mov wscno, w0 // syscall number (possibly new) mov x1, sp // pointer to regs - cmp scno, sc_nr // check upper syscall limit + cmp wscno, wsc_nr // check upper syscall limit b.hs __ni_sys_trace ldp x0, x1, [sp] // restore the syscall args ldp x2, x3, [sp, #S_X2] ldp x4, x5, [sp, #S_X4] ldp x6, x7, [sp, #S_X6] - ldr x16, [stbl, scno, lsl #3] // address in the syscall table + ldr x16, [stbl, xscno, lsl #3] // address in the syscall table blr x16 // call sys_* routine __sys_trace_return: diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 1b38c0150aec..de774805f672 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1363,7 +1363,7 @@ static void tracehook_report_syscall(struct pt_regs *regs, if (dir == PTRACE_SYSCALL_EXIT) tracehook_report_syscall_exit(regs, 0); else if (tracehook_report_syscall_entry(regs)) - regs->syscallno = ~0UL; + regs->syscallno = ~0; regs->regs[regno] = saved_reg; } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 089c3747995d..4d04b891c00d 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -387,7 +387,7 @@ static int restore_sigframe(struct pt_regs *regs, /* * Avoid sys_rt_sigreturn() restarting. */ - regs->syscallno = ~0UL; + regs->syscallno = ~0; err |= !valid_user_regs(®s->user_regs, current); if (err == 0) @@ -673,7 +673,7 @@ static void do_signal(struct pt_regs *regs) { unsigned long continue_addr = 0, restart_addr = 0; int retval = 0; - int syscall = (int)regs->syscallno; + int syscall = regs->syscallno; struct ksignal ksig; /* @@ -687,7 +687,7 @@ static void do_signal(struct pt_regs *regs) /* * Avoid additional syscall restarting via ret_to_user. */ - regs->syscallno = ~0UL; + regs->syscallno = ~0; /* * Prepare for system call restart. We do this here so that a diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index c747a0fc5d7d..d98ca76cbd39 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -354,7 +354,7 @@ static int compat_restore_sigframe(struct pt_regs *regs, /* * Avoid compat_sys_sigreturn() restarting. */ - regs->syscallno = ~0UL; + regs->syscallno = ~0; err |= !valid_user_regs(®s->user_regs, current); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 8a62648848e5..0f047e916cee 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -593,7 +593,7 @@ asmlinkage long do_ni_syscall(struct pt_regs *regs) if (show_unhandled_signals_ratelimited()) { pr_info("%s[%d]: syscall %d\n", current->comm, - task_pid_nr(current), (int)regs->syscallno); + task_pid_nr(current), regs->syscallno); dump_instr("", regs); if (user_mode(regs)) __show_regs(regs); -- cgit v1.2.3 From 17c28958600928109049a3bcc814b0d5bfb1ff3a Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 1 Aug 2017 15:35:54 +0100 Subject: arm64: Abstract syscallno manipulation The -1 "no syscall" value is written in various ways, shared with the user ABI in some places, and generally obscure. This patch attempts to make things a little more consistent and readable by replacing all these uses with a single #define. A couple of symbolic helpers are provided to clarify the intent further. Because the in-syscall check in do_signal() is changed from >= 0 to != NO_SYSCALL by this patch, different behaviour may be observable if syscallno is set to values less than -1 by a tracer. 
However, this is not different from the behaviour that is already observable if a tracer sets syscallno to a value >= __NR_(compat_)syscalls. It appears that this can cause spurious syscall restarting, but that is not a new behaviour either, and does not appear harmful. Signed-off-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 10 ++++------ arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/signal.c | 10 +++++----- arch/arm64/kernel/signal32.c | 2 +- 4 files changed, 11 insertions(+), 13 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 3bf0bd7a2f29..cace76d17535 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -138,11 +138,9 @@ alternative_else_nop_endif stp x22, x23, [sp, #S_PC] - /* - * Set syscallno to -1 by default (overridden later if real syscall). - */ + /* Not in a syscall by default (el0_svc overwrites for real syscall) */ .if \el == 0 - mvn w21, wzr + mov w21, #NO_SYSCALL str w21, [sp, #S_SYSCALLNO] .endif @@ -825,13 +823,13 @@ ENDPROC(el0_svc) * switches, and waiting for our parent to respond. */ __sys_trace: - cmp wscno, #-1 // user-issued syscall(-1)? + cmp wscno, #NO_SYSCALL // user-issued syscall(-1)? b.ne 1f mov x0, #-ENOSYS // set default errno if so str x0, [sp, #S_X0] 1: mov x0, sp bl syscall_trace_enter - cmp w0, #-1 // skip the syscall? + cmp w0, #NO_SYSCALL // skip the syscall? b.eq __sys_trace_return_skipped mov wscno, w0 // syscall number (possibly new) mov x1, sp // pointer to regs diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index de774805f672..28619b5b6746 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1363,7 +1363,7 @@ static void tracehook_report_syscall(struct pt_regs *regs, if (dir == PTRACE_SYSCALL_EXIT) tracehook_report_syscall_exit(regs, 0); else if (tracehook_report_syscall_entry(regs)) - regs->syscallno = ~0; + forget_syscall(regs); regs->regs[regno] = saved_reg; } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 4d04b891c00d..4991e87f80cc 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -387,7 +388,7 @@ static int restore_sigframe(struct pt_regs *regs, /* * Avoid sys_rt_sigreturn() restarting. */ - regs->syscallno = ~0; + forget_syscall(regs); err |= !valid_user_regs(®s->user_regs, current); if (err == 0) @@ -673,13 +674,12 @@ static void do_signal(struct pt_regs *regs) { unsigned long continue_addr = 0, restart_addr = 0; int retval = 0; - int syscall = regs->syscallno; struct ksignal ksig; /* * If we were from a system call, check for system call restarting... */ - if (syscall >= 0) { + if (in_syscall(regs)) { continue_addr = regs->pc; restart_addr = continue_addr - (compat_thumb_mode(regs) ? 2 : 4); retval = regs->regs[0]; @@ -687,7 +687,7 @@ static void do_signal(struct pt_regs *regs) /* * Avoid additional syscall restarting via ret_to_user. */ - regs->syscallno = ~0; + forget_syscall(regs); /* * Prepare for system call restart. We do this here so that a @@ -731,7 +731,7 @@ static void do_signal(struct pt_regs *regs) * Handle restarting a different system call. As above, if a debugger * has chosen to restart at a different PC, ignore the restart. 
*/ - if (syscall >= 0 && regs->pc == restart_addr) { + if (in_syscall(regs) && regs->pc == restart_addr) { if (retval == -ERESTART_RESTARTBLOCK) setup_restart_syscall(regs); user_rewind_single_step(current); diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index d98ca76cbd39..4e5a664be04b 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -354,7 +354,7 @@ static int compat_restore_sigframe(struct pt_regs *regs, /* * Avoid compat_sys_sigreturn() restarting. */ - regs->syscallno = ~0; + forget_syscall(regs); err |= !valid_user_regs(®s->user_regs, current); -- cgit v1.2.3 From 11cefd5ac25f242349994140a3bce3a20db0c751 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 7 Aug 2017 12:36:35 +0100 Subject: arm64: neon: Export kernel_neon_busy to loadable modules may_use_simd() can be invoked from loadable modules and it accesses kernel_neon_busy. Make sure that the latter is exported. Fixes: cb84d11e1625 ("arm64: neon: Remove support for nested or hardirq kernel-mode NEON") Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 138fcfaeadc1..9da4e636b328 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -255,6 +255,7 @@ void fpsimd_flush_task_state(struct task_struct *t) #ifdef CONFIG_KERNEL_MODE_NEON DEFINE_PER_CPU(bool, kernel_neon_busy); +EXPORT_PER_CPU_SYMBOL(kernel_neon_busy); /* * Kernel-side NEON support functions -- cgit v1.2.3 From 1031a1592908ccd3240f4a5731c96c382c932310 Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Sat, 1 Jul 2017 12:03:35 +0530 Subject: arm64: perf: Allow more than one cycle counter to be used Currently: $ perf stat -e cycles:u -e cycles:k true Performance counter stats for 'true': 2,24,699 cycles:u cycles:k (0.00%) 0.000788087 seconds time elapsed We can not count more than one cycle counter in one instance,because we allow to map cycle counter into PMCCNTR_EL0 only. However, if I did not miss anything then specification do not prohibit to use PMEVCNTR_EL0 for cycle count as well. Modify the code so that it still prefers to use PMCCNTR_EL0 for cycle counter, however allow to use PMEVCNTR_EL0 if PMCCNTR_EL0 is already in use. After this patch: $ perf stat -e cycles:u -e cycles:k true Performance counter stats for 'true': 2,17,310 cycles:u 7,40,009 cycles:k 0.000764149 seconds time elapsed Signed-off-by: Pratyush Anand Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index b5798ba21189..372317667773 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -846,17 +846,14 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, struct hw_perf_event *hwc = &event->hw; unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; - /* Always place a cycle counter into the cycle counter. */ + /* Always prefer to place a cycle counter into the cycle counter. 
*/ if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) { - if (test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV8_IDX_CYCLE_COUNTER; + if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) + return ARMV8_IDX_CYCLE_COUNTER; } /* - * For anything other than a cycle counter, try and use - * the events counters + * Otherwise use events counters */ for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) { if (!test_and_set_bit(idx, cpuc->used_mask)) -- cgit v1.2.3 From 2d0e751a4789fc5ab4a5c9de5d6407b41fdfbbf0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 26 Jul 2017 11:14:53 +0100 Subject: arm64: consistently use bl for C exception entry In most cases, our exception entry assembly branches to C handlers with a BL instruction, but in cases where we do not expect to return, we use B instead. While this is correct today, it means that backtraces for fatal exceptions miss the entry assembly (as the LR is stale at the point we call C code), while non-fatal exceptions have the entry assembly in the LR. In subsequent patches, we will need the LR to be set in these cases in order to backtrace reliably. This patch updates these sites to use a BL, ensuring consistency, and preparing for backtrace rework. An ASM_BUG() is added after each of these new BLs, which both catches unexpected returns, and ensures that the LR value doesn't point to another function label. Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/kernel/entry.S | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b738880350f9..660612a07ec5 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -351,7 +351,8 @@ END(vectors) mov x0, sp mov x1, #\reason mrs x2, esr_el1 - b bad_mode + bl bad_mode + ASM_BUG() .endm el0_sync_invalid: @@ -448,14 +449,16 @@ el1_sp_pc: mrs x0, far_el1 enable_dbg mov x2, sp - b do_sp_pc_abort + bl do_sp_pc_abort + ASM_BUG() el1_undef: /* * Undefined instruction */ enable_dbg mov x0, sp - b do_undefinstr + bl do_undefinstr + ASM_BUG() el1_dbg: /* * Debug exception handling @@ -473,7 +476,8 @@ el1_inv: mov x0, sp mov x2, x1 mov x1, #BAD_SYNC - b bad_mode + bl bad_mode + ASM_BUG() ENDPROC(el1_sync) .align 6 -- cgit v1.2.3 From ed84b4e9582bdfeffc617589fe17dddfc5fe6672 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 26 Jul 2017 16:05:20 +0100 Subject: arm64: move non-entry code out of .entry.text Currently, cpu_switch_to and ret_from_fork both live in .entry.text, though neither form the critical path for an exception entry. In subsequent patches, we will require that code in .entry.text is part of the critical path for exception entry, for which we can assume certain properties (e.g. the presence of exception regs on the stack). Neither cpu_switch_to nor ret_from_fork will meet these requirements, so we must move them out of .entry.text. To ensure that neither are kprobed after being moved out of .entry.text, we must explicitly blacklist them, requiring a new NOKPROBE() asm helper. 
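For background, the idiom behind such a blacklist helper is to record the affected symbols' addresses in a dedicated ELF section and scan that section later. The sketch below is a plain userspace analogy, not the kernel's NOKPROBE() definition: NO_PROBE, the section name "noprobe_list" and both critical_* functions are invented for the example, while the __start_/__stop_ bounding symbols are a standard linker feature for sections whose names are C identifiers.

#include <stdio.h>

static void critical_a(void) { }
static void critical_b(void) { }

/* Record a function's address in a dedicated section, analogous to what
 * NOKPROBE()/NOKPROBE_SYMBOL() do with the _kprobe_blacklist section. */
#define NO_PROBE(fn)                                            \
        static void (* const __noprobe_##fn)(void)              \
        __attribute__((used, section("noprobe_list"))) = (fn)

NO_PROBE(critical_a);
NO_PROBE(critical_b);

/* The linker provides these bounds for the section automatically. */
extern void (* const __start_noprobe_list[])(void);
extern void (* const __stop_noprobe_list[])(void);

static int is_blacklisted(void (*fn)(void))
{
        for (void (* const *p)(void) = __start_noprobe_list;
             p < __stop_noprobe_list; p++) {
                if (*p == fn)
                        return 1;
        }
        return 0;
}

int main(void)
{
        printf("critical_a blacklisted: %d\n", is_blacklisted(critical_a));
        printf("critical_b blacklisted: %d\n", is_blacklisted(critical_b));
        return 0;
}

The same "collect addresses in a named section, then walk the linker-provided bounds" pattern is what the kprobes core uses to honour blacklist annotations on C functions.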
Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/kernel/entry.S | 90 ++++++++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 44 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 660612a07ec5..9e126d3d8b53 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -710,38 +710,6 @@ el0_irq_naked: b ret_to_user ENDPROC(el0_irq) -/* - * Register switch for AArch64. The callee-saved registers need to be saved - * and restored. On entry: - * x0 = previous task_struct (must be preserved across the switch) - * x1 = next task_struct - * Previous and next are guaranteed not to be the same. - * - */ -ENTRY(cpu_switch_to) - mov x10, #THREAD_CPU_CONTEXT - add x8, x0, x10 - mov x9, sp - stp x19, x20, [x8], #16 // store callee-saved registers - stp x21, x22, [x8], #16 - stp x23, x24, [x8], #16 - stp x25, x26, [x8], #16 - stp x27, x28, [x8], #16 - stp x29, x9, [x8], #16 - str lr, [x8] - add x8, x1, x10 - ldp x19, x20, [x8], #16 // restore callee-saved registers - ldp x21, x22, [x8], #16 - ldp x23, x24, [x8], #16 - ldp x25, x26, [x8], #16 - ldp x27, x28, [x8], #16 - ldp x29, x9, [x8], #16 - ldr lr, [x8] - mov sp, x9 - msr sp_el0, x1 - ret -ENDPROC(cpu_switch_to) - /* * This is the fast syscall return path. We do as little as possible here, * and this includes saving x0 back into the kernel stack. @@ -784,18 +752,6 @@ finish_ret_to_user: kernel_exit 0 ENDPROC(ret_to_user) -/* - * This is how we return from a fork. - */ -ENTRY(ret_from_fork) - bl schedule_tail - cbz x19, 1f // not a kernel thread - mov x0, x20 - blr x19 -1: get_thread_info tsk - b ret_to_user -ENDPROC(ret_from_fork) - /* * SVC handler. */ @@ -869,3 +825,49 @@ ENTRY(sys_rt_sigreturn_wrapper) mov x0, sp b sys_rt_sigreturn ENDPROC(sys_rt_sigreturn_wrapper) + +/* + * Register switch for AArch64. The callee-saved registers need to be saved + * and restored. On entry: + * x0 = previous task_struct (must be preserved across the switch) + * x1 = next task_struct + * Previous and next are guaranteed not to be the same. + * + */ +ENTRY(cpu_switch_to) + mov x10, #THREAD_CPU_CONTEXT + add x8, x0, x10 + mov x9, sp + stp x19, x20, [x8], #16 // store callee-saved registers + stp x21, x22, [x8], #16 + stp x23, x24, [x8], #16 + stp x25, x26, [x8], #16 + stp x27, x28, [x8], #16 + stp x29, x9, [x8], #16 + str lr, [x8] + add x8, x1, x10 + ldp x19, x20, [x8], #16 // restore callee-saved registers + ldp x21, x22, [x8], #16 + ldp x23, x24, [x8], #16 + ldp x25, x26, [x8], #16 + ldp x27, x28, [x8], #16 + ldp x29, x9, [x8], #16 + ldr lr, [x8] + mov sp, x9 + msr sp_el0, x1 + ret +ENDPROC(cpu_switch_to) +NOKPROBE(cpu_switch_to) + +/* + * This is how we return from a fork. + */ +ENTRY(ret_from_fork) + bl schedule_tail + cbz x19, 1f // not a kernel thread + mov x0, x20 + blr x19 +1: get_thread_info tsk + b ret_to_user +ENDPROC(ret_from_fork) +NOKPROBE(ret_from_fork) -- cgit v1.2.3 From 096683724cb2eb95fea759a2580996df1039fdd0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 20 Jul 2017 14:01:01 +0100 Subject: arm64: unwind: avoid percpu indirection for irq stack Our IRQ_STACK_PTR() and on_irq_stack() helpers both take a cpu argument, used to generate a percpu address. In all cases, they are passed {raw_,}smp_processor_id(), so this parameter is redundant. 
Since {raw_,}smp_processor_id() use a percpu variable internally, this approach means we generate a percpu offset to find the current cpu, then use this to index an array of percpu offsets, which we then use to find the current CPU's IRQ stack pointer. Thus, most of the work is redundant. Instead, we can consistently use raw_cpu_ptr() to generate the CPU's irq_stack pointer by simply adding the percpu offset to the irq_stack address, which is simpler in both respects. Signed-off-by: Mark Rutland Signed-off-by: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/stacktrace.c | 4 ++-- arch/arm64/kernel/traps.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 1b38c0150aec..baf0838205c7 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -127,7 +127,7 @@ static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) { return ((addr & ~(THREAD_SIZE - 1)) == (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) || - on_irq_stack(addr, raw_smp_processor_id()); + on_irq_stack(addr); } /** diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 09d37d66b630..6ffb965be641 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -54,13 +54,13 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) * non-preemptible context. */ if (tsk == current && !preemptible()) - irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id()); + irq_stack_ptr = IRQ_STACK_PTR(); else irq_stack_ptr = 0; low = frame->sp; /* irq stacks are not THREAD_SIZE aligned */ - if (on_irq_stack(frame->sp, raw_smp_processor_id())) + if (on_irq_stack(frame->sp)) high = irq_stack_ptr; else high = ALIGN(low, THREAD_SIZE) - 0x20; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index d48f47080213..5797f5037ec9 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -159,7 +159,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) * non-preemptible context. */ if (tsk == current && !preemptible()) - irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id()); + irq_stack_ptr = IRQ_STACK_PTR(); else irq_stack_ptr = 0; -- cgit v1.2.3 From c7365330753c55a061db0a1837a27fd5e44b1408 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 22 Jul 2017 12:48:34 +0100 Subject: arm64: unwind: disregard frame.sp when validating frame pointer Currently, when unwinding the call stack, we validate the frame pointer of each frame against frame.sp, whose value is not clearly defined, and which makes it more difficult to link stack frames together across different stacks. It is far better to simply check whether the frame pointer itself points into a valid stack. 
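A minimal model of that check is sketched below (userspace only; fp_on_stack and the stack bounds are invented for the example, while the real kernel helpers are on_task_stack()/on_irq_stack() as seen in the diff that follows):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified model: a candidate frame pointer is only accepted if it is
 * 16-byte aligned and lies within a known stack, leaving room for the
 * {fp, lr} record stored at the frame base. */
static bool fp_on_stack(uintptr_t fp, uintptr_t stack_low, uintptr_t stack_high)
{
        if (fp & 0xf)                   /* AAPCS64 frames are 16-byte aligned */
                return false;
        return fp >= stack_low && fp <= stack_high - 16;
}

int main(void)
{
        uintptr_t low = 0x1000, high = 0x5000;  /* made-up stack bounds */

        printf("%d\n", fp_on_stack(0x2010, low, high));  /* 1: plausible   */
        printf("%d\n", fp_on_stack(0x2018, low, high));  /* 0: misaligned  */
        printf("%d\n", fp_on_stack(0x8000, low, high));  /* 0: off-stack   */
        return 0;
}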
Signed-off-by: Ard Biesheuvel Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/kernel/stacktrace.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 6ffb965be641..beaf51fb3088 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -42,9 +42,10 @@ */ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) { - unsigned long high, low; unsigned long fp = frame->fp; - unsigned long irq_stack_ptr; + + if (fp & 0xf) + return -EINVAL; if (!tsk) tsk = current; @@ -53,19 +54,8 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) * Switching between stacks is valid when tracing current and in * non-preemptible context. */ - if (tsk == current && !preemptible()) - irq_stack_ptr = IRQ_STACK_PTR(); - else - irq_stack_ptr = 0; - - low = frame->sp; - /* irq stacks are not THREAD_SIZE aligned */ - if (on_irq_stack(frame->sp)) - high = irq_stack_ptr; - else - high = ALIGN(low, THREAD_SIZE) - 0x20; - - if (fp < low || fp > high || fp & 0xf) + if (!(tsk == current && !preemptible() && on_irq_stack(fp)) && + !on_task_stack(tsk, fp)) return -EINVAL; frame->sp = fp + 0x10; @@ -94,9 +84,9 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) * Check the frame->fp we read from the bottom of the irq_stack, * and the original task stack pointer are both in current->stack. */ - if (frame->sp == irq_stack_ptr) { + if (frame->sp == IRQ_STACK_PTR()) { struct pt_regs *irq_args; - unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); + unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(frame->sp); if (object_is_on_stack((void *)orig_sp) && object_is_on_stack((void *)frame->fp)) { -- cgit v1.2.3 From 6c833bb9247ed51028279ef7b82ebbbe60d789e3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 8 Aug 2017 16:58:33 +0100 Subject: arm64: perf: Allow standard PMUv3 events to be extended by the CPU type Rather than continue adding CPU-specific event maps, instead look up by default in the PMUv3 event map and only fallback to the CPU-specific maps if either the event isn't described by PMUv3, or it is described but the PMCEID registers say that it is unsupported by the current CPU. 
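A toy model of the resulting two-level lookup is shown below; the tables, event numbers and pmceid_supported() are all made up for illustration, and the real logic lives in __armv8_pmuv3_map_event() in the diff that follows:

#include <stdio.h>

#define HW_OP_UNSUPPORTED       (-1)    /* illustrative sentinel */

/* Toy tables indexed by a generic perf event id; the values are invented
 * hardware event encodings, not real PMUv3 numbers. */
static const int pmuv3_map[3]     = { 0x11, 0x08, HW_OP_UNSUPPORTED };
static const int cpu_extra_map[3] = { HW_OP_UNSUPPORTED, 0xc2, 0xc3 };

/* Stand-in for the PMCEID check: pretend this CPU does not implement the
 * architected encoding 0x08. */
static int pmceid_supported(int hw)
{
        return hw != 0x08;
}

static int map_event(int config)
{
        int hw = pmuv3_map[config];

        /* Prefer the architected PMUv3 encoding when the CPU advertises it. */
        if (hw != HW_OP_UNSUPPORTED && pmceid_supported(hw))
                return hw;

        /* Otherwise fall back to the CPU-specific extension map. */
        return cpu_extra_map[config];
}

int main(void)
{
        for (int config = 0; config < 3; config++)
                printf("config %d -> hw event %#x\n", config, map_event(config));
        return 0;
}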
Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 46 ++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 20 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 372317667773..b83f986e7fbf 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -921,7 +921,13 @@ static void armv8pmu_reset(void *info) ARMV8_PMU_PMCR_LC); } -static int armv8_pmuv3_map_event(struct perf_event *event) +static int __armv8_pmuv3_map_event(struct perf_event *event, + const unsigned (*extra_event_map) + [PERF_COUNT_HW_MAX], + const unsigned (*extra_cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]) { int hw_event_id; struct arm_pmu *armpmu = to_arm_pmu(event->pmu); @@ -929,44 +935,44 @@ static int armv8_pmuv3_map_event(struct perf_event *event) hw_event_id = armpmu_map_event(event, &armv8_pmuv3_perf_map, &armv8_pmuv3_perf_cache_map, ARMV8_PMU_EVTYPE_EVENT); - if (hw_event_id < 0) - return hw_event_id; - /* disable micro/arch events not supported by this PMU */ - if ((hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) && - !test_bit(hw_event_id, armpmu->pmceid_bitmap)) { - return -EOPNOTSUPP; + /* Onl expose micro/arch events supported by this PMU */ + if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) + && test_bit(hw_event_id, armpmu->pmceid_bitmap)) { + return hw_event_id; } - return hw_event_id; + return armpmu_map_event(event, extra_event_map, extra_cache_map, + ARMV8_PMU_EVTYPE_EVENT); +} + +static int armv8_pmuv3_map_event(struct perf_event *event) +{ + return __armv8_pmuv3_map_event(event, NULL, NULL); } static int armv8_a53_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_a53_perf_map, - &armv8_a53_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + return __armv8_pmuv3_map_event(event, &armv8_a53_perf_map, + &armv8_a53_perf_cache_map); } static int armv8_a57_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_a57_perf_map, - &armv8_a57_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + return __armv8_pmuv3_map_event(event, &armv8_a57_perf_map, + &armv8_a57_perf_cache_map); } static int armv8_thunder_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_thunder_perf_map, - &armv8_thunder_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + return __armv8_pmuv3_map_event(event, &armv8_thunder_perf_map, + &armv8_thunder_perf_cache_map); } static int armv8_vulcan_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_vulcan_perf_map, - &armv8_vulcan_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + return __armv8_pmuv3_map_event(event, &armv8_vulcan_perf_map, + &armv8_vulcan_perf_cache_map); } struct armv8pmu_probe_info { -- cgit v1.2.3 From d46befef4c03fb61a62b3319ff5265aaac7bc465 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 25 Jul 2017 11:55:39 +0100 Subject: arm64: Convert __inval_cache_range() to area-based __inval_cache_range() is already the odd one out among our data cache maintenance routines as the only remaining range-based one; as we're going to want an invalidation routine to call from C code for the pmem API, let's tweak the prototype and name to bring it in line with the clean operations, and to make its relationship with __dma_inv_area() neatly mirror that of __clean_dcache_area_poc() and __dma_clean_area(). 
The loop clearing the early page tables gets mildly massaged in the process for the sake of consistency. Reviewed-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/kernel/head.S | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 973df7de7bf8..73a0531e0187 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -143,8 +143,8 @@ preserve_boot_args: dmb sy // needed before dc ivac with // MMU off - add x1, x0, #0x20 // 4 x 8 bytes - b __inval_cache_range // tail call + mov x1, #0x20 // 4 x 8 bytes + b __inval_dcache_area // tail call ENDPROC(preserve_boot_args) /* @@ -221,20 +221,20 @@ __create_page_tables: * dirty cache lines being evicted. */ adrp x0, idmap_pg_dir - adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE - bl __inval_cache_range + ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) + bl __inval_dcache_area /* * Clear the idmap and swapper page tables. */ adrp x0, idmap_pg_dir - adrp x6, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE + ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) 1: stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 - cmp x0, x6 - b.lo 1b + subs x1, x1, #64 + b.ne 1b mov x7, SWAPPER_MM_MMUFLAGS @@ -307,9 +307,9 @@ __create_page_tables: * tables again to remove any speculatively loaded cache lines. */ adrp x0, idmap_pg_dir - adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE + ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) dmb sy - bl __inval_cache_range + bl __inval_dcache_area ret x28 ENDPROC(__create_page_tables) -- cgit v1.2.3 From 7aac405ebb3224037efd56b73d82d181111cdac3 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 25 Jul 2017 11:55:40 +0100 Subject: arm64: Expose DC CVAP to userspace The ARMv8.2-DCPoP feature introduces persistent memory support to the architecture, by defining a point of persistence in the memory hierarchy, and a corresponding cache maintenance operation, DC CVAP. Expose the support via HWCAP and MRS emulation. 
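From userspace, the new capability can be probed roughly as follows (a sketch: getauxval() and AT_HWCAP are standard, and the fallback #define assumes the HWCAP bit assigned by this series in case the installed uapi header predates it):

#include <stdio.h>
#include <sys/auxv.h>
#include <asm/hwcap.h>          /* arm64 uapi HWCAP_* definitions */

#ifndef HWCAP_DCPOP
#define HWCAP_DCPOP     (1 << 16)       /* assumed bit if the header is old */
#endif

int main(void)
{
        unsigned long hwcaps = getauxval(AT_HWCAP);

        if (hwcaps & HWCAP_DCPOP)
                printf("DC CVAP available: data can be cleaned to the point of persistence\n");
        else
                printf("DCPoP not advertised: fall back to DC CVAC, no pmem guarantee\n");
        return 0;
}

The same information appears as the "dcpop" string in /proc/cpuinfo, as added to hwcap_str below.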
Reviewed-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 2 ++ arch/arm64/kernel/cpuinfo.c | 1 + 2 files changed, 3 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9f9e0064c8c1..a2542ef3ff25 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -120,6 +120,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_FCMA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_DPB_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -916,6 +917,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_DCPOP), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC), diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index f495ee5049fd..311885962830 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -68,6 +68,7 @@ static const char *const hwcap_str[] = { "jscvt", "fcma", "lrcpc", + "dcpop", NULL }; -- cgit v1.2.3 From e1bc5d1b8e0547c258e65dd97a03560f4d69e635 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 25 Jul 2017 11:55:41 +0100 Subject: arm64: Handle trapped DC CVAP Cache clean to PoP is subject to the same access controls as to PoC, so if we are trapping userspace cache maintenance with SCTLR_EL1.UCI, we need to be prepared to handle it. To avoid getting into complicated fights with binutils about ARMv8.2 options, we'll just cheat and use the raw SYS instruction rather than the 'proper' DC alias. Reviewed-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/kernel/traps.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 0f047e916cee..ccb9727d67b2 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -484,6 +484,9 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) case ESR_ELx_SYS64_ISS_CRM_DC_CVAC: /* DC CVAC, gets promoted */ __user_cache_maint("dc civac", address, ret); break; + case ESR_ELx_SYS64_ISS_CRM_DC_CVAP: /* DC CVAP */ + __user_cache_maint("sys 3, c7, c12, 1", address, ret); + break; case ESR_ELx_SYS64_ISS_CRM_DC_CIVAC: /* DC CIVAC */ __user_cache_maint("dc civac", address, ret); break; -- cgit v1.2.3 From d50e071fdaa33c1b399c764c44fa1ce879881185 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 25 Jul 2017 11:55:42 +0100 Subject: arm64: Implement pmem API support Add a clean-to-point-of-persistence cache maintenance helper, and wire up the basic architectural support for the pmem driver based on it. 
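To give a feel for what such maintenance looks like, here is a hedged userspace sketch rather than the kernel's actual helper: it cleans a buffer to the point of persistence line by line using the raw SYS encoding of DC CVAP quoted in the previous patch, deriving the line size from CTR_EL0. clean_to_pop() and the conservative full-system DSB are illustrative choices, and the code is only meaningful on hardware and kernels that advertise HWCAP_DCPOP.

#include <stddef.h>
#include <stdint.h>

/* D-cache line size from CTR_EL0: DminLine (bits [19:16]) is the log2 of
 * the line size in 4-byte words. */
static inline size_t dcache_line_size(void)
{
        uint64_t ctr;

        asm volatile("mrs %0, ctr_el0" : "=r" (ctr));
        return (size_t)4 << ((ctr >> 16) & 0xf);
}

/* Clean [addr, addr + len) to the point of persistence. */
static void clean_to_pop(const void *addr, size_t len)
{
        size_t line = dcache_line_size();
        uintptr_t p = (uintptr_t)addr & ~(line - 1);
        uintptr_t end = (uintptr_t)addr + len;

        for (; p < end; p += line) {
                /* DC CVAP, written as SYS #3, C7, C12, #1, Xt so that
                 * pre-ARMv8.2 assemblers accept it. */
                asm volatile("sys 3, c7, c12, 1, %0" : : "r" (p) : "memory");
        }

        asm volatile("dsb sy" : : : "memory");  /* wait for completion */
}

int main(void)
{
        static char buf[256];

        buf[0] = 42;
        clean_to_pop(buf, sizeof(buf));
        return 0;
}

Compile for aarch64 only; on CPUs without ARMv8.2-DCPoP the DC CVAP encoding is undefined and will fault, which is why checking HWCAP_DCPOP first matters.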
Reviewed-by: Will Deacon Signed-off-by: Robin Murphy [catalin.marinas@arm.com: move arch_*_pmem() functions to arch/arm64/mm/flush.c] [catalin.marinas@arm.com: change dmb(sy) to dmb(osh)] Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a2542ef3ff25..cd52d365d1f0 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -889,6 +889,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .min_field_value = 0, .matches = has_no_fpsimd, }, +#ifdef CONFIG_ARM64_PMEM + { + .desc = "Data cache clean to Point of Persistence", + .capability = ARM64_HAS_DCPOP, + .def_scope = SCOPE_SYSTEM, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64ISAR1_EL1, + .field_pos = ID_AA64ISAR1_DPB_SHIFT, + .min_field_value = 1, + }, +#endif {}, }; -- cgit v1.2.3 From 739586951b8abe381a98797a5e27a0a9336333d6 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 26 Jul 2017 20:07:37 +0300 Subject: arm64/vdso: Support mremap() for vDSO vDSO VMA address is saved in mm_context for the purpose of using restorer from vDSO page to return to userspace after signal handling. In Checkpoint Restore in Userspace (CRIU) project we place vDSO VMA on restore back to the place where it was on the dump. With the exception for x86 (where there is API to map vDSO with arch_prctl()), we move vDSO inherited from CRIU task to restoree position by mremap(). CRIU does support arm64 architecture, but kernel doesn't update context.vdso pointer after mremap(). Which results in translation fault after signal handling on restored application: https://github.com/xemul/criu/issues/288 Make vDSO code track the VMA address by supplying .mremap() fops the same way it's done for x86 and arm32 by: commit b059a453b1cf ("x86/vdso: Add mremap hook to vm_special_mapping") commit 280e87e98c09 ("ARM: 8683/1: ARM32: Support mremap() for sigpage/vDSO"). Cc: Russell King Cc: linux-arm-kernel@lists.infradead.org Cc: Cyrill Gorcunov Cc: Pavel Emelyanov Cc: Christopher Covington Reviewed-by: Will Deacon Signed-off-by: Dmitry Safonov Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index e8f759f764f2..2d419006ad43 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -110,12 +110,27 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) } #endif /* CONFIG_COMPAT */ +static int vdso_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + unsigned long new_size = new_vma->vm_end - new_vma->vm_start; + unsigned long vdso_size = vdso_end - vdso_start; + + if (vdso_size != new_size) + return -EINVAL; + + current->mm->context.vdso = (void *)new_vma->vm_start; + + return 0; +} + static struct vm_special_mapping vdso_spec[2] __ro_after_init = { { .name = "[vvar]", }, { .name = "[vdso]", + .mremap = vdso_mremap, }, }; -- cgit v1.2.3 From 7326749801396105aef0ed9229df746ac9e24300 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 22 Jul 2017 18:45:33 +0100 Subject: arm64: unwind: reference pt_regs via embedded stack frame As it turns out, the unwind code is slightly broken, and probably has been for a while. 
The problem is in the dumping of the exception stack, which is intended to dump the contents of the pt_regs struct at each level in the call stack where an exception was taken and routed to a routine marked as __exception (which means its stack frame is right below the pt_regs struct on the stack).

'Right below the pt_regs struct' is ill-defined, though: the unwind code assigns 'frame pointer + 0x10' to the .sp member of the stackframe struct at each level, and dump_backtrace() happily dereferences that as the pt_regs pointer when encountering an __exception routine. However, the actual size of the stack frame created by this routine (which could be any one of the many __exception routines we have in the kernel) is not known, so frame.sp is pretty useless for figuring out where struct pt_regs really is.

So it seems the only way to ensure that we can find our struct pt_regs when walking the stack frames is to put it at a known, fixed offset from the stack frame pointer that is passed to such __exception routines. The simplest way to do that is to put it inside pt_regs itself, which is the main change implemented by this patch. As a bonus, doing this allows us to get rid of a fair amount of cruft related to walking from one stack to the other, which is especially nice since we intend to introduce yet another stack for overflow handling once we add support for vmapped stacks. It also fixes an inconsistency whereby we only add a stack frame pointing to ELR_EL1 when executing from the IRQ stack, but not when executing from the task stack.

To consistently identify exception regs even in the presence of exceptions taken from entry code, we must check whether the next frame was created by entry text, rather than whether the current frame was created by exception text.

To avoid backtracing using PCs that fall in the idmap, or are controlled by userspace, we must explicitly zero the FP and LR in startup paths, and must ensure that the frame embedded in pt_regs is zeroed upon entry from EL0. To avoid these NULL entries showing up in the backtrace, unwind_frame() is updated to reject them.
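To illustrate the termination rule, here is a synthetic userspace sketch: struct frame_record simply mirrors the {fp, lr} pair stored at the base of an AAPCS64 frame, and the addresses in main() are fabricated rather than taken from a real stack.

#include <stdint.h>
#include <stdio.h>

/* What AAPCS64 code stores at the location the frame pointer points to:
 * the caller's frame pointer and the return address. */
struct frame_record {
        uint64_t fp;
        uint64_t lr;
};

static void walk(uint64_t fp)
{
        while (fp) {
                const struct frame_record *rec =
                        (const struct frame_record *)(uintptr_t)fp;

                /* Frames synthesised on entry from EL0 carry a {0, 0}
                 * record: stop there instead of reporting a bogus entry. */
                if (!rec->fp && !rec->lr)
                        break;

                printf("return address: %#llx\n", (unsigned long long)rec->lr);
                fp = rec->fp;
        }
}

int main(void)
{
        /* Build a synthetic three-frame chain ending in the {0, 0}
         * terminator that the entry code now plants for exceptions taken
         * from EL0. */
        struct frame_record bottom = { 0, 0 };
        struct frame_record callee = { (uint64_t)(uintptr_t)&bottom, 0x400104 };
        struct frame_record caller = { (uint64_t)(uintptr_t)&callee, 0x400080 };

        walk((uint64_t)(uintptr_t)&caller);
        return 0;
}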
Signed-off-by: Ard Biesheuvel [Mark: compare current frame against .entry.text, avoid bogus PCs] Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kernel/entry.S | 20 ++++++++++++-------- arch/arm64/kernel/head.S | 4 ++++ arch/arm64/kernel/stacktrace.c | 33 ++++++--------------------------- arch/arm64/kernel/traps.c | 32 +++++++------------------------- 5 files changed, 30 insertions(+), 60 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index b3bb7ef97bc8..71bf088f1e4b 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -75,6 +75,7 @@ int main(void) DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0)); DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit)); + DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 9e126d3d8b53..612a077ba109 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -111,6 +111,18 @@ mrs x23, spsr_el1 stp lr, x21, [sp, #S_LR] + /* + * In order to be able to dump the contents of struct pt_regs at the + * time the exception was taken (in case we attempt to walk the call + * stack later), chain it together with the stack frames. + */ + .if \el == 0 + stp xzr, xzr, [sp, #S_STACKFRAME] + .else + stp x29, x22, [sp, #S_STACKFRAME] + .endif + add x29, sp, #S_STACKFRAME + #ifdef CONFIG_ARM64_SW_TTBR0_PAN /* * Set the TTBR0 PAN bit in SPSR. When the exception is taken from @@ -265,14 +277,6 @@ alternative_else_nop_endif /* switch to the irq stack */ mov sp, x26 - - /* - * Add a dummy stack frame, this non-standard format is fixed up - * by unwind_frame() - */ - stp x29, x19, [sp, #-16]! - mov x29, sp - 9998: .endm diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 973df7de7bf8..f9e4aacf4f42 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -362,6 +362,9 @@ __primary_switched: ret // to __primary_switch() 0: #endif + add sp, sp, #16 + mov x29, #0 + mov x30, #0 b start_kernel ENDPROC(__primary_switched) @@ -617,6 +620,7 @@ __secondary_switched: ldr x2, [x0, #CPU_BOOT_TASK] msr sp_el0, x2 mov x29, #0 + mov x30, #0 b secondary_start_kernel ENDPROC(__secondary_switched) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index beaf51fb3088..81d9262acaf0 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -76,34 +76,13 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ /* - * Check whether we are going to walk through from interrupt stack - * to task stack. - * If we reach the end of the stack - and its an interrupt stack, - * unpack the dummy frame to find the original elr. - * - * Check the frame->fp we read from the bottom of the irq_stack, - * and the original task stack pointer are both in current->stack. + * Frames created upon entry from EL0 have NULL FP and PC values, so + * don't bother reporting these. Frames created by __noreturn functions + * might have a valid FP even if PC is bogus, so only terminate where + * both are NULL. 
*/ - if (frame->sp == IRQ_STACK_PTR()) { - struct pt_regs *irq_args; - unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(frame->sp); - - if (object_is_on_stack((void *)orig_sp) && - object_is_on_stack((void *)frame->fp)) { - frame->sp = orig_sp; - - /* orig_sp is the saved pt_regs, find the elr */ - irq_args = (struct pt_regs *)orig_sp; - frame->pc = irq_args->pc; - } else { - /* - * This frame has a non-standard format, and we - * didn't fix it, because the data looked wrong. - * Refuse to output this frame. - */ - return -EINVAL; - } - } + if (!frame->fp && !frame->pc) + return -EINVAL; return 0; } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 5797f5037ec9..075c29a24345 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -143,7 +143,6 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) { struct stackframe frame; - unsigned long irq_stack_ptr; int skip; pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); @@ -154,15 +153,6 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) if (!try_get_task_stack(tsk)) return; - /* - * Switching between stacks is valid when tracing current and in - * non-preemptible context. - */ - if (tsk == current && !preemptible()) - irq_stack_ptr = IRQ_STACK_PTR(); - else - irq_stack_ptr = 0; - if (tsk == current) { frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_stack_pointer; @@ -182,13 +172,12 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) skip = !!regs; printk("Call trace:\n"); while (1) { - unsigned long where = frame.pc; unsigned long stack; int ret; /* skip until specified stack frame */ if (!skip) { - dump_backtrace_entry(where); + dump_backtrace_entry(frame.pc); } else if (frame.fp == regs->regs[29]) { skip = 0; /* @@ -203,20 +192,13 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) ret = unwind_frame(tsk, &frame); if (ret < 0) break; - stack = frame.sp; - if (in_exception_text(where)) { - /* - * If we switched to the irq_stack before calling this - * exception handler, then the pt_regs will be on the - * task stack. The easiest way to tell is if the large - * pt_regs would overlap with the end of the irq_stack. - */ - if (stack < irq_stack_ptr && - (stack + sizeof(struct pt_regs)) > irq_stack_ptr) - stack = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); + if (in_entry_text(frame.pc)) { + stack = frame.fp - offsetof(struct pt_regs, stackframe); - dump_mem("", "Exception stack", stack, - stack + sizeof(struct pt_regs)); + if (on_task_stack(tsk, stack) || + (tsk == current && !preemptible() && on_irq_stack(stack))) + dump_mem("", "Exception stack", stack, + stack + sizeof(struct pt_regs)); } } -- cgit v1.2.3 From 31e43ad3b74a5d7b282023b72f25fc677c14c727 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 23 Jul 2017 09:05:38 +0100 Subject: arm64: unwind: remove sp from struct stackframe The unwind code sets the sp member of struct stackframe to 'frame pointer + 0x10' unconditionally, without regard for whether doing so produces a legal value. So let's simply remove it now that we have stopped using it anyway. Signed-off-by: Ard Biesheuvel Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/kernel/perf_callcha