From 504641859e5c616210c0894149e09fb6928e398f Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 3 Aug 2017 17:23:21 +0100 Subject: arm64: fpsimd: Consistently use __this_cpu_ ops where appropriate __this_cpu_ ops are not used consistently with regard to this_cpu_ ops in a couple of places in fpsimd.c. Since preemption is explicitly disabled in fpsimd_restore_current_state() and fpsimd_update_current_state(), this patch converts this_cpu_ ops in those functions to __this_cpu_ ops. This doesn't save cost on arm64, but benefits from additional assertions in the core code. Signed-off-by: Dave Martin Reviewed-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 06da8ea16bbe..d7e5f8a2d4f5 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -194,7 +194,7 @@ void fpsimd_restore_current_state(void) struct fpsimd_state *st = ¤t->thread.fpsimd_state; fpsimd_load_state(st); - this_cpu_write(fpsimd_last_state, st); + __this_cpu_write(fpsimd_last_state, st); st->cpu = smp_processor_id(); } preempt_enable(); @@ -214,7 +214,7 @@ void fpsimd_update_current_state(struct fpsimd_state *state) if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { struct fpsimd_state *st = ¤t->thread.fpsimd_state; - this_cpu_write(fpsimd_last_state, st); + __this_cpu_write(fpsimd_last_state, st); st->cpu = smp_processor_id(); } preempt_enable(); -- cgit v1.2.3 From 4328825d4fdc185d365d8e858cace8b324198a70 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 3 Aug 2017 17:23:22 +0100 Subject: arm64: neon: Allow EFI runtime services to use FPSIMD in irq context In order to be able to cope with kernel-mode NEON being unavailable in hardirq/nmi context and non-nestable, we need special handling for EFI runtime service calls that may be made during an interrupt that interrupted a kernel_neon_begin()..._end() block. This will occur if the kernel tries to write diagnostic data to EFI persistent storage during a panic triggered by an NMI for example. EFI runtime services specify an ABI that clobbers the FPSIMD state, rather than being able to use it optionally as an accelerator. This means that EFI is really a special case and can be handled specially. To enable EFI calls from interrupts, this patch creates dedicated __efi_fpsimd_{begin,end}() helpers solely for this purpose, which save/restore to a separate percpu buffer if called in a context where kernel_neon_begin() is not usable. Signed-off-by: Dave Martin Reviewed-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index d7e5f8a2d4f5..bcde88e2d981 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -21,12 +21,15 @@ #include #include #include +#include #include #include #include #include #include +#include +#include #define FPEXC_IOF (1 << 0) #define FPEXC_DZF (1 << 1) @@ -276,6 +279,55 @@ void kernel_neon_end(void) } EXPORT_SYMBOL(kernel_neon_end); +DEFINE_PER_CPU(struct fpsimd_state, efi_fpsimd_state); +DEFINE_PER_CPU(bool, efi_fpsimd_state_used); + +/* + * EFI runtime services support functions + * + * The ABI for EFI runtime services allows EFI to use FPSIMD during the call. 
+ * This means that for EFI (and only for EFI), we have to assume that FPSIMD + * is always used rather than being an optional accelerator. + * + * These functions provide the necessary support for ensuring FPSIMD + * save/restore in the contexts from which EFI is used. + * + * Do not use them for any other purpose -- if tempted to do so, you are + * either doing something wrong or you need to propose some refactoring. + */ + +/* + * __efi_fpsimd_begin(): prepare FPSIMD for making an EFI runtime services call + */ +void __efi_fpsimd_begin(void) +{ + if (!system_supports_fpsimd()) + return; + + WARN_ON(preemptible()); + + if (may_use_simd()) + kernel_neon_begin(); + else { + fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state)); + __this_cpu_write(efi_fpsimd_state_used, true); + } +} + +/* + * __efi_fpsimd_end(): clean up FPSIMD after an EFI runtime services call + */ +void __efi_fpsimd_end(void) +{ + if (!system_supports_fpsimd()) + return; + + if (__this_cpu_xchg(efi_fpsimd_state_used, false)) + fpsimd_load_state(this_cpu_ptr(&efi_fpsimd_state)); + else + kernel_neon_end(); +} + #endif /* CONFIG_KERNEL_MODE_NEON */ #ifdef CONFIG_CPU_PM -- cgit v1.2.3 From cb84d11e1625aa3a081d898ca2640bf3a9ca0e96 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 3 Aug 2017 17:23:23 +0100 Subject: arm64: neon: Remove support for nested or hardirq kernel-mode NEON Support for kernel-mode NEON to be nested and/or used in hardirq context adds significant complexity, and the benefits may be marginal. In practice, kernel-mode NEON is not used in hardirq context, and is rarely used in softirq context (by certain mac80211 drivers). This patch implements an arm64 may_use_simd() function to allow clients to check whether kernel-mode NEON is usable in the current context, and simplifies kernel_neon_{begin,end}() to handle only saving of the task FPSIMD state (if any). Without nesting, there is no other state to save. The partial fpsimd save/restore functions become redundant as a result of these changes, so they are removed too. The save/restore model is changed to operate directly on task_struct without additional percpu storage. This simplifies the code and saves a bit of memory, but means that softirqs must now be disabled when manipulating the task fpsimd state from task context: correspondingly, preempt_{en,dis}sable() calls are upgraded to local_bh_{en,dis}able() as appropriate. fpsimd_thread_switch() already runs with hardirqs disabled and so is already protected from softirqs. These changes should make it easier to support kernel-mode NEON in the presence of the Scalable Vector extension in the future. Signed-off-by: Dave Martin Reviewed-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry-fpsimd.S | 24 -------- arch/arm64/kernel/fpsimd.c | 115 ++++++++++++++++++++++++++------------- 2 files changed, 78 insertions(+), 61 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index c44a82f146b1..6a27cd6dbfa6 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -41,27 +41,3 @@ ENTRY(fpsimd_load_state) fpsimd_restore x0, 8 ret ENDPROC(fpsimd_load_state) - -#ifdef CONFIG_KERNEL_MODE_NEON - -/* - * Save the bottom n FP registers. - * - * x0 - pointer to struct fpsimd_partial_state - */ -ENTRY(fpsimd_save_partial_state) - fpsimd_save_partial x0, 1, 8, 9 - ret -ENDPROC(fpsimd_save_partial_state) - -/* - * Load the bottom n FP registers. 
- * - * x0 - pointer to struct fpsimd_partial_state - */ -ENTRY(fpsimd_load_partial_state) - fpsimd_restore_partial x0, 8, 9 - ret -ENDPROC(fpsimd_load_partial_state) - -#endif diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index bcde88e2d981..138fcfaeadc1 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -17,18 +17,18 @@ * along with this program. If not, see . */ +#include #include #include #include #include +#include #include #include #include -#include #include #include -#include #include #define FPEXC_IOF (1 << 0) @@ -65,6 +65,13 @@ * CPU currently contain the most recent userland FPSIMD state of the current * task. * + * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may + * save the task's FPSIMD context back to task_struct from softirq context. + * To prevent this from racing with the manipulation of the task's FPSIMD state + * from task context and thereby corrupting the state, it is necessary to + * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE + * flag with local_bh_disable() unless softirqs are already masked. + * * For a certain task, the sequence may look something like this: * - the task gets scheduled in; if both the task's fpsimd_state.cpu field * contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu @@ -164,9 +171,14 @@ void fpsimd_flush_thread(void) { if (!system_supports_fpsimd()) return; + + local_bh_disable(); + memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); fpsimd_flush_task_state(current); set_thread_flag(TIF_FOREIGN_FPSTATE); + + local_bh_enable(); } /* @@ -177,10 +189,13 @@ void fpsimd_preserve_current_state(void) { if (!system_supports_fpsimd()) return; - preempt_disable(); + + local_bh_disable(); + if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) fpsimd_save_state(¤t->thread.fpsimd_state); - preempt_enable(); + + local_bh_enable(); } /* @@ -192,7 +207,9 @@ void fpsimd_restore_current_state(void) { if (!system_supports_fpsimd()) return; - preempt_disable(); + + local_bh_disable(); + if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { struct fpsimd_state *st = ¤t->thread.fpsimd_state; @@ -200,7 +217,8 @@ void fpsimd_restore_current_state(void) __this_cpu_write(fpsimd_last_state, st); st->cpu = smp_processor_id(); } - preempt_enable(); + + local_bh_enable(); } /* @@ -212,7 +230,9 @@ void fpsimd_update_current_state(struct fpsimd_state *state) { if (!system_supports_fpsimd()) return; - preempt_disable(); + + local_bh_disable(); + fpsimd_load_state(state); if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { struct fpsimd_state *st = ¤t->thread.fpsimd_state; @@ -220,7 +240,8 @@ void fpsimd_update_current_state(struct fpsimd_state *state) __this_cpu_write(fpsimd_last_state, st); st->cpu = smp_processor_id(); } - preempt_enable(); + + local_bh_enable(); } /* @@ -233,49 +254,69 @@ void fpsimd_flush_task_state(struct task_struct *t) #ifdef CONFIG_KERNEL_MODE_NEON -static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate); -static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate); +DEFINE_PER_CPU(bool, kernel_neon_busy); /* * Kernel-side NEON support functions */ -void kernel_neon_begin_partial(u32 num_regs) + +/* + * kernel_neon_begin(): obtain the CPU FPSIMD registers for use by the calling + * context + * + * Must not be called unless may_use_simd() returns true. + * Task context in the FPSIMD registers is saved back to memory as necessary. 
+ * + * A matching call to kernel_neon_end() must be made before returning from the + * calling context. + * + * The caller may freely use the FPSIMD registers until kernel_neon_end() is + * called. + */ +void kernel_neon_begin(void) { if (WARN_ON(!system_supports_fpsimd())) return; - if (in_interrupt()) { - struct fpsimd_partial_state *s = this_cpu_ptr( - in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); - BUG_ON(num_regs > 32); - fpsimd_save_partial_state(s, roundup(num_regs, 2)); - } else { - /* - * Save the userland FPSIMD state if we have one and if we - * haven't done so already. Clear fpsimd_last_state to indicate - * that there is no longer userland FPSIMD state in the - * registers. - */ - preempt_disable(); - if (current->mm && - !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) - fpsimd_save_state(¤t->thread.fpsimd_state); - this_cpu_write(fpsimd_last_state, NULL); - } + BUG_ON(!may_use_simd()); + + local_bh_disable(); + + __this_cpu_write(kernel_neon_busy, true); + + /* Save unsaved task fpsimd state, if any: */ + if (current->mm && !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) + fpsimd_save_state(¤t->thread.fpsimd_state); + + /* Invalidate any task state remaining in the fpsimd regs: */ + __this_cpu_write(fpsimd_last_state, NULL); + + preempt_disable(); + + local_bh_enable(); } -EXPORT_SYMBOL(kernel_neon_begin_partial); +EXPORT_SYMBOL(kernel_neon_begin); +/* + * kernel_neon_end(): give the CPU FPSIMD registers back to the current task + * + * Must be called from a context in which kernel_neon_begin() was previously + * called, with no call to kernel_neon_end() in the meantime. + * + * The caller must not use the FPSIMD registers after this function is called, + * unless kernel_neon_begin() is called again in the meantime. + */ void kernel_neon_end(void) { + bool busy; + if (!system_supports_fpsimd()) return; - if (in_interrupt()) { - struct fpsimd_partial_state *s = this_cpu_ptr( - in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); - fpsimd_load_partial_state(s); - } else { - preempt_enable(); - } + + busy = __this_cpu_xchg(kernel_neon_busy, false); + WARN_ON(!busy); /* No matching kernel_neon_begin()? */ + + preempt_enable(); } EXPORT_SYMBOL(kernel_neon_end); -- cgit v1.2.3 From 35d0e6fb4d219d64ab3b7cffef7a11a0662140f5 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 1 Aug 2017 15:35:53 +0100 Subject: arm64: syscallno is secretly an int, make it official The upper 32 bits of the syscallno field in thread_struct are handled inconsistently, being sometimes zero extended and sometimes sign-extended. In fact, only the lower 32 bits seem to have any real significance for the behaviour of the code: it's been OK to handle the upper bits inconsistently because they don't matter. Currently, the only place I can find where those bits are significant is in calling trace_sys_enter(), which may be unintentional: for example, if a compat tracer attempts to cancel a syscall by passing -1 to (COMPAT_)PTRACE_SET_SYSCALL at the syscall-enter-stop, it will be traced as syscall 4294967295 rather than -1 as might be expected (and as occurs for a native tracer doing the same thing). Elsewhere, reads of syscallno cast it to an int or truncate it. There's also a conspicuous amount of code and casting to bodge around the fact that although semantically an int, syscallno is stored as a u64. Let's not pretend any more. 
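As an aside, the zero- versus sign-extension mismatch described above can be shown with a small standalone userspace sketch (not kernel code; struct old_regs and its field are a hypothetical stand-in for the old u64 syscallno slot):

#include <stdio.h>
#include <stdint.h>

/* Hypothetical stand-in for the old layout: the syscall number lives in
 * a 64-bit slot although only the low 32 bits are meaningful. */
struct old_regs {
        uint64_t syscallno;
};

int main(void)
{
        struct old_regs regs;

        /* A native 64-bit tracer writing -1 stores the sign-extended value. */
        regs.syscallno = (uint64_t)(int64_t)-1;
        printf("native view: %lld\n", (long long)regs.syscallno);      /* -1 */

        /* A compat tracer writes -1 through a 32-bit view, so the value is
         * zero-extended and the "same" cancelled syscall looks different. */
        regs.syscallno = (uint32_t)-1;
        printf("compat view: %llu\n",
               (unsigned long long)regs.syscallno);             /* 4294967295 */

        /* Reading the field back as an int, as most of the code already
         * does, yields -1 in both cases, which is why making syscallno an
         * int removes the ambiguity. */
        printf("as an int:   %d\n", (int)regs.syscallno);               /* -1 */

        return 0;
}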
In order to preserve the stp x instruction that stores the syscall number in entry.S, this patch special-cases the layout of struct pt_regs for big endian so that the newly 32-bit syscallno field maps onto the low bits of the stored value. This is not beautiful, but benchmarking of the getpid syscall on Juno suggests indicates a minor slowdown if the stp is split into an stp x and stp w. Signed-off-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 34 +++++++++++++++++----------------- arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/signal.c | 6 +++--- arch/arm64/kernel/signal32.c | 2 +- arch/arm64/kernel/traps.c | 2 +- 5 files changed, 23 insertions(+), 23 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b738880350f9..3bf0bd7a2f29 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -142,8 +142,8 @@ alternative_else_nop_endif * Set syscallno to -1 by default (overridden later if real syscall). */ .if \el == 0 - mvn x21, xzr - str x21, [sp, #S_SYSCALLNO] + mvn w21, wzr + str w21, [sp, #S_SYSCALLNO] .endif /* @@ -290,8 +290,9 @@ alternative_else_nop_endif * * x7 is reserved for the system call number in 32-bit mode. */ -sc_nr .req x25 // number of system calls -scno .req x26 // syscall number +wsc_nr .req w25 // number of system calls +wscno .req w26 // syscall number +xscno .req x26 // syscall number (zero-extended) stbl .req x27 // syscall table pointer tsk .req x28 // current thread_info @@ -577,8 +578,8 @@ el0_svc_compat: * AArch32 syscall handling */ adrp stbl, compat_sys_call_table // load compat syscall table pointer - uxtw scno, w7 // syscall number in w7 (r7) - mov sc_nr, #__NR_compat_syscalls + mov wscno, w7 // syscall number in w7 (r7) + mov wsc_nr, #__NR_compat_syscalls b el0_svc_naked .align 6 @@ -798,19 +799,19 @@ ENDPROC(ret_from_fork) .align 6 el0_svc: adrp stbl, sys_call_table // load syscall table pointer - uxtw scno, w8 // syscall number in w8 - mov sc_nr, #__NR_syscalls + mov wscno, w8 // syscall number in w8 + mov wsc_nr, #__NR_syscalls el0_svc_naked: // compat entry point - stp x0, scno, [sp, #S_ORIG_X0] // save the original x0 and syscall number + stp x0, xscno, [sp, #S_ORIG_X0] // save the original x0 and syscall number enable_dbg_and_irq ct_user_exit 1 ldr x16, [tsk, #TSK_TI_FLAGS] // check for syscall hooks tst x16, #_TIF_SYSCALL_WORK b.ne __sys_trace - cmp scno, sc_nr // check upper syscall limit + cmp wscno, wsc_nr // check upper syscall limit b.hs ni_sys - ldr x16, [stbl, scno, lsl #3] // address in the syscall table + ldr x16, [stbl, xscno, lsl #3] // address in the syscall table blr x16 // call sys_* routine b ret_fast_syscall ni_sys: @@ -824,24 +825,23 @@ ENDPROC(el0_svc) * switches, and waiting for our parent to respond. */ __sys_trace: - mov w0, #-1 // set default errno for - cmp scno, x0 // user-issued syscall(-1) + cmp wscno, #-1 // user-issued syscall(-1)? b.ne 1f - mov x0, #-ENOSYS + mov x0, #-ENOSYS // set default errno if so str x0, [sp, #S_X0] 1: mov x0, sp bl syscall_trace_enter cmp w0, #-1 // skip the syscall? 
b.eq __sys_trace_return_skipped - uxtw scno, w0 // syscall number (possibly new) + mov wscno, w0 // syscall number (possibly new) mov x1, sp // pointer to regs - cmp scno, sc_nr // check upper syscall limit + cmp wscno, wsc_nr // check upper syscall limit b.hs __ni_sys_trace ldp x0, x1, [sp] // restore the syscall args ldp x2, x3, [sp, #S_X2] ldp x4, x5, [sp, #S_X4] ldp x6, x7, [sp, #S_X6] - ldr x16, [stbl, scno, lsl #3] // address in the syscall table + ldr x16, [stbl, xscno, lsl #3] // address in the syscall table blr x16 // call sys_* routine __sys_trace_return: diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 1b38c0150aec..de774805f672 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1363,7 +1363,7 @@ static void tracehook_report_syscall(struct pt_regs *regs, if (dir == PTRACE_SYSCALL_EXIT) tracehook_report_syscall_exit(regs, 0); else if (tracehook_report_syscall_entry(regs)) - regs->syscallno = ~0UL; + regs->syscallno = ~0; regs->regs[regno] = saved_reg; } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 089c3747995d..4d04b891c00d 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -387,7 +387,7 @@ static int restore_sigframe(struct pt_regs *regs, /* * Avoid sys_rt_sigreturn() restarting. */ - regs->syscallno = ~0UL; + regs->syscallno = ~0; err |= !valid_user_regs(®s->user_regs, current); if (err == 0) @@ -673,7 +673,7 @@ static void do_signal(struct pt_regs *regs) { unsigned long continue_addr = 0, restart_addr = 0; int retval = 0; - int syscall = (int)regs->syscallno; + int syscall = regs->syscallno; struct ksignal ksig; /* @@ -687,7 +687,7 @@ static void do_signal(struct pt_regs *regs) /* * Avoid additional syscall restarting via ret_to_user. */ - regs->syscallno = ~0UL; + regs->syscallno = ~0; /* * Prepare for system call restart. We do this here so that a diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index c747a0fc5d7d..d98ca76cbd39 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -354,7 +354,7 @@ static int compat_restore_sigframe(struct pt_regs *regs, /* * Avoid compat_sys_sigreturn() restarting. */ - regs->syscallno = ~0UL; + regs->syscallno = ~0; err |= !valid_user_regs(®s->user_regs, current); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 8a62648848e5..0f047e916cee 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -593,7 +593,7 @@ asmlinkage long do_ni_syscall(struct pt_regs *regs) if (show_unhandled_signals_ratelimited()) { pr_info("%s[%d]: syscall %d\n", current->comm, - task_pid_nr(current), (int)regs->syscallno); + task_pid_nr(current), regs->syscallno); dump_instr("", regs); if (user_mode(regs)) __show_regs(regs); -- cgit v1.2.3 From 17c28958600928109049a3bcc814b0d5bfb1ff3a Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 1 Aug 2017 15:35:54 +0100 Subject: arm64: Abstract syscallno manipulation The -1 "no syscall" value is written in various ways, shared with the user ABI in some places, and generally obscure. This patch attempts to make things a little more consistent and readable by replacing all these uses with a single #define. A couple of symbolic helpers are provided to clarify the intent further. Because the in-syscall check in do_signal() is changed from >= 0 to != NO_SYSCALL by this patch, different behaviour may be observable if syscallno is set to values less than -1 by a tracer. 
However, this is not different from the behaviour that is already observable if a tracer sets syscallno to a value >= __NR_(compat_)syscalls. It appears that this can cause spurious syscall restarting, but that is not a new behaviour either, and does not appear harmful. Signed-off-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 10 ++++------ arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/signal.c | 10 +++++----- arch/arm64/kernel/signal32.c | 2 +- 4 files changed, 11 insertions(+), 13 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 3bf0bd7a2f29..cace76d17535 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -138,11 +138,9 @@ alternative_else_nop_endif stp x22, x23, [sp, #S_PC] - /* - * Set syscallno to -1 by default (overridden later if real syscall). - */ + /* Not in a syscall by default (el0_svc overwrites for real syscall) */ .if \el == 0 - mvn w21, wzr + mov w21, #NO_SYSCALL str w21, [sp, #S_SYSCALLNO] .endif @@ -825,13 +823,13 @@ ENDPROC(el0_svc) * switches, and waiting for our parent to respond. */ __sys_trace: - cmp wscno, #-1 // user-issued syscall(-1)? + cmp wscno, #NO_SYSCALL // user-issued syscall(-1)? b.ne 1f mov x0, #-ENOSYS // set default errno if so str x0, [sp, #S_X0] 1: mov x0, sp bl syscall_trace_enter - cmp w0, #-1 // skip the syscall? + cmp w0, #NO_SYSCALL // skip the syscall? b.eq __sys_trace_return_skipped mov wscno, w0 // syscall number (possibly new) mov x1, sp // pointer to regs diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index de774805f672..28619b5b6746 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1363,7 +1363,7 @@ static void tracehook_report_syscall(struct pt_regs *regs, if (dir == PTRACE_SYSCALL_EXIT) tracehook_report_syscall_exit(regs, 0); else if (tracehook_report_syscall_entry(regs)) - regs->syscallno = ~0; + forget_syscall(regs); regs->regs[regno] = saved_reg; } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 4d04b891c00d..4991e87f80cc 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -387,7 +388,7 @@ static int restore_sigframe(struct pt_regs *regs, /* * Avoid sys_rt_sigreturn() restarting. */ - regs->syscallno = ~0; + forget_syscall(regs); err |= !valid_user_regs(®s->user_regs, current); if (err == 0) @@ -673,13 +674,12 @@ static void do_signal(struct pt_regs *regs) { unsigned long continue_addr = 0, restart_addr = 0; int retval = 0; - int syscall = regs->syscallno; struct ksignal ksig; /* * If we were from a system call, check for system call restarting... */ - if (syscall >= 0) { + if (in_syscall(regs)) { continue_addr = regs->pc; restart_addr = continue_addr - (compat_thumb_mode(regs) ? 2 : 4); retval = regs->regs[0]; @@ -687,7 +687,7 @@ static void do_signal(struct pt_regs *regs) /* * Avoid additional syscall restarting via ret_to_user. */ - regs->syscallno = ~0; + forget_syscall(regs); /* * Prepare for system call restart. We do this here so that a @@ -731,7 +731,7 @@ static void do_signal(struct pt_regs *regs) * Handle restarting a different system call. As above, if a debugger * has chosen to restart at a different PC, ignore the restart. 
*/ - if (syscall >= 0 && regs->pc == restart_addr) { + if (in_syscall(regs) && regs->pc == restart_addr) { if (retval == -ERESTART_RESTARTBLOCK) setup_restart_syscall(regs); user_rewind_single_step(current); diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index d98ca76cbd39..4e5a664be04b 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -354,7 +354,7 @@ static int compat_restore_sigframe(struct pt_regs *regs, /* * Avoid compat_sys_sigreturn() restarting. */ - regs->syscallno = ~0; + forget_syscall(regs); err |= !valid_user_regs(®s->user_regs, current); -- cgit v1.2.3 From 11cefd5ac25f242349994140a3bce3a20db0c751 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 7 Aug 2017 12:36:35 +0100 Subject: arm64: neon: Export kernel_neon_busy to loadable modules may_use_simd() can be invoked from loadable modules and it accesses kernel_neon_busy. Make sure that the latter is exported. Fixes: cb84d11e1625 ("arm64: neon: Remove support for nested or hardirq kernel-mode NEON") Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 138fcfaeadc1..9da4e636b328 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -255,6 +255,7 @@ void fpsimd_flush_task_state(struct task_struct *t) #ifdef CONFIG_KERNEL_MODE_NEON DEFINE_PER_CPU(bool, kernel_neon_busy); +EXPORT_PER_CPU_SYMBOL(kernel_neon_busy); /* * Kernel-side NEON support functions -- cgit v1.2.3 From 1031a1592908ccd3240f4a5731c96c382c932310 Mon Sep 17 00:00:00 2001 From: Pratyush Anand Date: Sat, 1 Jul 2017 12:03:35 +0530 Subject: arm64: perf: Allow more than one cycle counter to be used Currently: $ perf stat -e cycles:u -e cycles:k true Performance counter stats for 'true': 2,24,699 cycles:u cycles:k (0.00%) 0.000788087 seconds time elapsed We can not count more than one cycle counter in one instance,because we allow to map cycle counter into PMCCNTR_EL0 only. However, if I did not miss anything then specification do not prohibit to use PMEVCNTR_EL0 for cycle count as well. Modify the code so that it still prefers to use PMCCNTR_EL0 for cycle counter, however allow to use PMEVCNTR_EL0 if PMCCNTR_EL0 is already in use. After this patch: $ perf stat -e cycles:u -e cycles:k true Performance counter stats for 'true': 2,17,310 cycles:u 7,40,009 cycles:k 0.000764149 seconds time elapsed Signed-off-by: Pratyush Anand Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index b5798ba21189..372317667773 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -846,17 +846,14 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc, struct hw_perf_event *hwc = &event->hw; unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT; - /* Always place a cycle counter into the cycle counter. */ + /* Always prefer to place a cycle counter into the cycle counter. 
*/ if (evtype == ARMV8_PMUV3_PERFCTR_CPU_CYCLES) { - if (test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV8_IDX_CYCLE_COUNTER; + if (!test_and_set_bit(ARMV8_IDX_CYCLE_COUNTER, cpuc->used_mask)) + return ARMV8_IDX_CYCLE_COUNTER; } /* - * For anything other than a cycle counter, try and use - * the events counters + * Otherwise use events counters */ for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) { if (!test_and_set_bit(idx, cpuc->used_mask)) -- cgit v1.2.3 From 2d0e751a4789fc5ab4a5c9de5d6407b41fdfbbf0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 26 Jul 2017 11:14:53 +0100 Subject: arm64: consistently use bl for C exception entry In most cases, our exception entry assembly branches to C handlers with a BL instruction, but in cases where we do not expect to return, we use B instead. While this is correct today, it means that backtraces for fatal exceptions miss the entry assembly (as the LR is stale at the point we call C code), while non-fatal exceptions have the entry assembly in the LR. In subsequent patches, we will need the LR to be set in these cases in order to backtrace reliably. This patch updates these sites to use a BL, ensuring consistency, and preparing for backtrace rework. An ASM_BUG() is added after each of these new BLs, which both catches unexpected returns, and ensures that the LR value doesn't point to another function label. Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/kernel/entry.S | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b738880350f9..660612a07ec5 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -351,7 +351,8 @@ END(vectors) mov x0, sp mov x1, #\reason mrs x2, esr_el1 - b bad_mode + bl bad_mode + ASM_BUG() .endm el0_sync_invalid: @@ -448,14 +449,16 @@ el1_sp_pc: mrs x0, far_el1 enable_dbg mov x2, sp - b do_sp_pc_abort + bl do_sp_pc_abort + ASM_BUG() el1_undef: /* * Undefined instruction */ enable_dbg mov x0, sp - b do_undefinstr + bl do_undefinstr + ASM_BUG() el1_dbg: /* * Debug exception handling @@ -473,7 +476,8 @@ el1_inv: mov x0, sp mov x2, x1 mov x1, #BAD_SYNC - b bad_mode + bl bad_mode + ASM_BUG() ENDPROC(el1_sync) .align 6 -- cgit v1.2.3 From ed84b4e9582bdfeffc617589fe17dddfc5fe6672 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 26 Jul 2017 16:05:20 +0100 Subject: arm64: move non-entry code out of .entry.text Currently, cpu_switch_to and ret_from_fork both live in .entry.text, though neither form the critical path for an exception entry. In subsequent patches, we will require that code in .entry.text is part of the critical path for exception entry, for which we can assume certain properties (e.g. the presence of exception regs on the stack). Neither cpu_switch_to nor ret_from_fork will meet these requirements, so we must move them out of .entry.text. To ensure that neither are kprobed after being moved out of .entry.text, we must explicitly blacklist them, requiring a new NOKPROBE() asm helper. 
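For background, the idiom behind such a blacklist helper is to record the affected symbols' addresses in a dedicated ELF section and scan that section later. The sketch below is a plain userspace analogy, not the kernel's NOKPROBE() definition: NO_PROBE, the section name "noprobe_list" and both critical_* functions are invented for the example, while the __start_/__stop_ bounding symbols are a standard linker feature for sections whose names are C identifiers.

#include <stdio.h>

static void critical_a(void) { }
static void critical_b(void) { }

/* Record a function's address in a dedicated section, analogous to what
 * NOKPROBE()/NOKPROBE_SYMBOL() do with the _kprobe_blacklist section. */
#define NO_PROBE(fn)                                            \
        static void (* const __noprobe_##fn)(void)              \
        __attribute__((used, section("noprobe_list"))) = (fn)

NO_PROBE(critical_a);
NO_PROBE(critical_b);

/* The linker provides these bounds for the section automatically. */
extern void (* const __start_noprobe_list[])(void);
extern void (* const __stop_noprobe_list[])(void);

static int is_blacklisted(void (*fn)(void))
{
        for (void (* const *p)(void) = __start_noprobe_list;
             p < __stop_noprobe_list; p++) {
                if (*p == fn)
                        return 1;
        }
        return 0;
}

int main(void)
{
        printf("critical_a blacklisted: %d\n", is_blacklisted(critical_a));
        printf("critical_b blacklisted: %d\n", is_blacklisted(critical_b));
        return 0;
}

The same "collect addresses in a named section, then walk the linker-provided bounds" pattern is what the kprobes core uses to honour blacklist annotations on C functions.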
Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/kernel/entry.S | 90 ++++++++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 44 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 660612a07ec5..9e126d3d8b53 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -710,38 +710,6 @@ el0_irq_naked: b ret_to_user ENDPROC(el0_irq) -/* - * Register switch for AArch64. The callee-saved registers need to be saved - * and restored. On entry: - * x0 = previous task_struct (must be preserved across the switch) - * x1 = next task_struct - * Previous and next are guaranteed not to be the same. - * - */ -ENTRY(cpu_switch_to) - mov x10, #THREAD_CPU_CONTEXT - add x8, x0, x10 - mov x9, sp - stp x19, x20, [x8], #16 // store callee-saved registers - stp x21, x22, [x8], #16 - stp x23, x24, [x8], #16 - stp x25, x26, [x8], #16 - stp x27, x28, [x8], #16 - stp x29, x9, [x8], #16 - str lr, [x8] - add x8, x1, x10 - ldp x19, x20, [x8], #16 // restore callee-saved registers - ldp x21, x22, [x8], #16 - ldp x23, x24, [x8], #16 - ldp x25, x26, [x8], #16 - ldp x27, x28, [x8], #16 - ldp x29, x9, [x8], #16 - ldr lr, [x8] - mov sp, x9 - msr sp_el0, x1 - ret -ENDPROC(cpu_switch_to) - /* * This is the fast syscall return path. We do as little as possible here, * and this includes saving x0 back into the kernel stack. @@ -784,18 +752,6 @@ finish_ret_to_user: kernel_exit 0 ENDPROC(ret_to_user) -/* - * This is how we return from a fork. - */ -ENTRY(ret_from_fork) - bl schedule_tail - cbz x19, 1f // not a kernel thread - mov x0, x20 - blr x19 -1: get_thread_info tsk - b ret_to_user -ENDPROC(ret_from_fork) - /* * SVC handler. */ @@ -869,3 +825,49 @@ ENTRY(sys_rt_sigreturn_wrapper) mov x0, sp b sys_rt_sigreturn ENDPROC(sys_rt_sigreturn_wrapper) + +/* + * Register switch for AArch64. The callee-saved registers need to be saved + * and restored. On entry: + * x0 = previous task_struct (must be preserved across the switch) + * x1 = next task_struct + * Previous and next are guaranteed not to be the same. + * + */ +ENTRY(cpu_switch_to) + mov x10, #THREAD_CPU_CONTEXT + add x8, x0, x10 + mov x9, sp + stp x19, x20, [x8], #16 // store callee-saved registers + stp x21, x22, [x8], #16 + stp x23, x24, [x8], #16 + stp x25, x26, [x8], #16 + stp x27, x28, [x8], #16 + stp x29, x9, [x8], #16 + str lr, [x8] + add x8, x1, x10 + ldp x19, x20, [x8], #16 // restore callee-saved registers + ldp x21, x22, [x8], #16 + ldp x23, x24, [x8], #16 + ldp x25, x26, [x8], #16 + ldp x27, x28, [x8], #16 + ldp x29, x9, [x8], #16 + ldr lr, [x8] + mov sp, x9 + msr sp_el0, x1 + ret +ENDPROC(cpu_switch_to) +NOKPROBE(cpu_switch_to) + +/* + * This is how we return from a fork. + */ +ENTRY(ret_from_fork) + bl schedule_tail + cbz x19, 1f // not a kernel thread + mov x0, x20 + blr x19 +1: get_thread_info tsk + b ret_to_user +ENDPROC(ret_from_fork) +NOKPROBE(ret_from_fork) -- cgit v1.2.3 From 096683724cb2eb95fea759a2580996df1039fdd0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 20 Jul 2017 14:01:01 +0100 Subject: arm64: unwind: avoid percpu indirection for irq stack Our IRQ_STACK_PTR() and on_irq_stack() helpers both take a cpu argument, used to generate a percpu address. In all cases, they are passed {raw_,}smp_processor_id(), so this parameter is redundant. 
Since {raw_,}smp_processor_id() use a percpu variable internally, this approach means we generate a percpu offset to find the current cpu, then use this to index an array of percpu offsets, which we then use to find the current CPU's IRQ stack pointer. Thus, most of the work is redundant. Instead, we can consistently use raw_cpu_ptr() to generate the CPU's irq_stack pointer by simply adding the percpu offset to the irq_stack address, which is simpler in both respects. Signed-off-by: Mark Rutland Signed-off-by: Ard Biesheuvel Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/stacktrace.c | 4 ++-- arch/arm64/kernel/traps.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 1b38c0150aec..baf0838205c7 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -127,7 +127,7 @@ static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) { return ((addr & ~(THREAD_SIZE - 1)) == (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) || - on_irq_stack(addr, raw_smp_processor_id()); + on_irq_stack(addr); } /** diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 09d37d66b630..6ffb965be641 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -54,13 +54,13 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) * non-preemptible context. */ if (tsk == current && !preemptible()) - irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id()); + irq_stack_ptr = IRQ_STACK_PTR(); else irq_stack_ptr = 0; low = frame->sp; /* irq stacks are not THREAD_SIZE aligned */ - if (on_irq_stack(frame->sp, raw_smp_processor_id())) + if (on_irq_stack(frame->sp)) high = irq_stack_ptr; else high = ALIGN(low, THREAD_SIZE) - 0x20; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index d48f47080213..5797f5037ec9 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -159,7 +159,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) * non-preemptible context. */ if (tsk == current && !preemptible()) - irq_stack_ptr = IRQ_STACK_PTR(smp_processor_id()); + irq_stack_ptr = IRQ_STACK_PTR(); else irq_stack_ptr = 0; -- cgit v1.2.3 From c7365330753c55a061db0a1837a27fd5e44b1408 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 22 Jul 2017 12:48:34 +0100 Subject: arm64: unwind: disregard frame.sp when validating frame pointer Currently, when unwinding the call stack, we validate the frame pointer of each frame against frame.sp, whose value is not clearly defined, and which makes it more difficult to link stack frames together across different stacks. It is far better to simply check whether the frame pointer itself points into a valid stack. 
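A minimal model of that check is sketched below (userspace only; fp_on_stack and the stack bounds are invented for the example, while the real kernel helpers are on_task_stack()/on_irq_stack() as seen in the diff that follows):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified model: a candidate frame pointer is only accepted if it is
 * 16-byte aligned and lies within a known stack, leaving room for the
 * {fp, lr} record stored at the frame base. */
static bool fp_on_stack(uintptr_t fp, uintptr_t stack_low, uintptr_t stack_high)
{
        if (fp & 0xf)                   /* AAPCS64 frames are 16-byte aligned */
                return false;
        return fp >= stack_low && fp <= stack_high - 16;
}

int main(void)
{
        uintptr_t low = 0x1000, high = 0x5000;  /* made-up stack bounds */

        printf("%d\n", fp_on_stack(0x2010, low, high));  /* 1: plausible   */
        printf("%d\n", fp_on_stack(0x2018, low, high));  /* 0: misaligned  */
        printf("%d\n", fp_on_stack(0x8000, low, high));  /* 0: off-stack   */
        return 0;
}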
Signed-off-by: Ard Biesheuvel Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/kernel/stacktrace.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 6ffb965be641..beaf51fb3088 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -42,9 +42,10 @@ */ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) { - unsigned long high, low; unsigned long fp = frame->fp; - unsigned long irq_stack_ptr; + + if (fp & 0xf) + return -EINVAL; if (!tsk) tsk = current; @@ -53,19 +54,8 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) * Switching between stacks is valid when tracing current and in * non-preemptible context. */ - if (tsk == current && !preemptible()) - irq_stack_ptr = IRQ_STACK_PTR(); - else - irq_stack_ptr = 0; - - low = frame->sp; - /* irq stacks are not THREAD_SIZE aligned */ - if (on_irq_stack(frame->sp)) - high = irq_stack_ptr; - else - high = ALIGN(low, THREAD_SIZE) - 0x20; - - if (fp < low || fp > high || fp & 0xf) + if (!(tsk == current && !preemptible() && on_irq_stack(fp)) && + !on_task_stack(tsk, fp)) return -EINVAL; frame->sp = fp + 0x10; @@ -94,9 +84,9 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) * Check the frame->fp we read from the bottom of the irq_stack, * and the original task stack pointer are both in current->stack. */ - if (frame->sp == irq_stack_ptr) { + if (frame->sp == IRQ_STACK_PTR()) { struct pt_regs *irq_args; - unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); + unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(frame->sp); if (object_is_on_stack((void *)orig_sp) && object_is_on_stack((void *)frame->fp)) { -- cgit v1.2.3 From 6c833bb9247ed51028279ef7b82ebbbe60d789e3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 8 Aug 2017 16:58:33 +0100 Subject: arm64: perf: Allow standard PMUv3 events to be extended by the CPU type Rather than continue adding CPU-specific event maps, instead look up by default in the PMUv3 event map and only fallback to the CPU-specific maps if either the event isn't described by PMUv3, or it is described but the PMCEID registers say that it is unsupported by the current CPU. 
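A toy model of the resulting two-level lookup is shown below; the tables, event numbers and pmceid_supported() are all made up for illustration, and the real logic lives in __armv8_pmuv3_map_event() in the diff that follows:

#include <stdio.h>

#define HW_OP_UNSUPPORTED       (-1)    /* illustrative sentinel */

/* Toy tables indexed by a generic perf event id; the values are invented
 * hardware event encodings, not real PMUv3 numbers. */
static const int pmuv3_map[3]     = { 0x11, 0x08, HW_OP_UNSUPPORTED };
static const int cpu_extra_map[3] = { HW_OP_UNSUPPORTED, 0xc2, 0xc3 };

/* Stand-in for the PMCEID check: pretend this CPU does not implement the
 * architected encoding 0x08. */
static int pmceid_supported(int hw)
{
        return hw != 0x08;
}

static int map_event(int config)
{
        int hw = pmuv3_map[config];

        /* Prefer the architected PMUv3 encoding when the CPU advertises it. */
        if (hw != HW_OP_UNSUPPORTED && pmceid_supported(hw))
                return hw;

        /* Otherwise fall back to the CPU-specific extension map. */
        return cpu_extra_map[config];
}

int main(void)
{
        for (int config = 0; config < 3; config++)
                printf("config %d -> hw event %#x\n", config, map_event(config));
        return 0;
}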
Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 46 ++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 20 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 372317667773..b83f986e7fbf 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -921,7 +921,13 @@ static void armv8pmu_reset(void *info) ARMV8_PMU_PMCR_LC); } -static int armv8_pmuv3_map_event(struct perf_event *event) +static int __armv8_pmuv3_map_event(struct perf_event *event, + const unsigned (*extra_event_map) + [PERF_COUNT_HW_MAX], + const unsigned (*extra_cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]) { int hw_event_id; struct arm_pmu *armpmu = to_arm_pmu(event->pmu); @@ -929,44 +935,44 @@ static int armv8_pmuv3_map_event(struct perf_event *event) hw_event_id = armpmu_map_event(event, &armv8_pmuv3_perf_map, &armv8_pmuv3_perf_cache_map, ARMV8_PMU_EVTYPE_EVENT); - if (hw_event_id < 0) - return hw_event_id; - /* disable micro/arch events not supported by this PMU */ - if ((hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) && - !test_bit(hw_event_id, armpmu->pmceid_bitmap)) { - return -EOPNOTSUPP; + /* Onl expose micro/arch events supported by this PMU */ + if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) + && test_bit(hw_event_id, armpmu->pmceid_bitmap)) { + return hw_event_id; } - return hw_event_id; + return armpmu_map_event(event, extra_event_map, extra_cache_map, + ARMV8_PMU_EVTYPE_EVENT); +} + +static int armv8_pmuv3_map_event(struct perf_event *event) +{ + return __armv8_pmuv3_map_event(event, NULL, NULL); } static int armv8_a53_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_a53_perf_map, - &armv8_a53_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + return __armv8_pmuv3_map_event(event, &armv8_a53_perf_map, + &armv8_a53_perf_cache_map); } static int armv8_a57_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_a57_perf_map, - &armv8_a57_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + return __armv8_pmuv3_map_event(event, &armv8_a57_perf_map, + &armv8_a57_perf_cache_map); } static int armv8_thunder_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_thunder_perf_map, - &armv8_thunder_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + return __armv8_pmuv3_map_event(event, &armv8_thunder_perf_map, + &armv8_thunder_perf_cache_map); } static int armv8_vulcan_map_event(struct perf_event *event) { - return armpmu_map_event(event, &armv8_vulcan_perf_map, - &armv8_vulcan_perf_cache_map, - ARMV8_PMU_EVTYPE_EVENT); + return __armv8_pmuv3_map_event(event, &armv8_vulcan_perf_map, + &armv8_vulcan_perf_cache_map); } struct armv8pmu_probe_info { -- cgit v1.2.3 From d46befef4c03fb61a62b3319ff5265aaac7bc465 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 25 Jul 2017 11:55:39 +0100 Subject: arm64: Convert __inval_cache_range() to area-based __inval_cache_range() is already the odd one out among our data cache maintenance routines as the only remaining range-based one; as we're going to want an invalidation routine to call from C code for the pmem API, let's tweak the prototype and name to bring it in line with the clean operations, and to make its relationship with __dma_inv_area() neatly mirror that of __clean_dcache_area_poc() and __dma_clean_area(). 
The loop clearing the early page tables gets mildly massaged in the process for the sake of consistency. Reviewed-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/kernel/head.S | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 973df7de7bf8..73a0531e0187 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -143,8 +143,8 @@ preserve_boot_args: dmb sy // needed before dc ivac with // MMU off - add x1, x0, #0x20 // 4 x 8 bytes - b __inval_cache_range // tail call + mov x1, #0x20 // 4 x 8 bytes + b __inval_dcache_area // tail call ENDPROC(preserve_boot_args) /* @@ -221,20 +221,20 @@ __create_page_tables: * dirty cache lines being evicted. */ adrp x0, idmap_pg_dir - adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE - bl __inval_cache_range + ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) + bl __inval_dcache_area /* * Clear the idmap and swapper page tables. */ adrp x0, idmap_pg_dir - adrp x6, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE + ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) 1: stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 stp xzr, xzr, [x0], #16 - cmp x0, x6 - b.lo 1b + subs x1, x1, #64 + b.ne 1b mov x7, SWAPPER_MM_MMUFLAGS @@ -307,9 +307,9 @@ __create_page_tables: * tables again to remove any speculatively loaded cache lines. */ adrp x0, idmap_pg_dir - adrp x1, swapper_pg_dir + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE + ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE) dmb sy - bl __inval_cache_range + bl __inval_dcache_area ret x28 ENDPROC(__create_page_tables) -- cgit v1.2.3 From 7aac405ebb3224037efd56b73d82d181111cdac3 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 25 Jul 2017 11:55:40 +0100 Subject: arm64: Expose DC CVAP to userspace The ARMv8.2-DCPoP feature introduces persistent memory support to the architecture, by defining a point of persistence in the memory hierarchy, and a corresponding cache maintenance operation, DC CVAP. Expose the support via HWCAP and MRS emulation. 
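From userspace, the new capability can be probed roughly as follows (a sketch: getauxval() and AT_HWCAP are standard, and the fallback #define assumes the HWCAP bit assigned by this series in case the installed uapi header predates it):

#include <stdio.h>
#include <sys/auxv.h>
#include <asm/hwcap.h>          /* arm64 uapi HWCAP_* definitions */

#ifndef HWCAP_DCPOP
#define HWCAP_DCPOP     (1 << 16)       /* assumed bit if the header is old */
#endif

int main(void)
{
        unsigned long hwcaps = getauxval(AT_HWCAP);

        if (hwcaps & HWCAP_DCPOP)
                printf("DC CVAP available: data can be cleaned to the point of persistence\n");
        else
                printf("DCPoP not advertised: fall back to DC CVAC, no pmem guarantee\n");
        return 0;
}

The same information appears as the "dcpop" string in /proc/cpuinfo, as added to hwcap_str below.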
Reviewed-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 2 ++ arch/arm64/kernel/cpuinfo.c | 1 + 2 files changed, 3 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9f9e0064c8c1..a2542ef3ff25 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -120,6 +120,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_FCMA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_DPB_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -916,6 +917,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_DCPOP), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC), diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index f495ee5049fd..311885962830 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -68,6 +68,7 @@ static const char *const hwcap_str[] = { "jscvt", "fcma", "lrcpc", + "dcpop", NULL }; -- cgit v1.2.3 From e1bc5d1b8e0547c258e65dd97a03560f4d69e635 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 25 Jul 2017 11:55:41 +0100 Subject: arm64: Handle trapped DC CVAP Cache clean to PoP is subject to the same access controls as to PoC, so if we are trapping userspace cache maintenance with SCTLR_EL1.UCI, we need to be prepared to handle it. To avoid getting into complicated fights with binutils about ARMv8.2 options, we'll just cheat and use the raw SYS instruction rather than the 'proper' DC alias. Reviewed-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/kernel/traps.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 0f047e916cee..ccb9727d67b2 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -484,6 +484,9 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) case ESR_ELx_SYS64_ISS_CRM_DC_CVAC: /* DC CVAC, gets promoted */ __user_cache_maint("dc civac", address, ret); break; + case ESR_ELx_SYS64_ISS_CRM_DC_CVAP: /* DC CVAP */ + __user_cache_maint("sys 3, c7, c12, 1", address, ret); + break; case ESR_ELx_SYS64_ISS_CRM_DC_CIVAC: /* DC CIVAC */ __user_cache_maint("dc civac", address, ret); break; -- cgit v1.2.3 From d50e071fdaa33c1b399c764c44fa1ce879881185 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 25 Jul 2017 11:55:42 +0100 Subject: arm64: Implement pmem API support Add a clean-to-point-of-persistence cache maintenance helper, and wire up the basic architectural support for the pmem driver based on it. 
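To give a feel for what such maintenance looks like, here is a hedged userspace sketch rather than the kernel's actual helper: it cleans a buffer to the point of persistence line by line using the raw SYS encoding of DC CVAP quoted in the previous patch, deriving the line size from CTR_EL0. clean_to_pop() and the conservative full-system DSB are illustrative choices, and the code is only meaningful on hardware and kernels that advertise HWCAP_DCPOP.

#include <stddef.h>
#include <stdint.h>

/* D-cache line size from CTR_EL0: DminLine (bits [19:16]) is the log2 of
 * the line size in 4-byte words. */
static inline size_t dcache_line_size(void)
{
        uint64_t ctr;

        asm volatile("mrs %0, ctr_el0" : "=r" (ctr));
        return (size_t)4 << ((ctr >> 16) & 0xf);
}

/* Clean [addr, addr + len) to the point of persistence. */
static void clean_to_pop(const void *addr, size_t len)
{
        size_t line = dcache_line_size();
        uintptr_t p = (uintptr_t)addr & ~(line - 1);
        uintptr_t end = (uintptr_t)addr + len;

        for (; p < end; p += line) {
                /* DC CVAP, written as SYS #3, C7, C12, #1, Xt so that
                 * pre-ARMv8.2 assemblers accept it. */
                asm volatile("sys 3, c7, c12, 1, %0" : : "r" (p) : "memory");
        }

        asm volatile("dsb sy" : : : "memory");  /* wait for completion */
}

int main(void)
{
        static char buf[256];

        buf[0] = 42;
        clean_to_pop(buf, sizeof(buf));
        return 0;
}

Compile for aarch64 only; on CPUs without ARMv8.2-DCPoP the DC CVAP encoding is undefined and will fault, which is why checking HWCAP_DCPOP first matters.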
Reviewed-by: Will Deacon Signed-off-by: Robin Murphy [catalin.marinas@arm.com: move arch_*_pmem() functions to arch/arm64/mm/flush.c] [catalin.marinas@arm.com: change dmb(sy) to dmb(osh)] Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a2542ef3ff25..cd52d365d1f0 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -889,6 +889,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .min_field_value = 0, .matches = has_no_fpsimd, }, +#ifdef CONFIG_ARM64_PMEM + { + .desc = "Data cache clean to Point of Persistence", + .capability = ARM64_HAS_DCPOP, + .def_scope = SCOPE_SYSTEM, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64ISAR1_EL1, + .field_pos = ID_AA64ISAR1_DPB_SHIFT, + .min_field_value = 1, + }, +#endif {}, }; -- cgit v1.2.3 From 739586951b8abe381a98797a5e27a0a9336333d6 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Wed, 26 Jul 2017 20:07:37 +0300 Subject: arm64/vdso: Support mremap() for vDSO vDSO VMA address is saved in mm_context for the purpose of using restorer from vDSO page to return to userspace after signal handling. In Checkpoint Restore in Userspace (CRIU) project we place vDSO VMA on restore back to the place where it was on the dump. With the exception for x86 (where there is API to map vDSO with arch_prctl()), we move vDSO inherited from CRIU task to restoree position by mremap(). CRIU does support arm64 architecture, but kernel doesn't update context.vdso pointer after mremap(). Which results in translation fault after signal handling on restored application: https://github.com/xemul/criu/issues/288 Make vDSO code track the VMA address by supplying .mremap() fops the same way it's done for x86 and arm32 by: commit b059a453b1cf ("x86/vdso: Add mremap hook to vm_special_mapping") commit 280e87e98c09 ("ARM: 8683/1: ARM32: Support mremap() for sigpage/vDSO"). Cc: Russell King Cc: linux-arm-kernel@lists.infradead.org Cc: Cyrill Gorcunov Cc: Pavel Emelyanov Cc: Christopher Covington Reviewed-by: Will Deacon Signed-off-by: Dmitry Safonov Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index e8f759f764f2..2d419006ad43 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -110,12 +110,27 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) } #endif /* CONFIG_COMPAT */ +static int vdso_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + unsigned long new_size = new_vma->vm_end - new_vma->vm_start; + unsigned long vdso_size = vdso_end - vdso_start; + + if (vdso_size != new_size) + return -EINVAL; + + current->mm->context.vdso = (void *)new_vma->vm_start; + + return 0; +} + static struct vm_special_mapping vdso_spec[2] __ro_after_init = { { .name = "[vvar]", }, { .name = "[vdso]", + .mremap = vdso_mremap, }, }; -- cgit v1.2.3 From 7326749801396105aef0ed9229df746ac9e24300 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 22 Jul 2017 18:45:33 +0100 Subject: arm64: unwind: reference pt_regs via embedded stack frame As it turns out, the unwind code is slightly broken, and probably has been for a while. 
The problem is in the dumping of the exception stack, which is intended to dump the contents of the pt_regs struct at each level in the call stack where an exception was taken and routed to a routine marked as __exception (which means its stack frame is right below the pt_regs struct on the stack).

'Right below the pt_regs struct' is ill-defined, though: the unwind code assigns 'frame pointer + 0x10' to the .sp member of the stackframe struct at each level, and dump_backtrace() happily dereferences that as the pt_regs pointer when encountering an __exception routine. However, the actual size of the stack frame created by this routine (which could be any one of the many __exception routines we have in the kernel) is not known, so frame.sp is pretty useless for figuring out where struct pt_regs really is.

So it seems the only way to ensure that we can find our struct pt_regs when walking the stack frames is to put it at a known, fixed offset from the stack frame pointer that is passed to such __exception routines. The simplest way to do that is to put it inside pt_regs itself, which is the main change implemented by this patch. As a bonus, doing this allows us to get rid of a fair amount of cruft related to walking from one stack to the other, which is especially nice since we intend to introduce yet another stack for overflow handling once we add support for vmapped stacks. It also fixes an inconsistency whereby we only add a stack frame pointing to ELR_EL1 when executing from the IRQ stack, but not when executing from the task stack.

To consistently identify exception regs even in the presence of exceptions taken from entry code, we must check whether the next frame was created by entry text, rather than whether the current frame was created by exception text.

To avoid backtracing using PCs that fall in the idmap, or are controlled by userspace, we must explicitly zero the FP and LR in startup paths, and must ensure that the frame embedded in pt_regs is zeroed upon entry from EL0. To avoid these NULL entries showing up in the backtrace, unwind_frame() is updated to reject them.
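To illustrate the termination rule, here is a synthetic userspace sketch: struct frame_record simply mirrors the {fp, lr} pair stored at the base of an AAPCS64 frame, and the addresses in main() are fabricated rather than taken from a real stack.

#include <stdint.h>
#include <stdio.h>

/* What AAPCS64 code stores at the location the frame pointer points to:
 * the caller's frame pointer and the return address. */
struct frame_record {
        uint64_t fp;
        uint64_t lr;
};

static void walk(uint64_t fp)
{
        while (fp) {
                const struct frame_record *rec =
                        (const struct frame_record *)(uintptr_t)fp;

                /* Frames synthesised on entry from EL0 carry a {0, 0}
                 * record: stop there instead of reporting a bogus entry. */
                if (!rec->fp && !rec->lr)
                        break;

                printf("return address: %#llx\n", (unsigned long long)rec->lr);
                fp = rec->fp;
        }
}

int main(void)
{
        /* Build a synthetic three-frame chain ending in the {0, 0}
         * terminator that the entry code now plants for exceptions taken
         * from EL0. */
        struct frame_record bottom = { 0, 0 };
        struct frame_record callee = { (uint64_t)(uintptr_t)&bottom, 0x400104 };
        struct frame_record caller = { (uint64_t)(uintptr_t)&callee, 0x400080 };

        walk((uint64_t)(uintptr_t)&caller);
        return 0;
}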
Signed-off-by: Ard Biesheuvel [Mark: compare current frame against .entry.text, avoid bogus PCs] Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kernel/entry.S | 20 ++++++++++++-------- arch/arm64/kernel/head.S | 4 ++++ arch/arm64/kernel/stacktrace.c | 33 ++++++--------------------------- arch/arm64/kernel/traps.c | 32 +++++++------------------------- 5 files changed, 30 insertions(+), 60 deletions(-) (limited to 'arch/arm64/kernel') diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index b3bb7ef97bc8..71bf088f1e4b 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -75,6 +75,7 @@ int main(void) DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0)); DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit)); + DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 9e126d3d8b53..612a077ba109 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -111,6 +111,18 @@ mrs x23, spsr_el1 stp lr, x21, [sp, #S_LR] + /* + * In order to be able to dump the contents of struct pt_regs at the + * time the exception was taken (in case we attempt to walk the call + * stack later), chain it together with the stack frames. + */ + .if \el == 0 + stp xzr, xzr, [sp, #S_STACKFRAME] + .else + stp x29, x22, [sp, #S_STACKFRAME] + .endif + add x29, sp, #S_STACKFRAME + #ifdef CONFIG_ARM64_SW_TTBR0_PAN /* * Set the TTBR0 PAN bit in SPSR. When the exception is taken from @@ -265,14 +277,6 @@ alternative_else_nop_endif /* switch to the irq stack */ mov sp, x26 - - /* - * Add a dummy stack frame, this non-standard format is fixed up - * by unwind_frame() - */ - stp x29, x19, [sp, #-16]! - mov x29, sp - 9998: .endm diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 973df7de7bf8..f9e4aacf4f42 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -362,6 +362,9 @@ __primary_switched: ret // to __primary_switch() 0: #endif + add sp, sp, #16 + mov x29, #0 + mov x30, #0 b start_kernel ENDPROC(__primary_switched) @@ -617,6 +620,7 @@ __secondary_switched: ldr x2, [x0, #CPU_BOOT_TASK] msr sp_el0, x2 mov x29, #0 + mov x30, #0 b secondary_start_kernel ENDPROC(__secondary_switched) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index beaf51fb3088..81d9262acaf0 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -76,34 +76,13 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ /* - * Check whether we are going to walk through from interrupt stack - * to task stack. - * If we reach the end of the stack - and its an interrupt stack, - * unpack the dummy frame to find the original elr. - * - * Check the frame->fp we read from the bottom of the irq_stack, - * and the original task stack pointer are both in current->stack. + * Frames created upon entry from EL0 have NULL FP and PC values, so + * don't bother reporting these. Frames created by __noreturn functions + * might have a valid FP even if PC is bogus, so only terminate where + * both are NULL. 
*/ - if (frame->sp == IRQ_STACK_PTR()) { - struct pt_regs *irq_args; - unsigned long orig_sp = IRQ_STACK_TO_TASK_STACK(frame->sp); - - if (object_is_on_stack((void *)orig_sp) && - object_is_on_stack((void *)frame->fp)) { - frame->sp = orig_sp; - - /* orig_sp is the saved pt_regs, find the elr */ - irq_args = (struct pt_regs *)orig_sp; - frame->pc = irq_args->pc; - } else { - /* - * This frame has a non-standard format, and we - * didn't fix it, because the data looked wrong. - * Refuse to output this frame. - */ - return -EINVAL; - } - } + if (!frame->fp && !frame->pc) + return -EINVAL; return 0; } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 5797f5037ec9..075c29a24345 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -143,7 +143,6 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) { struct stackframe frame; - unsigned long irq_stack_ptr; int skip; pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk); @@ -154,15 +153,6 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) if (!try_get_task_stack(tsk)) return; - /* - * Switching between stacks is valid when tracing current and in - * non-preemptible context. - */ - if (tsk == current && !preemptible()) - irq_stack_ptr = IRQ_STACK_PTR(); - else - irq_stack_ptr = 0; - if (tsk == current) { frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_stack_pointer; @@ -182,13 +172,12 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) skip = !!regs; printk("Call trace:\n"); while (1) { - unsigned long where = frame.pc; unsigned long stack; int ret; /* skip until specified stack frame */ if (!skip) { - dump_backtrace_entry(where); + dump_backtrace_entry(frame.pc); } else if (frame.fp == regs->regs[29]) { skip = 0; /* @@ -203,20 +192,13 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) ret = unwind_frame(tsk, &frame); if (ret < 0) break; - stack = frame.sp; - if (in_exception_text(where)) { - /* - * If we switched to the irq_stack before calling this - * exception handler, then the pt_regs will be on the - * task stack. The easiest way to tell is if the large - * pt_regs would overlap with the end of the irq_stack. - */ - if (stack < irq_stack_ptr && - (stack + sizeof(struct pt_regs)) > irq_stack_ptr) - stack = IRQ_STACK_TO_TASK_STACK(irq_stack_ptr); + if (in_entry_text(frame.pc)) { + stack = frame.fp - offsetof(struct pt_regs, stackframe); - dump_mem("", "Exception stack", stack, - stack + sizeof(struct pt_regs)); + if (on_task_stack(tsk, stack) || + (tsk == current && !preemptible() && on_irq_stack(stack))) + dump_mem("", "Exception stack", stack, + stack + sizeof(struct pt_regs)); } } -- cgit v1.2.3 From 31e43ad3b74a5d7b282023b72f25fc677c14c727 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 23 Jul 2017 09:05:38 +0100 Subject: arm64: unwind: remove sp from struct stackframe The unwind code sets the sp member of struct stackframe to 'frame pointer + 0x10' unconditionally, without regard for whether doing so produces a legal value. So let's simply remove it now that we have stopped using it anyway. Signed-off-by: Ard Biesheuvel Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/kernel/perf_callcha