diff options
author | Ingo Molnar <mingo@kernel.org> | 2020-07-26 19:52:16 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2020-07-26 19:52:30 +0200 |
commit | 2d65685a4a6ff6ef6bfe42717222db92c04224c2 (patch) | |
tree | 894aebbf77ce53375e999cd5c6bb34ee5d28bb9f /arch/x86 | |
parent | de0038bfaf53af0e8bc4961b7aacdcb79f43bf08 (diff) | |
parent | fbe0d451bcea569fc0ed3455511a90646c8a9c81 (diff) |
Merge branch 'x86/urgent' into x86/cleanups
Refresh the branch for a dependent commit.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86')
78 files changed, 556 insertions, 361 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6a0cc524882d..883da0abf779 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -67,7 +67,7 @@ config X86 select ARCH_HAS_FILTER_PGPROT select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL - select ARCH_HAS_KCOV if X86_64 + select ARCH_HAS_KCOV if X86_64 && STACK_VALIDATION select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_MEMBARRIER_SYNC_CORE select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 7619742f91c9..5a828fde7a42 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -90,8 +90,8 @@ endif vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o -vmlinux-objs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o +efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a # The compressed kernel is built with -fPIC/-fPIE so that a boot loader # can place it anywhere in memory and it will still run. However, since @@ -115,7 +115,7 @@ endef quiet_cmd_check-and-link-vmlinux = LD $@ cmd_check-and-link-vmlinux = $(cmd_check_data_rel); $(cmd_ld) -$(obj)/vmlinux: $(vmlinux-objs-y) FORCE +$(obj)/vmlinux: $(vmlinux-objs-y) $(efi-obj-y) FORCE $(call if_changed,check-and-link-vmlinux) OBJCOPYFLAGS_vmlinux.bin := -R .comment -S diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index e821a7d7d5c4..97d37f0a34f5 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -213,7 +213,6 @@ SYM_FUNC_START(startup_32) * We place all of the values on our mini stack so lret can * used to perform that far jump. */ - pushl $__KERNEL_CS leal startup_64(%ebp), %eax #ifdef CONFIG_EFI_MIXED movl efi32_boot_args(%ebp), %edi @@ -224,11 +223,20 @@ SYM_FUNC_START(startup_32) movl efi32_boot_args+8(%ebp), %edx // saved bootparams pointer cmpl $0, %edx jnz 1f + /* + * efi_pe_entry uses MS calling convention, which requires 32 bytes of + * shadow space on the stack even if all arguments are passed in + * registers. We also need an additional 8 bytes for the space that + * would be occupied by the return address, and this also results in + * the correct stack alignment for entry. + */ + subl $40, %esp leal efi_pe_entry(%ebp), %eax movl %edi, %ecx // MS calling convention movl %esi, %edx 1: #endif + pushl $__KERNEL_CS pushl %eax /* Enter paged protected Mode, activating Long Mode */ @@ -784,6 +792,7 @@ SYM_DATA_LOCAL(boot_heap, .fill BOOT_HEAP_SIZE, 1, 0) SYM_DATA_START_LOCAL(boot_stack) .fill BOOT_STACK_SIZE, 1, 0 + .balign 16 SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end) /* diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index b7a5790d8d63..08bf95dbc911 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -7,12 +7,20 @@ KASAN_SANITIZE := n UBSAN_SANITIZE := n KCOV_INSTRUMENT := n -CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE) -fstack-protector -fstack-protector-strong -CFLAGS_REMOVE_syscall_32.o = $(CC_FLAGS_FTRACE) -fstack-protector -fstack-protector-strong -CFLAGS_REMOVE_syscall_64.o = $(CC_FLAGS_FTRACE) -fstack-protector -fstack-protector-strong +CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_syscall_64.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_syscall_32.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_syscall_x32.o = $(CC_FLAGS_FTRACE) + +CFLAGS_common.o += -fno-stack-protector +CFLAGS_syscall_64.o += -fno-stack-protector +CFLAGS_syscall_32.o += -fno-stack-protector +CFLAGS_syscall_x32.o += -fno-stack-protector CFLAGS_syscall_64.o += $(call cc-option,-Wno-override-init,) CFLAGS_syscall_32.o += $(call cc-option,-Wno-override-init,) +CFLAGS_syscall_x32.o += $(call cc-option,-Wno-override-init,) + obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o obj-y += common.o diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index bd3f14175193..f09288431f28 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -45,6 +45,32 @@ #define CREATE_TRACE_POINTS #include <trace/events/syscalls.h> +/* Check that the stack and regs on entry from user mode are sane. */ +static noinstr void check_user_regs(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) { + /* + * Make sure that the entry code gave us a sensible EFLAGS + * register. Native because we want to check the actual CPU + * state, not the interrupt state as imagined by Xen. + */ + unsigned long flags = native_save_fl(); + WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF | + X86_EFLAGS_NT)); + + /* We think we came from user mode. Make sure pt_regs agrees. */ + WARN_ON_ONCE(!user_mode(regs)); + + /* + * All entries from user mode (except #DF) should be on the + * normal thread stack and should have user pt_regs in the + * correct location. + */ + WARN_ON_ONCE(!on_thread_stack()); + WARN_ON_ONCE(regs != task_pt_regs(current)); + } +} + #ifdef CONFIG_CONTEXT_TRACKING /** * enter_from_user_mode - Establish state when coming from user mode @@ -127,9 +153,6 @@ static long syscall_trace_enter(struct pt_regs *regs) unsigned long ret = 0; u32 work; - if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) - BUG_ON(regs != task_pt_regs(current)); - work = READ_ONCE(ti->flags); if (work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) { @@ -271,7 +294,7 @@ static void __prepare_exit_to_usermode(struct pt_regs *regs) #endif } -__visible noinstr void prepare_exit_to_usermode(struct pt_regs *regs) +static noinstr void prepare_exit_to_usermode(struct pt_regs *regs) { instrumentation_begin(); __prepare_exit_to_usermode(regs); @@ -346,6 +369,8 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs) { struct thread_info *ti; + check_user_regs(regs); + enter_from_user_mode(); instrumentation_begin(); @@ -409,6 +434,8 @@ static void do_syscall_32_irqs_on(struct pt_regs *regs) /* Handles int $0x80 */ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) { + check_user_regs(regs); + enter_from_user_mode(); instrumentation_begin(); @@ -460,6 +487,8 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs) vdso_image_32.sym_int80_landing_pad; bool success; + check_user_regs(regs); + /* * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward * so that 'regs->ip -= 2' lands back on an int $0x80 instruction. @@ -510,6 +539,18 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs) (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0; #endif } + +/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */ +__visible noinstr long do_SYSENTER_32(struct pt_regs *regs) +{ + /* SYSENTER loses RSP, but the vDSO saved it in RBP. */ + regs->sp = regs->bp; + + /* SYSENTER clobbers EFLAGS.IF. Assume it was set in usermode. */ + regs->flags |= X86_EFLAGS_IF; + + return do_fast_syscall_32(regs); +} #endif SYSCALL_DEFINE0(ni_syscall) @@ -553,6 +594,7 @@ SYSCALL_DEFINE0(ni_syscall) bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs) { if (user_mode(regs)) { + check_user_regs(regs); enter_from_user_mode(); return false; } @@ -686,6 +728,7 @@ void noinstr idtentry_exit_cond_rcu(struct pt_regs *regs, bool rcu_exit) */ void noinstr idtentry_enter_user(struct pt_regs *regs) { + check_user_regs(regs); enter_from_user_mode(); } diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 024d7d276cd4..2d0bd5d5f032 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -933,9 +933,8 @@ SYM_FUNC_START(entry_SYSENTER_32) .Lsysenter_past_esp: pushl $__USER_DS /* pt_regs->ss */ - pushl %ebp /* pt_regs->sp (stashed in bp) */ + pushl $0 /* pt_regs->sp (placeholder) */ pushfl /* pt_regs->flags (except IF = 0) */ - orl $X86_EFLAGS_IF, (%esp) /* Fix IF */ pushl $__USER_CS /* pt_regs->cs */ pushl $0 /* pt_regs->ip = 0 (placeholder) */ pushl %eax /* pt_regs->orig_ax */ @@ -965,7 +964,7 @@ SYM_FUNC_START(entry_SYSENTER_32) .Lsysenter_flags_fixed: movl %esp, %eax - call do_fast_syscall_32 + call do_SYSENTER_32 /* XEN PV guests always use IRET path */ ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ "jmp .Lsyscall_32_done", X86_FEATURE_XENPV diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 0f974ae01e62..541fdaf64045 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -57,29 +57,30 @@ SYM_CODE_START(entry_SYSENTER_compat) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - /* - * User tracing code (ptrace or signal handlers) might assume that - * the saved RAX contains a 32-bit number when we're invoking a 32-bit - * syscall. Just in case the high bits are nonzero, zero-extend - * the syscall number. (This could almost certainly be deleted - * with no ill effects.) - */ - movl %eax, %eax - /* Construct struct pt_regs on stack */ pushq $__USER32_DS /* pt_regs->ss */ - pushq %rbp /* pt_regs->sp (stashed in bp) */ + pushq $0 /* pt_regs->sp = 0 (placeholder) */ /* * Push flags. This is nasty. First, interrupts are currently - * off, but we need pt_regs->flags to have IF set. Second, even - * if TF was set when SYSENTER started, it's clear by now. We fix - * that later using TIF_SINGLESTEP. + * off, but we need pt_regs->flags to have IF set. Second, if TS + * was set in usermode, it's still set, and we're singlestepping + * through this code. do_SYSENTER_32() will fix up IF. */ pushfq /* pt_regs->flags (except IF = 0) */ - orl $X86_EFLAGS_IF, (%rsp) /* Fix saved flags */ pushq $__USER32_CS /* pt_regs->cs */ pushq $0 /* pt_regs->ip = 0 (placeholder) */ +SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) + + /* + * User tracing code (ptrace or signal handlers) might assume that + * the saved RAX contains a 32-bit number when we're invoking a 32-bit + * syscall. Just in case the high bits are nonzero, zero-extend + * the syscall number. (This could almost certainly be deleted + * with no ill effects.) + */ + movl %eax, %eax + pushq %rax /* pt_regs->orig_ax */ pushq %rdi /* pt_regs->di */ pushq %rsi /* pt_regs->si */ @@ -135,7 +136,7 @@ SYM_CODE_START(entry_SYSENTER_compat) .Lsysenter_flags_fixed: movq %rsp, %rdi - call do_fast_syscall_32 + call do_SYSENTER_32 /* XEN PV guests always use IRET path */ ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \ "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile index 12c42eba77ec..9933c0e8e97a 100644 --- a/arch/x86/events/Makefile +++ b/arch/x86/events/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y += core.o probe.o -obj-$(PERF_EVENTS_INTEL_RAPL) += rapl.o +obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += rapl.o obj-y += amd/ obj-$(CONFIG_X86_LOCAL_APIC) += msr.o obj-$(CONFIG_CPU_SUP_INTEL) += intel/ diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index a54c6a401581..6035df1b49e1 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -375,7 +375,10 @@ void __init hyperv_init(void) guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); - hv_hypercall_pg = vmalloc_exec(PAGE_SIZE); + hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, + VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX, + VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, + __builtin_return_address(0)); if (hv_hypercall_pg == NULL) { wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); goto remove_cpuhp_state; diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 35460fef39b8..0367efdc5b7a 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -201,12 +201,8 @@ arch_test_and_change_bit(long nr, volatile unsigned long *addr) return GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, c, "Ir", nr); } -static __no_kcsan_or_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) +static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) { - /* - * Because this is a plain access, we need to disable KCSAN here to - * avoid double instrumentation via instrumented bitops. - */ return ((1UL << (nr & (BITS_PER_LONG-1))) & (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index fb34ff641e0a..028189575560 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -75,6 +75,12 @@ do { \ unreachable(); \ } while (0) +/* + * This instrumentation_begin() is strictly speaking incorrect; but it + * suppresses the complaints from WARN()s in noinstr code. If such a WARN() + * were to trigger, we'd rather wreck the machine in an attempt to get the + * message out than not know about it. + */ #define __WARN_FLAGS(flags) \ do { \ instrumentation_begin(); \ diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index dd17c2da1af5..da78ccbd493b 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -58,4 +58,9 @@ static inline bool handle_guest_split_lock(unsigned long ip) return false; } #endif +#ifdef CONFIG_IA32_FEAT_CTL +void init_ia32_feat_ctl(struct cpuinfo_x86 *c); +#else +static inline void init_ia32_feat_ctl(struct cpuinfo_x86 *c) {} +#endif #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index 6722ffcef2e6..3afa990d756b 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -11,5 +11,23 @@ extern cpumask_var_t cpu_sibling_setup_mask; extern void setup_cpu_local_masks(void); +/* + * NMI and MCE exceptions need cpu_is_offline() _really_ early, + * provide an arch_ special for them to avoid instrumentation. + */ +#if NR_CPUS > 1 +static __always_inline bool arch_cpu_online(int cpu) +{ + return arch_test_bit(cpu, cpumask_bits(cpu_online_mask)); +} +#else +static __always_inline bool arch_cpu_online(int cpu) +{ + return cpu == 0; +} +#endif + +#define arch_cpu_is_offline(cpu) unlikely(!arch_cpu_online(cpu)) + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_CPUMASK_H */ diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 42159f45bf9c..845e7481ab77 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -623,6 +623,11 @@ static inline void switch_fpu_finish(struct fpu *new_fpu) * MXCSR and XCR definitions: */ +static inline void ldmxcsr(u32 mxcsr) +{ + asm volatile("ldmxcsr %0" :: "m" (mxcsr)); +} + extern unsigned int mxcsr_feature_mask; #define XCR_XFEATURE_ENABLED_MASK 0x00000000 diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index cf51c50eb356..80d3b30d3ee3 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -353,10 +353,6 @@ static __always_inline void __##func(struct pt_regs *regs) #else /* CONFIG_X86_64 */ |