summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/x86/exception-tables.rst2
-rw-r--r--arch/x86/entry/calling.h15
-rw-r--r--arch/x86/entry/entry_32.S145
-rw-r--r--arch/x86/include/asm/frame.h49
-rw-r--r--arch/x86/include/asm/kexec.h17
-rw-r--r--arch/x86/include/asm/ptrace.h17
-rw-r--r--arch/x86/include/asm/special_insns.h37
-rw-r--r--arch/x86/include/asm/stacktrace.h2
-rw-r--r--arch/x86/include/asm/text-patching.h2
-rw-r--r--arch/x86/kernel/alternative.c81
-rw-r--r--arch/x86/kernel/asm-offsets.c1
-rw-r--r--arch/x86/kernel/cpu/common.c20
-rw-r--r--arch/x86/kernel/crash.c8
-rw-r--r--arch/x86/kernel/ftrace.c7
-rw-r--r--arch/x86/kernel/ftrace_32.S78
-rw-r--r--arch/x86/kernel/ftrace_64.S3
-rw-r--r--arch/x86/kernel/kgdb.c8
-rw-r--r--arch/x86/kernel/kprobes/common.h28
-rw-r--r--arch/x86/kernel/kprobes/core.c29
-rw-r--r--arch/x86/kernel/kprobes/opt.c20
-rw-r--r--arch/x86/kernel/process_32.c16
-rw-r--r--arch/x86/kernel/ptrace.c29
-rw-r--r--arch/x86/kernel/smpboot.c8
-rw-r--r--arch/x86/kernel/time.c3
-rw-r--r--arch/x86/kernel/unwind_frame.c32
-rw-r--r--arch/x86/kernel/unwind_orc.c2
26 files changed, 394 insertions, 265 deletions
diff --git a/Documentation/x86/exception-tables.rst b/Documentation/x86/exception-tables.rst
index 24596c8210b5..ed6d4b0cf62c 100644
--- a/Documentation/x86/exception-tables.rst
+++ b/Documentation/x86/exception-tables.rst
@@ -35,7 +35,7 @@ page fault handler::
void do_page_fault(struct pt_regs *regs, unsigned long error_code)
in arch/x86/mm/fault.c. The parameters on the stack are set up by
-the low level assembly glue in arch/x86/kernel/entry_32.S. The parameter
+the low level assembly glue in arch/x86/entry/entry_32.S. The parameter
regs is a pointer to the saved registers on the stack, error_code
contains a reason code for the exception.
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index efb0d1b1f15f..9f1f9e3b8230 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -172,21 +172,6 @@ For 32-bit we have the following conventions - kernel is built with
.endif
.endm
-/*
- * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
- * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
- * is just setting the LSB, which makes it an invalid stack address and is also
- * a signal to the unwinder that it's a pt_regs pointer in disguise.
- *
- * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
- * the original rbp.
- */
-.macro ENCODE_FRAME_POINTER ptregs_offset=0
-#ifdef CONFIG_FRAME_POINTER
- leaq 1+\ptregs_offset(%rsp), %rbp
-#endif
-.endm
-
#ifdef CONFIG_PAGE_TABLE_ISOLATION
/*
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 44c6e6f54bf7..1285e5abf669 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -67,7 +67,6 @@
# define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
# define preempt_stop(clobbers)
-# define resume_kernel restore_all_kernel
#endif
.macro TRACE_IRQS_IRET
@@ -203,9 +202,102 @@
.Lend_\@:
.endm
+#define CS_FROM_ENTRY_STACK (1 << 31)
+#define CS_FROM_USER_CR3 (1 << 30)
+#define CS_FROM_KERNEL (1 << 29)
+
+.macro FIXUP_FRAME
+ /*
+ * The high bits of the CS dword (__csh) are used for CS_FROM_*.
+ * Clear them in case hardware didn't do this for us.
+ */
+ andl $0x0000ffff, 3*4(%esp)
+
+#ifdef CONFIG_VM86
+ testl $X86_EFLAGS_VM, 4*4(%esp)
+ jnz .Lfrom_usermode_no_fixup_\@
+#endif
+ testl $SEGMENT_RPL_MASK, 3*4(%esp)
+ jnz .Lfrom_usermode_no_fixup_\@
+
+ orl $CS_FROM_KERNEL, 3*4(%esp)
+
+ /*
+ * When we're here from kernel mode; the (exception) stack looks like:
+ *
+ * 5*4(%esp) - <previous context>
+ * 4*4(%esp) - flags
+ * 3*4(%esp) - cs
+ * 2*4(%esp) - ip
+ * 1*4(%esp) - orig_eax
+ * 0*4(%esp) - gs / function
+ *
+ * Lets build a 5 entry IRET frame after that, such that struct pt_regs
+ * is complete and in particular regs->sp is correct. This gives us
+ * the original 5 enties as gap:
+ *
+ * 12*4(%esp) - <previous context>
+ * 11*4(%esp) - gap / flags
+ * 10*4(%esp) - gap / cs
+ * 9*4(%esp) - gap / ip
+ * 8*4(%esp) - gap / orig_eax
+ * 7*4(%esp) - gap / gs / function
+ * 6*4(%esp) - ss
+ * 5*4(%esp) - sp
+ * 4*4(%esp) - flags
+ * 3*4(%esp) - cs
+ * 2*4(%esp) - ip
+ * 1*4(%esp) - orig_eax
+ * 0*4(%esp) - gs / function
+ */
+
+ pushl %ss # ss
+ pushl %esp # sp (points at ss)
+ addl $6*4, (%esp) # point sp back at the previous context
+ pushl 6*4(%esp) # flags
+ pushl 6*4(%esp) # cs
+ pushl 6*4(%esp) # ip
+ pushl 6*4(%esp) # orig_eax
+ pushl 6*4(%esp) # gs / function
+.Lfrom_usermode_no_fixup_\@:
+.endm
+
+.macro IRET_FRAME
+ testl $CS_FROM_KERNEL, 1*4(%esp)
+ jz .Lfinished_frame_\@
+
+ /*
+ * Reconstruct the 3 entry IRET frame right after the (modified)
+ * regs->sp without lowering %esp in between, such that an NMI in the
+ * middle doesn't scribble our stack.
+ */
+ pushl %eax
+ pushl %ecx
+ movl 5*4(%esp), %eax # (modified) regs->sp
+
+ movl 4*4(%esp), %ecx # flags
+ movl %ecx, -4(%eax)
+
+ movl 3*4(%esp), %ecx # cs
+ andl $0x0000ffff, %ecx
+ movl %ecx, -8(%eax)
+
+ movl 2*4(%esp), %ecx # ip
+ movl %ecx, -12(%eax)
+
+ movl 1*4(%esp), %ecx # eax
+ movl %ecx, -16(%eax)
+
+ popl %ecx
+ lea -16(%eax), %esp
+ popl %eax
+.Lfinished_frame_\@:
+.endm
+
.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0
cld
PUSH_GS
+ FIXUP_FRAME
pushl %fs
pushl %es
pushl %ds
@@ -247,22 +339,6 @@
.Lend_\@:
.endm
-/*
- * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
- * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
- * is just clearing the MSB, which makes it an invalid stack address and is also
- * a signal to the unwinder that it's a pt_regs pointer in disguise.
- *
- * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
- * original rbp.
- */
-.macro ENCODE_FRAME_POINTER
-#ifdef CONFIG_FRAME_POINTER
- mov %esp, %ebp
- andl $0x7fffffff, %ebp
-#endif
-.endm
-
.macro RESTORE_INT_REGS
popl %ebx
popl %ecx
@@ -375,9 +451,6 @@
* switch to it before we do any copying.
*/
-#define CS_FROM_ENTRY_STACK (1 << 31)
-#define CS_FROM_USER_CR3 (1 << 30)
-
.macro SWITCH_TO_KERNEL_STACK
ALTERNATIVE "", "jmp .Lend_\@", X86_FEATURE_XENPV
@@ -391,13 +464,6 @@
* that register for the time this macro runs
*/
- /*
- * The high bits of the CS dword (__csh) are used for
- * CS_FROM_ENTRY_STACK and CS_FROM_USER_CR3. Clear them in case
- * hardware didn't do this for us.
- */
- andl $(0x0000ffff), PT_CS(%esp)
-
/* Are we on the entry stack? Bail out if not! */
movl PER_CPU_VAR(cpu_entry_area), %ecx
addl $CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
@@ -755,7 +821,7 @@ ret_from_intr:
andl $SEGMENT_RPL_MASK, %eax
#endif
cmpl $USER_RPL, %eax
- jb resume_kernel # not returning to v8086 or userspace
+ jb restore_all_kernel # not returning to v8086 or userspace
ENTRY(resume_userspace)
DISABLE_INTERRUPTS(CLBR_ANY)
@@ -765,18 +831,6 @@ ENTRY(resume_userspace)
jmp restore_all
END(ret_from_exception)
-#ifdef CONFIG_PREEMPT
-ENTRY(resume_kernel)
- DISABLE_INTERRUPTS(CLBR_ANY)
- cmpl $0, PER_CPU_VAR(__preempt_count)
- jnz restore_all_kernel
- testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
- jz restore_all_kernel
- call preempt_schedule_irq
- jmp restore_all_kernel
-END(resume_kernel)
-#endif
-
GLOBAL(__begin_SYSENTER_singlestep_region)
/*
* All code from here through __end_SYSENTER_singlestep_region is subject
@@ -1019,6 +1073,7 @@ restore_all:
/* Restore user state */
RESTORE_REGS pop=4 # skip orig_eax/error_code
.Lirq_return:
+ IRET_FRAME
/*
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
* when returning from IPI handler and when returning from
@@ -1027,6 +1082,15 @@ restore_all:
INTERRUPT_RETURN
restore_all_kernel:
+#ifdef CONFIG_PREEMPT
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ cmpl $0, PER_CPU_VAR(__preempt_count)
+ jnz .Lno_preempt
+ testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
+ jz .Lno_preempt
+ call preempt_schedule_irq
+.Lno_preempt:
+#endif
TRACE_IRQS_IRET
PARANOID_EXIT_TO_KERNEL_MODE
BUG_IF_WRONG_CR3
@@ -1384,6 +1448,7 @@ END(page_fault)
common_exception:
/* the function address is in %gs's slot on the stack */
+ FIXUP_FRAME
pushl %fs
pushl %es
pushl %ds
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index 5cbce6fbb534..296b346184b2 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -22,6 +22,35 @@
pop %_ASM_BP
.endm
+#ifdef CONFIG_X86_64
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
+ * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
+ * is just setting the LSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
+ * the original rbp.
+ */
+.macro ENCODE_FRAME_POINTER ptregs_offset=0
+ leaq 1+\ptregs_offset(%rsp), %rbp
+.endm
+#else /* !CONFIG_X86_64 */
+/*
+ * This is a sneaky trick to help the unwinder find pt_regs on the stack. The
+ * frame pointer is replaced with an encoded pointer to pt_regs. The encoding
+ * is just clearing the MSB, which makes it an invalid stack address and is also
+ * a signal to the unwinder that it's a pt_regs pointer in disguise.
+ *
+ * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
+ * original ebp.
+ */
+.macro ENCODE_FRAME_POINTER
+ mov %esp, %ebp
+ andl $0x7fffffff, %ebp
+.endm
+#endif /* CONFIG_X86_64 */
+
#else /* !__ASSEMBLY__ */
#define FRAME_BEGIN \
@@ -30,12 +59,32 @@
#define FRAME_END "pop %" _ASM_BP "\n"
+#ifdef CONFIG_X86_64
+#define ENCODE_FRAME_POINTER \
+ "lea 1(%rsp), %rbp\n\t"
+#else /* !CONFIG_X86_64 */
+#define ENCODE_FRAME_POINTER \
+ "movl %esp, %ebp\n\t" \
+ "andl $0x7fffffff, %ebp\n\t"
+#endif /* CONFIG_X86_64 */
+
#endif /* __ASSEMBLY__ */
#define FRAME_OFFSET __ASM_SEL(4, 8)
#else /* !CONFIG_FRAME_POINTER */
+#ifdef __ASSEMBLY__
+
+.macro ENCODE_FRAME_POINTER ptregs_offset=0
+.endm
+
+#else /* !__ASSEMBLY */
+
+#define ENCODE_FRAME_POINTER
+
+#endif
+
#define FRAME_BEGIN
#define FRAME_END
#define FRAME_OFFSET 0
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 003f2daa3b0f..5e7d6b46de97 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -71,22 +71,6 @@ struct kimage;
#define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */
/*
- * CPU does not save ss and sp on stack if execution is already
- * running in kernel mode at the time of NMI occurrence. This code
- * fixes it.
- */
-static inline void crash_fixup_ss_esp(struct pt_regs *newregs,
- struct pt_regs *oldregs)
-{
-#ifdef CONFIG_X86_32
- newregs->sp = (unsigned long)&(oldregs->sp);
- asm volatile("xorl %%eax, %%eax\n\t"
- "movw %%ss, %%ax\n\t"
- :"=a"(newregs->ss));
-#endif
-}
-
-/*
* This function is responsible for capturing register states if coming
* via panic otherwise just fix up the ss and sp if coming via kernel
* mode exception.
@@ -96,7 +80,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
{
if (oldregs) {
memcpy(newregs, oldregs, sizeof(*newregs));
- crash_fixup_ss_esp(newregs, oldregs);
} else {
#ifdef CONFIG_X86_32
asm volatile("movl %%ebx,%0" : "=m"(newregs->bx));
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 8a7fc0cca2d1..3703c91f441e 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -166,14 +166,10 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
#define compat_user_stack_pointer() current_pt_regs()->sp
#endif
-#ifdef CONFIG_X86_32
-extern unsigned long kernel_stack_pointer(struct pt_regs *regs);
-#else
static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
{
return regs->sp;
}
-#endif
#define GET_IP(regs) ((regs)->ip)
#define GET_FP(regs) ((regs)->bp)
@@ -201,14 +197,6 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
if (unlikely(offset > MAX_REG_OFFSET))
return 0;
#ifdef CONFIG_X86_32
- /*
- * Traps from the kernel do not save sp and ss.
- * Use the helper function to retrieve sp.
- */
- if (offset == offsetof(struct pt_regs, sp) &&
- regs->cs == __KERNEL_CS)
- return kernel_stack_pointer(regs);
-
/* The selector fields are 16-bit. */
if (offset == offsetof(struct pt_regs, cs) ||
offset == offsetof(struct pt_regs, ss) ||
@@ -234,8 +222,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
static inline int regs_within_kernel_stack(struct pt_regs *regs,
unsigned long addr)
{
- return ((addr & ~(THREAD_SIZE - 1)) ==
- (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
+ return ((addr & ~(THREAD_SIZE - 1)) == (regs->sp & ~(THREAD_SIZE - 1)));
}
/**
@@ -249,7 +236,7 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs,
*/
static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n)
{
- unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+ unsigned long *addr = (unsigned long *)regs->sp;
addr += n;
if (regs_within_kernel_stack(regs, (unsigned long)addr))
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 0a3c4cab39db..b2e84d113f2a 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -6,6 +6,8 @@
#ifdef __KERNEL__
#include <asm/nops.h>
+#include <asm/processor-flags.h>
+#include <linux/jump_label.h>
/*
* Volatile isn't enough to prevent the compiler from reordering the
@@ -16,6 +18,10 @@
*/
extern unsigned long __force_order;
+/* Starts false and gets enabled once CPU feature detection is done. */
+DECLARE_STATIC_KEY_FALSE(cr_pinning);
+extern unsigned long cr4_pinned_bits;
+
static inline unsigned long native_read_cr0(void)
{
unsigned long val;
@@ -25,7 +31,20 @@ static inline unsigned long native_read_cr0(void)
static inline void native_write_cr0(unsigned long val)
{
- asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order));
+ unsigned long bits_missing = 0;
+
+set_register:
+ asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order));
+
+ if (static_branch_likely(&cr_pinning)) {
+ if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) {
+ bits_missing = X86_CR0_WP;
+ val |= bits_missing;
+ goto set_register;
+ }
+ /* Warn after we've set the missing bits. */
+ WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n");
+ }
}
static inline unsigned long native_read_cr2(void)
@@ -74,7 +93,21 @@ static inline unsigned long native_read_cr4(void)
static inline void native_write_cr4(unsigned long val)
{
- asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order));
+ unsigned long bits_missing = 0;
+
+set_register:
+ asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits));
+
+ if (static_branch_likely(&cr_pinning)) {
+ if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) {
+ bits_missing = ~val & cr4_pinned_bits;
+ val |= bits_missing;
+ goto set_register;
+ }
+ /* Warn after we've set the missing bits. */
+ WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n",
+ bits_missing);
+ }
}
#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index a8d0cdf48616..14db05086bbf 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -78,7 +78,7 @@ static inline unsigned long *
get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
{
if (regs)
- return (unsigned long *)kernel_stack_pointer(regs);
+ return (unsigned long *)regs->sp;
if (task == current)
return __builtin_frame_address(0);
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index d83e9f771d86..70c09967a999 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -66,7 +66,6 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
#define INT3_INSN_SIZE 1
#define CALL_INSN_SIZE 5
-#ifdef CONFIG_X86_64
static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
{
/*
@@ -84,7 +83,6 @@ static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func)
int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
int3_emulate_jmp(regs, func);
}
-#endif /* CONFIG_X86_64 */
#endif /* !CONFIG_UML_X86 */
#endif /* _ASM_X86_TEXT_PATCHING_H */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index bd542f9b0953..c3468b5242fd 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -616,11 +616,83 @@ extern struct paravirt_patch_site __start_parainstructions[],
__stop_parainstructions[];
#endif /* CONFIG_PARAVIRT */
+/*
+ * Self-test for the INT3 based CALL emulation code.
+ *
+ * This exercises int3_emulate_call() to make sure INT3 pt_regs are set up
+ * properly and that there is a stack gap between the INT3 frame and the
+ * previous context. Without this gap doing a virtual PUSH on the interrupted
+ * stack would corrupt the INT3 IRET frame.
+ *
+ * See entry_{32,64}.S for more details.
+ */
+static void __init int3_magic(unsigned int *ptr)
+{
+ *ptr = 1;
+}
+
+extern __initdata unsigned long int3_selftest_ip; /* defined in asm below */
+
+static int __init
+int3_exception_notify(struct notifier_block *self, unsigned long val, void *data)
+{
+ struct die_args *args = data;
+ struct pt_regs *regs = args->regs;
+
+ if (!regs || user_mode(regs))
+ return NOTIFY_DONE;
+
+ if (val != DIE_INT3)
+ return NOTIFY_DONE;
+
+ if (regs->ip - INT3_INSN_SIZE != int3_selftest_ip)
+ return NOTIFY_DONE;
+
+ int3_emulate_call(regs, (unsigned long)&int3_magic);
+ return NOTIFY_STOP;
+}
+
+static void __init int3_selftest(void)
+{
+ static __initdata struct notifier_block int3_exception_nb = {
+ .notifier_call = int3_exception_notify,
+ .priority = INT_MAX-1, /* last */
+ };
+ unsigned int val = 0;
+
+ BUG_ON(register_die_notifier(&int3_exception_nb));
+
+ /*
+ * Basically: int3_magic(&val); but really complicated :-)
+ *
+ * Stick the address of the INT3 instruction into int3_selftest_ip,
+ * then trigger the INT3, padded with NOPs to match a CALL instruction
+ * length.
+ */
+ asm volatile ("1: int3; nop; nop; nop; nop\n\t"
+ ".pushsection .init.data,\"aw\"\n\t"
+ ".align " __ASM_SEL(4, 8) "\n\t"
+ ".type int3_selftest_ip, @object\n\t"
+ ".size int3_selftest_ip, " __ASM_SEL(4, 8) "\n\t"
+ "int3_selftest_ip:\n\t"
+ __ASM_SEL(.long, .quad) " 1b\n\t"
+ ".popsection\n\t"
+ : : __ASM_SEL_RAW(a, D) (&val) : "memory");
+
+ BUG_ON(val != 1);
+
+ unregister_die_notifier(&int3_exception_nb);
+}
+
void __init alternative_instructions(void)
{
- /* The patching is not fully atomic, so try to avoid local interruptions
- that might execute the to be patched code.
- Other CPUs are not running. */
+ int3_selftest();
+
+ /*
+ * The patching is not fully atomic, so try to avoid local
+ * interruptions that might execute the to be patched code.
+ * Other CPUs are not running.
+ */
stop_nmi();
/*
@@ -645,10 +717,11 @@ void __init alternative_instructions(void)
_text, _etext);
}
- if (!uniproc_patched || num_possible_cpus() == 1)
+ if (!uniproc_patched || num_possible_cpus() == 1) {
free_init_pages("SMP alternatives",
(unsigned long)__smp_locks,
(unsigned long)__smp_locks_end);
+ }
#endif
apply_paravirt(__parainstructions, __parainstructions_end);
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 168543d077d7..da64452584b0 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -38,7 +38,6 @@ static void __used common(void)
#endif
BLANK();
- OFFSET(TASK_TI_flags, task_struct, thread_info.flags);
OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
BLANK();
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index dad20bc891d5..8febe90470f4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -366,6 +366,25 @@ out:
cr4_clear_bits(X86_CR4_UMIP);
}
+DEFINE_STATIC_KEY_FALSE_RO(cr_pinning);
+EXPORT_SYMBOL(cr_pinning);
+unsigned long cr4_pinned_bits __ro_after_init;
+EXPORT_SYMBOL(cr4_pinned_bits);
+
+/*
+ * Once CPU feature detection is finished (and boot params have been
+ * parsed), record any of the sensitive CR bits that are set, and
+ * enable CR pinning.
+ */
+static void __init setup_cr_pinning(void)
+{
+ unsigned long mask;
+
+ mask = (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP);
+ cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & mask;
+ static_key_enable(&cr_pinning.key);
+}
+
/*
* Protection Keys are not available in 32-bit mode.
*/
@@ -1468,6 +1487,7 @@ void __init identify_boot_cpu(void)
enable_sep_cpu();
#endif
cpu_detect_tlb(&boot_cpu_data);
+ setup_cr_pinning();
}
void identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 576b2e1bfc12..84e2d3ddd0eb 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -73,14 +73,6 @@ static inline void cpu_crash_vmclear_loaded_vmcss(void)
static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
{
-#ifdef CONFIG_X86_32
- struct pt_regs fixed_regs;
-
- if (!user_mode(regs)) {
- crash_fixup_ss_esp(&fixed_regs, regs);
- regs = &fixed_regs;
- }
-#endif
crash_save_cpu(regs, cpu);
/*
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 76228525acd0..4b73f5937f41 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -310,7 +310,6 @@ int ftrace_int3_handler(struct pt_regs *regs)
ip = regs->ip - INT3_INSN_SIZE;
-#ifdef CONFIG_X86_64
if (ftrace_location(ip)) {
int3_emulate_call(regs, (unsigned long)ftrace_regs_caller);
return 1;
@@ -322,12 +321,6 @@ int ftrace_int3_handler(struct pt_regs *regs)
int3_emulate_call(regs, ftrace_update_func_call);
return 1;
}
-#else
- if (ftrace_location(ip) || is_ftrace_caller(ip)) {
- int3_emulate_jmp(regs, ip + CALL_INSN_SIZE);
- return 1;
- }
-#endif
return 0;
}
diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
index 2ba914a34b06..073aab525d80 100644
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -9,6 +9,8 @@
#include <asm/export.h>
#include <asm/ftrace.h>
#include <asm/nospec-branch.h>
+#include <asm/frame.h>
+#include <asm/asm-offsets.h>
# define function_hook __fentry__
EXPORT_SYMBOL(__fentry__)
@@ -89,26 +91,38 @@ END(ftrace_caller)
ENTRY(ftrace_regs_caller)
/*
- * i386 does not save SS and ESP when coming from kernel.
- * Instead, to get sp, &regs->sp is used (see ptrace.h).
- * Unfortunately, that means eflags must be at the same location
- * as the current return ip is. We move the return ip into the
- * regs->ip location, and move flags into the return ip location.
+ * We're here from an mcount/fentry CALL, and the stack frame looks like:
+ *
+ * <previous context>
+ * RET-IP
+ *
+ * The purpose of this function is to call out in an emulated INT3
+ * environment with a stack frame like:
+ *
+ * <previous context>
+ * gap / RET-IP
+ * gap
+ * gap
+ * gap
+ * pt_regs
+ *
+ * We do _NOT_ restore: ss, flags, cs, gs, fs, es, ds
*/
- pushl $__KERNEL_CS
- pushl 4(%esp) /* Save the return ip */
- pushl $0 /* Load 0 into orig_ax */
+ subl $3*4, %esp # RET-IP + 3 gaps
+ pushl %ss # ss
+ pushl %esp # points at ss
+ addl $5*4, (%esp) # make it point at <previous context>
+ pushfl # flags
+ pushl $__KERNEL_CS # cs
+ pushl 7*4(%esp) # ip <- RET-IP
+ pushl $0 # orig_eax
+
pushl %gs
pushl %fs
pushl %es
pushl %ds
- pushl %eax
-
- /* Get flags and place them into the return ip slot */
- pushf
- popl %eax
- movl %eax, 8*4(%esp)
+ pushl %eax
pushl %ebp
pushl %edi
pushl %esi
@@ -116,24 +130,27 @@ ENTRY(ftrace_regs_caller)
pushl %ecx
pushl %ebx
- movl 12*4(%esp), %eax /* Load ip (1st parameter) */
- subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */
- movl 15*4(%esp), %edx /* Load parent ip (2nd parameter) */
- movl function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
- pushl %esp /* Save pt_regs as 4th parameter */
+ ENCODE_FRAME_POINTER
+
+ movl PT_EIP(%esp), %eax # 1st argument: IP
+ subl $MCOUNT_INSN_SIZE, %eax
+ movl 21*4(%esp), %edx # 2nd argument: parent ip
+ movl function_trace_op, %ecx # 3rd argument: ftrace_pos
+ pushl %esp # 4th argument: pt_regs
GLOBAL(ftrace_regs_call)
call ftrace_stub
- addl $4, %esp /* Skip pt_regs */
+ addl $4, %esp # skip 4th argument
- /* restore flags */
- push 14*4(%esp)
- popf
+ /* place IP below the new SP */
+ movl PT_OLDESP(%esp), %eax
+ movl PT_EIP(%esp), %ecx
+ movl %ecx, -4(%eax)
- /* Move return ip back to its original location */
- movl 12*4(%esp), %eax
- movl %eax, 14*4(%esp)
+ /* place EAX below that */
+ movl PT_EAX(%esp), %ecx
+ movl %ecx, -8(%eax)
popl %ebx
popl %ecx
@@ -141,14 +158,9 @@ GLOBAL(ftrace_regs_call)
popl %esi
popl %edi
popl %ebp
- popl %eax
- popl %ds
- popl %es
- popl %fs
- popl %gs
- /* use lea to not affect flags */
- lea 3*4(%esp), %esp /* Skip orig_ax, ip and cs */
+ lea -8(%eax), %esp
+ popl %eax
jmp .Lftrace_ret
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 10eb2760ef2c..809d54397dba 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -9,6 +9,7 @@
#include <asm/export.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
+#include <asm/frame.h>
.code64
.section .entry.text, "ax"
@@ -203,6 +204,8 @@ GLOBAL(ftrace_regs_caller_op_ptr)
leaq MCOUNT_REG_SIZE+8*2(%rsp), %rcx
movq %rcx, RSP(%rsp)
+ ENCODE_FRAME_POINTER
+
/* regs go into 4th parameter */
leaq (%rsp), %rcx
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 6690c5652aeb..23297ea64f5f 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -118,14 +118,6 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
#ifdef CONFIG_X86_32
switch (regno) {
- case GDB_SS:
- if (!user_mode(regs))
- *(unsigned long *)mem = __KERNEL_DS;
- break;
- case GDB_SP:
- if (!user_mode(regs))
- *(unsigned long *)mem = kernel_stack_pointer(regs);
- break;
case GDB_GS:
case GDB_FS:
*(unsigned long *)mem = 0xFFFF;
diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index 2b949f4fd4d8..7d3a2e2daf01 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -5,15 +5,10 @@
/* Kprobes and Optprobes common header */
#include <asm/asm.h>
-
-#ifdef CONFIG_FRAME_POINTER
-# define SAVE_RBP_STRING " push %" _ASM_BP "\n" \
- " mov %" _ASM_SP ", %" _ASM_BP "\n"
-#else
-# define SAVE_RBP_STRING " push %" _ASM_BP "\n"
-#endif
+#include <asm/frame.h>
#ifdef CONFIG_X86_64
+
#define SAVE_REGS_STRING \
/* Skip cs, ip, orig_ax. */ \
" subq $24, %rsp\n" \
@@ -27,11 +22,13 @@
" pushq %r10\n" \
" pushq %r11\n" \
" pushq %rbx\n" \
- SAVE_RBP_STRING \
+ " pushq %rbp\n" \
" pushq %r12\n" \
" pushq %r13\n" \
" pushq %r14\n" \
- " pushq %r15\n"
+ " pushq %r15\n" \
+ ENCODE_FRAME_POINTER
+
#define RESTORE_REGS_STRING \
" popq %r15\n" \
" popq %r14\n" \
@@ -51,19 +48,22 @@
/* Skip orig_ax, ip, cs */ \
" addq $24, %rsp\n"
#else
+
#define SAVE_REGS_STRING \
/* Skip cs, ip, orig_ax and gs. */ \
- " subl $16, %esp\n" \
+ " subl $4*4, %esp\n" \
" pushl %fs\n" \
" pushl %es\n" \
" pushl %ds\n" \
" pushl %eax\n" \
- SAVE_RBP_STRING \
+ " pushl %ebp\n" \
" pushl %edi\n" \