summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig.debug2
-rw-r--r--arch/x86/entry/entry_32.S43
-rw-r--r--arch/x86/entry/vdso/vclock_gettime.c6
-rw-r--r--arch/x86/entry/vsyscall/vsyscall_64.c6
-rw-r--r--arch/x86/events/core.c18
-rw-r--r--arch/x86/hyperv/hv_init.c15
-rw-r--r--arch/x86/include/asm/cpu_entry_area.h12
-rw-r--r--arch/x86/include/asm/doublefault.h13
-rw-r--r--arch/x86/include/asm/fpu/internal.h2
-rw-r--r--arch/x86/include/asm/mmu_context.h4
-rw-r--r--arch/x86/include/asm/mshyperv.h1
-rw-r--r--arch/x86/include/asm/pgtable_32_types.h7
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/include/asm/traps.h3
-rw-r--r--arch/x86/include/uapi/asm/msgbuf.h6
-rw-r--r--arch/x86/include/uapi/asm/sembuf.h4
-rw-r--r--arch/x86/include/uapi/asm/shmbuf.h6
-rw-r--r--arch/x86/kernel/Makefile4
-rw-r--r--arch/x86/kernel/cpu/common.c12
-rw-r--r--arch/x86/kernel/cpu/mce/therm_throt.c17
-rw-r--r--arch/x86/kernel/doublefault.c86
-rw-r--r--arch/x86/kernel/doublefault_32.c136
-rw-r--r--arch/x86/kernel/dumpstack_32.c30
-rw-r--r--arch/x86/kernel/process.c52
-rw-r--r--arch/x86/kernel/ptrace.c36
-rw-r--r--arch/x86/kernel/traps.c31
-rw-r--r--arch/x86/lib/x86-opcode-map.txt44
-rw-r--r--arch/x86/mm/cpu_entry_area.c14
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--arch/x86/mm/pat_interval.c12
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-pm.c8
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-sci.c6
-rw-r--r--arch/x86/um/vdso/um_vdso.c12
33 files changed, 432 insertions, 220 deletions
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 409c00f74e60..c4eab8ed33a3 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -117,7 +117,7 @@ config DEBUG_WX
config DOUBLEFAULT
default y
- bool "Enable doublefault exception handler" if EXPERT
+ bool "Enable doublefault exception handler" if EXPERT && X86_32
---help---
This option allows trapping of rare doublefault exceptions that
would otherwise cause a system to silently reboot. Disabling this
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 5832b11f01bb..7e0560442538 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1090,7 +1090,6 @@ SYM_FUNC_START(entry_INT80_32)
restore_all:
TRACE_IRQS_IRET
SWITCH_TO_ENTRY_STACK
-.Lrestore_all_notrace:
CHECK_AND_APPLY_ESPFIX
.Lrestore_nocheck:
/* Switch back to user CR3 */
@@ -1537,6 +1536,48 @@ SYM_CODE_START(debug)
jmp common_exception
SYM_CODE_END(debug)
+#ifdef CONFIG_DOUBLEFAULT
+SYM_CODE_START(double_fault)
+1:
+ /*
+ * This is a task gate handler, not an interrupt gate handler.
+ * The error code is on the stack, but the stack is otherwise
+ * empty. Interrupts are off. Our state is sane with the following
+ * exceptions:
+ *
+ * - CR0.TS is set. "TS" literally means "task switched".
+ * - EFLAGS.NT is set because we're a "nested task".
+ * - The doublefault TSS has back_link set and has been marked busy.
+ * - TR points to the doublefault TSS and the normal TSS is busy.
+ * - CR3 is the normal kernel PGD. This would be delightful, except
+ * that the CPU didn't bother to save the old CR3 anywhere. This
+ * would make it very awkward to return back to the context we came
+ * from.
+ *
+ * The rest of EFLAGS is sanitized for us, so we don't need to
+ * worry about AC or DF.
+ *
+ * Don't even bother popping the error code. It's always zero,
+ * and ignoring it makes us a bit more robust against buggy
+ * hypervisor task gate implementations.
+ *
+ * We will manually undo the task switch instead of doing a
+ * task-switching IRET.
+ */
+
+ clts /* clear CR0.TS */
+ pushl $X86_EFLAGS_FIXED
+ popfl /* clear EFLAGS.NT */
+
+ call doublefault_shim
+
+ /* We don't support returning, so we have no IRET here. */
+1:
+ hlt
+ jmp 1b
+SYM_CODE_END(double_fault)
+#endif
+
/*
* NMI is doubly nasty. It can happen on the first instruction of
* entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index d9ff616bb0f6..7d70935b6758 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -15,7 +15,7 @@
#include "../../../../lib/vdso/gettimeofday.c"
extern int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
-extern time_t __vdso_time(time_t *t);
+extern __kernel_old_time_t __vdso_time(__kernel_old_time_t *t);
int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
{
@@ -25,12 +25,12 @@ int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
int gettimeofday(struct __kernel_old_timeval *, struct timezone *)
__attribute__((weak, alias("__vdso_gettimeofday")));
-time_t __vdso_time(time_t *t)
+__kernel_old_time_t __vdso_time(__kernel_old_time_t *t)
{
return __cvdso_time(t);
}
-time_t time(time_t *t) __attribute__((weak, alias("__vdso_time")));
+__kernel_old_time_t time(__kernel_old_time_t *t) __attribute__((weak, alias("__vdso_time")));
#if defined(CONFIG_X86_64) && !defined(BUILD_VDSO32_64)
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index e7c596dea947..44c33103a955 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -184,7 +184,7 @@ bool emulate_vsyscall(unsigned long error_code,
*/
switch (vsyscall_nr) {
case 0:
- if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
+ if (!write_ok_or_segv(regs->di, sizeof(struct __kernel_old_timeval)) ||
!write_ok_or_segv(regs->si, sizeof(struct timezone))) {
ret = -EFAULT;
goto check_fault;
@@ -194,7 +194,7 @@ bool emulate_vsyscall(unsigned long error_code,
break;
case 1:
- if (!write_ok_or_segv(regs->di, sizeof(time_t))) {
+ if (!write_ok_or_segv(regs->di, sizeof(__kernel_old_time_t))) {
ret = -EFAULT;
goto check_fault;
}
@@ -222,7 +222,7 @@ bool emulate_vsyscall(unsigned long error_code,
*/
regs->orig_ax = syscall_nr;
regs->ax = -ENOSYS;
- tmp = secure_computing(NULL);
+ tmp = secure_computing();
if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
warn_bad_vsyscall(KERN_DEBUG, regs,
"seccomp tried to change syscall nr or ip");
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 6e3f0c18908e..9a89d98c55bd 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -49,6 +49,7 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
.enabled = 1,
};
+DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key);
DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key);
u64 __read_mostly hw_cache_event_ids
@@ -2181,21 +2182,26 @@ static ssize_t set_attr_rdpmc(struct device *cdev,
if (x86_pmu.attr_rdpmc_broken)
return -ENOTSUPP;
- if ((val == 2) != (x86_pmu.attr_rdpmc == 2)) {
+ if (val != x86_pmu.attr_rdpmc) {
/*
- * Changing into or out of always available, aka
- * perf-event-bypassing mode. This path is extremely slow,
+ * Changing into or out of never available or always available,
+ * aka perf-event-bypassing mode. This path is extremely slow,
* but only root can trigger it, so it's okay.
*/
+ if (val == 0)
+ static_branch_inc(&rdpmc_never_available_key);
+ else if (x86_pmu.attr_rdpmc == 0)
+ static_branch_dec(&rdpmc_never_available_key);
+
if (val == 2)
static_branch_inc(&rdpmc_always_available_key);
- else
+ else if (x86_pmu.attr_rdpmc == 2)
static_branch_dec(&rdpmc_always_available_key);
+
on_each_cpu(refresh_pce, NULL, 1);
+ x86_pmu.attr_rdpmc = val;
}
- x86_pmu.attr_rdpmc = val;
-
return count;
}
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 50ff030d9224..caaf4dce99bf 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -7,6 +7,7 @@
* Author : K. Y. Srinivasan <kys@microsoft.com>
*/
+#include <linux/acpi.h>
#include <linux/efi.h>
#include <linux/types.h>
#include <asm/apic.h>
@@ -45,6 +46,14 @@ void *hv_alloc_hyperv_page(void)
}
EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page);
+void *hv_alloc_hyperv_zeroed_page(void)
+{
+ BUILD_BUG_ON(PAGE_SIZE != HV_HYP_PAGE_SIZE);
+
+ return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+}
+EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page);
+
void hv_free_hyperv_page(unsigned long addr)
{
free_page(addr);
@@ -437,3 +446,9 @@ bool hv_is_hyperv_initialized(void)
return hypercall_msr.enable;
}
EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized);
+
+bool hv_is_hibernation_supported(void)
+{
+ return acpi_sleep_state_supported(ACPI_STATE_S4);
+}
+EXPORT_SYMBOL_GPL(hv_is_hibernation_supported);
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index ea866c7bf31d..804734058c77 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -65,6 +65,13 @@ enum exception_stack_ordering {
#endif
+#ifdef CONFIG_X86_32
+struct doublefault_stack {
+ unsigned long stack[(PAGE_SIZE - sizeof(struct x86_hw_tss)) / sizeof(unsigned long)];
+ struct x86_hw_tss tss;
+} __aligned(PAGE_SIZE);
+#endif
+
/*
* cpu_entry_area is a percpu region that contains things needed by the CPU
* and early entry/exit code. Real types aren't used for all fields here
@@ -86,6 +93,11 @@ struct cpu_entry_area {
#endif
struct entry_stack_page entry_stack_page;
+#ifdef CONFIG_X86_32
+ char guard_doublefault_stack[PAGE_SIZE];
+ struct doublefault_stack doublefault_stack;
+#endif
+
/*
* On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
* we need task switches to work, and task switches write to the TSS.
diff --git a/arch/x86/include/asm/doublefault.h b/arch/x86/include/asm/doublefault.h
new file mode 100644
index 000000000000..af9a14ac8962
--- /dev/null
+++ b/arch/x86/include/asm/doublefault.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_DOUBLEFAULT_H
+#define _ASM_X86_DOUBLEFAULT_H
+
+#if defined(CONFIG_X86_32) && defined(CONFIG_DOUBLEFAULT)
+extern void doublefault_init_cpu_tss(void);
+#else
+static inline void doublefault_init_cpu_tss(void)
+{
+}
+#endif
+
+#endif /* _ASM_X86_DOUBLEFAULT_H */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 4c95c365058a..44c48e34d799 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -509,7 +509,7 @@ static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
{
- return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
+ return fpu == this_cpu_read(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
}
/*
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 16ae821483c8..5f33924e200f 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -26,12 +26,14 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
#ifdef CONFIG_PERF_EVENTS
+DECLARE_STATIC_KEY_FALSE(rdpmc_never_available_key);
DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key);
static inline void load_mm_cr4_irqsoff(struct mm_struct *mm)
{
if (static_branch_unlikely(&rdpmc_always_available_key) ||
- atomic_read(&mm->context.perf_rdpmc_allowed))
+ (!static_branch_unlikely(&rdpmc_never_available_key) &&
+ atomic_read(&mm->context.perf_rdpmc_allowed)))
cr4_set_bits_irqsoff(X86_CR4_PCE);
else
cr4_clear_bits_irqsoff(X86_CR4_PCE);
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index f4138aeb4280..6b79515abb82 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -219,6 +219,7 @@ static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
void __init hyperv_init(void);
void hyperv_setup_mmu_ops(void);
void *hv_alloc_hyperv_page(void);
+void *hv_alloc_hyperv_zeroed_page(void);
void hv_free_hyperv_page(unsigned long addr);
void hyperv_reenlightenment_intr(struct pt_regs *regs);
void set_hv_tscchange_cb(void (*cb)(void));
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index 19f5807260c3..0416d42e5bdd 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -41,10 +41,11 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
#endif
/*
- * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
- * to avoid include recursion hell
+ * This is an upper bound on sizeof(struct cpu_entry_area) / PAGE_SIZE.
+ * Define this here and validate with BUILD_BUG_ON() in cpu_entry_area.c
+ * to avoid include recursion hell.
*/
-#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 41)
+#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 43)
/* The +1 is for the readonly IDT page: */
#define CPU_ENTRY_AREA_BASE \
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index e51afbb0cbfb..0340aad3f2fc 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -166,7 +166,6 @@ enum cpuid_regs_idx {
extern struct cpuinfo_x86 boot_cpu_data;
extern struct cpuinfo_x86 new_cpu_data;
-extern struct x86_hw_tss doublefault_tss;
extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
@@ -997,7 +996,6 @@ bool xen_set_default_idle(void);
#endif
void stop_this_cpu(void *dummy);
-void df_debug(struct pt_regs *regs, long error_code);
void microcode_check(void);
enum l1tf_mitigations {
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index b25e633033c3..ffa0dc8a535e 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -69,6 +69,9 @@ dotraplinkage void do_overflow(struct pt_regs *regs, long error_code);
dotraplinkage void do_bounds(struct pt_regs *regs, long error_code);
dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code);
dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code);
+#if defined(CONFIG_X86_64) || defined(CONFIG_DOUBLEFAULT)
+dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2);
+#endif
dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code);
dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code);
dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code);
diff --git a/arch/x86/include/uapi/asm/msgbuf.h b/arch/x86/include/uapi/asm/msgbuf.h
index 90ab9a795b49..7c5bb43ed8af 100644
--- a/arch/x86/include/uapi/asm/msgbuf.h
+++ b/arch/x86/include/uapi/asm/msgbuf.h
@@ -15,9 +15,9 @@
struct msqid64_ds {
struct ipc64_perm msg_perm;
- __kernel_time_t msg_stime; /* last msgsnd time */
- __kernel_time_t msg_rtime; /* last msgrcv time */
- __kernel_time_t msg_ctime; /* last change time */
+ __kernel_long_t msg_stime; /* last msgsnd time */
+ __kernel_long_t msg_rtime; /* last msgrcv time */
+ __kernel_long_t msg_ctime; /* last change time */
__kernel_ulong_t msg_cbytes; /* current number of bytes on queue */
__kernel_ulong_t msg_qnum; /* number of messages in queue */
__kernel_ulong_t msg_qbytes; /* max number of bytes on queue */
diff --git a/arch/x86/include/uapi/asm/sembuf.h b/arch/x86/include/uapi/asm/sembuf.h
index 89de6cd9f0a7..93030e97269a 100644
--- a/arch/x86/include/uapi/asm/sembuf.h
+++ b/arch/x86/include/uapi/asm/sembuf.h
@@ -21,9 +21,9 @@ struct semid64_ds {
unsigned long sem_ctime; /* last change time */
unsigned long sem_ctime_high;
#else
- __kernel_time_t sem_otime; /* last semop time */
+ __kernel_long_t sem_otime; /* last semop time */
__kernel_ulong_t __unused1;
- __kernel_time_t sem_ctime; /* last change time */
+ __kernel_long_t sem_ctime; /* last change time */
__kernel_ulong_t __unused2;
#endif
__kernel_ulong_t sem_nsems; /* no. of semaphores in array */
diff --git a/arch/x86/include/uapi/asm/shmbuf.h b/arch/x86/include/uapi/asm/shmbuf.h
index 644421f3823b..f0305dc660c9 100644
--- a/arch/x86/include/uapi/asm/shmbuf.h
+++ b/arch/x86/include/uapi/asm/shmbuf.h
@@ -16,9 +16,9 @@
struct shmid64_ds {
struct ipc64_perm shm_perm; /* operation perms */
size_t shm_segsz; /* size of segment (bytes) */
- __kernel_time_t shm_atime; /* last attach time */
- __kernel_time_t shm_dtime; /* last detach time */
- __kernel_time_t shm_ctime; /* last change time */
+ __kernel_long_t shm_atime; /* last attach time */
+ __kernel_long_t shm_dtime; /* last detach time */
+ __kernel_long_t shm_ctime; /* last change time */
__kernel_pid_t shm_cpid; /* pid of creator */
__kernel_pid_t shm_lpid; /* pid of last operator */
__kernel_ulong_t shm_nattch; /* no. of current attaches */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 32acb970f416..6175e370ee4a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -100,7 +100,9 @@ obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
obj-y += kprobes/
obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
+ifeq ($(CONFIG_X86_32),y)
+obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
+endif
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_VM86) += vm86_32.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index baa2fed8deb6..2e4d90294fe6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -24,6 +24,7 @@
#include <asm/stackprotector.h>
#include <asm/perf_event.h>
#include <asm/mmu_context.h>
+#include <asm/doublefault.h>
#include <asm/archrandom.h>
#include <asm/hypervisor.h>
#include <asm/processor.h>
@@ -1814,8 +1815,6 @@ static inline void tss_setup_ist(struct tss_struct *tss)
tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
}
-static inline void gdt_setup_doublefault_tss(int cpu) { }
-
#else /* CONFIG_X86_64 */
static inline void setup_getcpu(int cpu) { }
@@ -1827,13 +1826,6 @@ static inline void ucode_cpu_init(int cpu)
static inline void tss_setup_ist(struct tss_struct *tss) { }
-static inline void gdt_setup_doublefault_tss(int cpu)
-{
-#ifdef CONFIG_DOUBLEFAULT
- /* Set up the doublefault TSS pointer in the GDT */
- __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
-#endif
-}
#endif /* !CONFIG_X86_64 */
static inline void tss_setup_io_bitmap(struct tss_struct *tss)
@@ -1923,7 +1915,7 @@ void cpu_init(void)
clear_all_debug_regs();
dbg_restore_debug_regs();
- gdt_setup_doublefault_tss(cpu);
+ doublefault_init_cpu_tss();
fpu__init_cpu();
diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c
index d01e0da0163a..b38010b541d6 100644
--- a/arch/x86/kernel/cpu/mce/therm_throt.c
+++ b/arch/x86/kernel/cpu/mce/therm_throt.c
@@ -195,17 +195,24 @@ static const struct attribute_group thermal_attr_group = {
#define THERM_THROT_POLL_INTERVAL HZ
#define THERM_STATUS_PROCHOT_LOG BIT(1)
+#define THERM_STATUS_CLEAR_CORE_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(13) | BIT(15))
+#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11))
+
static void clear_therm_status_log(int level)
{
int msr;
- u64 msr_val;
+ u64 mask, msr_val;
- if (level == CORE_LEVEL)
- msr = MSR_IA32_THERM_STATUS;
- else
- msr = MSR_IA32_PACKAGE_THERM_STATUS;
+ if (level == CORE_LEVEL) {
+ msr = MSR_IA32_THERM_STATUS;
+ mask = THERM_STATUS_CLEAR_CORE_MASK;
+ } else {
+ msr = MSR_IA32_PACKAGE_THERM_STATUS;
+ mask = THERM_STATUS_CLEAR_PKG_MASK;
+ }
rdmsrl(msr, msr_val);
+ msr_val &= mask;
wrmsrl(msr, msr_val & ~THERM_STATUS_PROCHOT_LOG);
}
diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c
deleted file mode 100644
index 0d6c657593f8..000000000000
--- a/arch/x86/kernel/doublefault.c
+++ /dev/null
@@ -1,86 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/sched/debug.h>
-#include <linux/init_task.h>
-#include <linux/fs.h>
-
-#include <linux/uaccess.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/desc.h>
-
-#ifdef CONFIG_X86_32
-
-#define DOUBLEFAULT_STACKSIZE (1024)
-static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
-#define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE)
-
-#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM)
-
-static void doublefault_fn(void)
-{
- struct desc_ptr gdt_desc = {0, 0};
- unsigned long gdt, tss;
-
- native_store_gdt(&gdt_desc);
- gdt = gdt_desc.address;
-
- printk(KERN_EMERG "PANIC: double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size);
-
- if (ptr_ok(gdt)) {
- gdt += GDT_ENTRY_TSS << 3;
- tss = get_desc_base((struct desc_struct *)gdt);
- printk(KERN_EMERG "double fault, tss at %08lx\n", tss);
-
- if (ptr_ok(tss)) {
- struct x86_hw_tss *t = (struct x86_hw_tss *)tss;
-
- printk(KERN_EMERG "eip = %08lx, esp = %08lx\n",
- t->ip, t->sp);
-
- printk(KERN_EMERG "eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n",
- t->ax, t->bx, t->cx, t->dx);
- printk(KERN_EMERG "esi = %08lx, edi = %08lx\n",
- t->si, t->di);
- }
- }
-
- for (;;)
- cpu_relax();
-}
-
-struct x86_hw_tss doublefault_tss __cacheline_aligned = {
- .sp0 = STACK_START,
- .ss0 = __KERNEL_DS,
- .ldt = 0,
- .io_bitmap_base = IO_BITMAP_OFFSET_INVALID,
-
- .ip = (unsigned long) doublefault_fn,
- /* 0x2 bit is always set */
- .flags = X86_EFLAGS_SF | 0x2,
- .sp = STACK_START,
- .es = __USER_DS,
- .cs = __KERNEL_CS,
- .ss = __KERNEL_DS,
- .ds = __USER_DS,
- .fs = __KERNEL_PERCPU,
-#ifndef CONFIG_X86_32_LAZY_GS
- .gs = __KERNEL_STACK_CANARY,
-#endif
-
- .__cr3 = __pa_nodebug(swapper_pg_dir),
-};
-
-/* dummy for do_double_fault() call */
-void df_debug(struct pt_regs *regs, long error_code) {}
-
-#else /* !CONFIG_X86_32 */
-
-void df_debug(struct pt_regs *regs, long error_code)
-{
- pr_emerg("PANIC: double fault, error_code: 0x%lx\n", error_code);
- show_regs(regs);
- panic("Machine halted.");
-}
-#endif
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c
new file mode 100644
index 000000000000..3793646f0fb5
--- /dev/null
+++ b/arch/x86/kernel/doublefault_32.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/init_task.h>
+#include <linux/fs.h>
+
+#include <linux/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/processor.h>
+#include <asm/desc.h>
+#include <asm/traps.h>
+
+extern void double_fault(void);
+#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM)
+
+#define TSS(x) this_cpu_read(cpu_tss_rw.x86_tss.x)
+
+static void set_df_gdt_entry(unsigned int cpu);
+
+/*
+ * Called by double_fault with CR0.TS and EFLAGS.NT cleared. The CPU thinks
+ * we're running the doublefault task. Cannot return.
+ */
+asmlinkage notrace void __noreturn doublefault_shim(void)
+{
+ unsigned long cr2;
+ struct pt_regs regs;
+
+ BUILD_BUG_ON(sizeof(struct doublefault_stack) != PAGE_SIZE);
+
+ cr2 = native_read_cr2();
+
+ /* Reset back to the normal kernel task. */
+ force_reload_TR();
+ set_df_gdt_entry(smp_processor_id());
+
+ trace_hardirqs_off();
+
+ /*
+ * Fill in pt_regs. A downside of doing this in C is that the unwinder
+ * won't see it (no ENCODE_FRAME_POINTER), so a nested stack dump
+ * won't successfully unwind to the source of the double fault.
+ * The main dump from do_double_fault() is fine, though, since it
+ * uses these regs directly.
+ *
+ * If anyone ever cares, this could be moved to asm.
+ */
+ regs.ss = TSS(ss);
+ regs.__ssh = 0;
+ regs.sp = TSS(sp);
+ regs.flags = TSS(flags);
+ regs.cs = TSS(cs);
+ /* We won't go through the entry asm, so we can leave __csh as 0. */
+ regs.__csh = 0;
+ regs.ip = TSS(ip);
+ regs.orig_ax = 0;
+ regs.gs = TSS(gs);
+ regs.__gsh = 0;
+ regs.fs = TSS(fs);
+ regs.__fsh = 0;
+ regs.es = TSS(es);
+ regs.__esh = 0;
+ regs.ds = TSS(ds);
+ regs.__dsh = 0;
+ regs.ax = TSS(ax);
+ regs.bp = TSS(bp);
+ regs.di = TSS(di);
+ regs.si = TSS(si);
+ regs.dx = TSS(dx);
+ regs.cx = TSS(cx);
+ regs.bx = TSS(bx);
+
+ do_double_fault(&regs, 0, cr2);
+
+ /*
+ * x86_32 does not save the original CR3 anywhere on a task switch.
+ * This means that, even if we wanted to return, we would need to find
+ * some way to reconstruct CR3. We could make a credible guess based
+ * on cpu_tlbstate, but that would be racy and would not account for
+ * PTI.
+ *
+ * Instead, don't bother. We can return through
+ * rewind_stack_do_exit() instead.
+ */
+ panic("cannot return from double fault\n");
+}
+NOKPROBE_SYMBOL(doublefault_shim);
+
+DEFINE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack) = {
+ .tss = {
+ /*
+ * No sp0 or ss0 -- we never run CPL != 0 with this TSS
+ * active. sp is filled in later.
+ */
+ .ldt = 0,
+ .io_bitmap_base = IO_BITMAP_OFFSET_INVALID,
+
+ .ip = (unsigned long) double_fault,
+ .flags = X86_EFLAGS_FIXED,
+ .es = __USER_DS,
+ .cs = __KERNEL_CS,
+ .ss = __KERNEL_DS,
+ .ds = __USER_DS,
+ .fs = __KERNEL_PERCPU,
+#ifndef CONFIG_X86_32_LAZY_GS
+ .gs = __KERNEL_STACK_CANARY,
+#endif
+
+ .__cr3 = __pa_nodebug(swapper_pg_dir),
+ },
+};
+
+static void set_df_gdt_entry(unsigned int cpu)
+{
+ /* Set up doublefault TSS pointer in the GDT */
+ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS,
+ &get_cpu_entry_area(cpu)->doublefault_stack.tss);
+
+}
+
+void doublefault_init_cpu_tss(void)
+{
+ unsigned int cpu = smp_processor_id();
+ struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
+
+ /*
+ * The linker isn't smart enough to initialize percpu variables that
+ * point to other places in percpu space.
+ */
+ this_cpu_write(doublefault_stack.tss.sp,
+ (unsigned long)&cea->doublefault_stack.stack +
+ sizeof(doublefault_stack.stack));
+
+ set_df_gdt_entry(cpu);
+}
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 64a59d726639..8e3a8fedfa4d 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -29,6 +29,9 @@ const char *stack_type_name(enum stack_type type)
if (type == STACK_TYPE_ENTRY)
return