summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-11-26 11:12:02 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2019-11-26 11:12:02 -0800
commitab851d49f6bfc781edd8bd44c72ec1e49211670b (patch)
tree19e3647f2af8d287cbc830107d65855344f4d9c1 /arch/x86
parent1d87200446f1d10dfe9672ca8edb027a82612f8c (diff)
parente3cb0c7102f04c83bf1a7cb1d052e92749310b46 (diff)
Merge branch 'x86-iopl-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 iopl updates from Ingo Molnar: "This implements a nice simplification of the iopl and ioperm code that Thomas Gleixner discovered: we can implement the IO privilege features of the iopl system call by using the IO permission bitmap in permissive mode, while trapping CLI/STI/POPF/PUSHF uses in user-space if they change the interrupt flag. This implements that feature, with testing facilities and related cleanups" [ "Simplification" may be an over-statement. The main goal is to avoid the cli/sti of iopl by effectively implementing the IO port access parts of iopl in terms of ioperm. This may end up not workign well in case people actually depend on cli/sti being available, or if there are mixed uses of iopl and ioperm. We will see.. - Linus ] * 'x86-iopl-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (22 commits) x86/ioperm: Fix use of deprecated config option x86/entry/32: Clarify register saving in __switch_to_asm() selftests/x86/iopl: Extend test to cover IOPL emulation x86/ioperm: Extend IOPL config to control ioperm() as well x86/iopl: Remove legacy IOPL option x86/iopl: Restrict iopl() permission scope x86/iopl: Fixup misleading comment selftests/x86/ioperm: Extend testing so the shared bitmap is exercised x86/ioperm: Share I/O bitmap if identical x86/ioperm: Remove bitmap if all permissions dropped x86/ioperm: Move TSS bitmap update to exit to user work x86/ioperm: Add bitmap sequence number x86/ioperm: Move iobitmap data into a struct x86/tss: Move I/O bitmap data into a seperate struct x86/io: Speedup schedule out of I/O bitmap user x86/ioperm: Avoid bitmap allocation if no permissions are set x86/ioperm: Simplify first ioperm() invocation logic x86/iopl: Cleanup include maze x86/tss: Fix and move VMX BUILD_BUG_ON() x86/cpu: Unify cpu_init() ...
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig18
-rw-r--r--arch/x86/entry/common.c4
-rw-r--r--arch/x86/entry/entry_32.S8
-rw-r--r--arch/x86/include/asm/io_bitmap.h29
-rw-r--r--arch/x86/include/asm/paravirt.h4
-rw-r--r--arch/x86/include/asm/paravirt_types.h2
-rw-r--r--arch/x86/include/asm/pgtable_32_types.h2
-rw-r--r--arch/x86/include/asm/processor.h113
-rw-r--r--arch/x86/include/asm/ptrace.h6
-rw-r--r--arch/x86/include/asm/switch_to.h10
-rw-r--r--arch/x86/include/asm/thread_info.h14
-rw-r--r--arch/x86/include/asm/xen/hypervisor.h2
-rw-r--r--arch/x86/kernel/cpu/common.c188
-rw-r--r--arch/x86/kernel/doublefault.c2
-rw-r--r--arch/x86/kernel/ioport.c209
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/process.c205
-rw-r--r--arch/x86/kernel/process_32.c77
-rw-r--r--arch/x86/kernel/process_64.c86
-rw-r--r--arch/x86/kernel/ptrace.c12
-rw-r--r--arch/x86/kvm/vmx/vmx.c8
-rw-r--r--arch/x86/mm/cpu_entry_area.c8
-rw-r--r--arch/x86/xen/enlighten_pv.c10
23 files changed, 553 insertions, 466 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b89eb1f0c0d2..d936174f9d49 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1224,6 +1224,24 @@ config X86_VSYSCALL_EMULATION
Disabling this option saves about 7K of kernel size and
possibly 4K of additional runtime pagetable memory.
+config X86_IOPL_IOPERM
+ bool "IOPERM and IOPL Emulation"
+ default y
+ ---help---
+ This enables the ioperm() and iopl() syscalls which are necessary
+ for legacy applications.
+
+ Legacy IOPL support is an overbroad mechanism which allows user
+ space aside of accessing all 65536 I/O ports also to disable
+ interrupts. To gain this access the caller needs CAP_SYS_RAWIO
+ capabilities and permission from potentially active security
+ modules.
+
+ The emulation restricts the functionality of the syscall to
+ only allowing the full range I/O port access, but prevents the
+ ability to disable interrupts from user space which would be
+ granted if the hardware IOPL mechanism would be used.
+
config TOSHIBA
tristate "Toshiba Laptop support"
depends on X86_32
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 3f8e22615812..9747876980b5 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -33,6 +33,7 @@
#include <asm/cpufeature.h>
#include <asm/fpu/api.h>
#include <asm/nospec-branch.h>
+#include <asm/io_bitmap.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -196,6 +197,9 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
/* Reload ti->flags; we may have rescheduled above. */
cached_flags = READ_ONCE(ti->flags);
+ if (unlikely(cached_flags & _TIF_IO_BITMAP))
+ tss_update_io_bitmap();
+
fpregs_assert_state_consistent();
if (unlikely(cached_flags & _TIF_NEED_FPU_LOAD))
switch_fpu_return();
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 09fe5606a118..5832b11f01bb 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -739,6 +739,11 @@ SYM_CODE_START(__switch_to_asm)
pushl %ebx
pushl %edi
pushl %esi
+ /*
+ * Flags are saved to prevent AC leakage. This could go
+ * away if objtool would have 32bit support to verify
+ * the STAC/CLAC correctness.
+ */
pushfl
/* switch stack */
@@ -761,8 +766,9 @@ SYM_CODE_START(__switch_to_asm)
FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
- /* restore callee-saved registers */
+ /* Restore flags or the incoming task to restore AC state. */
popfl
+ /* restore callee-saved registers */
popl %esi
popl %edi
popl %ebx
diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h
new file mode 100644
index 000000000000..02c6ef8f7667
--- /dev/null
+++ b/arch/x86/include/asm/io_bitmap.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_IOBITMAP_H
+#define _ASM_X86_IOBITMAP_H
+
+#include <linux/refcount.h>
+#include <asm/processor.h>
+
+struct io_bitmap {
+ u64 sequence;
+ refcount_t refcnt;
+ /* The maximum number of bytes to copy so all zero bits are covered */
+ unsigned int max;
+ unsigned long bitmap[IO_BITMAP_LONGS];
+};
+
+struct task_struct;
+
+#ifdef CONFIG_X86_IOPL_IOPERM
+void io_bitmap_share(struct task_struct *tsk);
+void io_bitmap_exit(void);
+
+void tss_update_io_bitmap(void);
+#else
+static inline void io_bitmap_share(struct task_struct *tsk) { }
+static inline void io_bitmap_exit(void) { }
+static inline void tss_update_io_bitmap(void) { }
+#endif
+
+#endif
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 69089d46f128..86e7317eb31f 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -294,10 +294,6 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
{
PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g);
}
-static inline void set_iopl_mask(unsigned mask)
-{
- PVOP_VCALL1(cpu.set_iopl_mask, mask);
-}
static inline void paravirt_activate_mm(struct mm_struct *prev,
struct mm_struct *next)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 70b654f3ffe5..84812964d3dd 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -140,8 +140,6 @@ struct pv_cpu_ops {
void (*load_sp0)(unsigned long sp0);
- void (*set_iopl_mask)(unsigned mask);
-
void (*wbinvd)(void);
/* cpuid emulation, mostly so that caps bits can be disabled */
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index 1636eb8e5a5b..19f5807260c3 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -44,7 +44,7 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
* Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
* to avoid include recursion hell
*/
-#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 39)
+#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 41)
/* The +1 is for the readonly IDT page: */
#define CPU_ENTRY_AREA_BASE \
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 94227da69da1..b4e29d8b9e5a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -7,6 +7,7 @@
/* Forward declaration, a strange C thing */
struct task_struct;
struct mm_struct;
+struct io_bitmap;
struct vm86;
#include <asm/math_emu.h>
@@ -336,10 +337,32 @@ struct x86_hw_tss {
* IO-bitmap sizes:
*/
#define IO_BITMAP_BITS 65536
-#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
-#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
-#define INVALID_IO_BITMAP_OFFSET 0x8000
+#define IO_BITMAP_BYTES (IO_BITMAP_BITS / BITS_PER_BYTE)
+#define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long))
+
+#define IO_BITMAP_OFFSET_VALID_MAP \
+ (offsetof(struct tss_struct, io_bitmap.bitmap) - \
+ offsetof(struct tss_struct, x86_tss))
+
+#define IO_BITMAP_OFFSET_VALID_ALL \
+ (offsetof(struct tss_struct, io_bitmap.mapall) - \
+ offsetof(struct tss_struct, x86_tss))
+
+#ifdef CONFIG_X86_IOPL_IOPERM
+/*
+ * sizeof(unsigned long) coming from an extra "long" at the end of the
+ * iobitmap. The limit is inclusive, i.e. the last valid byte.
+ */
+# define __KERNEL_TSS_LIMIT \
+ (IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \
+ sizeof(unsigned long) - 1)
+#else
+# define __KERNEL_TSS_LIMIT \
+ (offsetof(struct tss_struct, x86_tss) + sizeof(struct x86_hw_tss) - 1)
+#endif
+
+/* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */
+#define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1)
struct entry_stack {
unsigned long words[64];
@@ -349,13 +372,21 @@ struct entry_stack_page {
struct entry_stack stack;
} __aligned(PAGE_SIZE);
-struct tss_struct {
+/*
+ * All IO bitmap related data stored in the TSS:
+ */
+struct x86_io_bitmap {
+ /* The sequence number of the last active bitmap. */
+ u64 prev_sequence;
+
/*
- * The fixed hardware portion. This must not cross a page boundary
- * at risk of violating the SDM's advice and potentially triggering
- * errata.
+ * Store the dirty size of the last io bitmap offender. The next
+ * one will have to do the cleanup as the switch out to a non io
+ * bitmap user will just set x86_tss.io_bitmap_base to a value
+ * outside of the TSS limit. So for sane tasks there is no need to
+ * actually touch the io_bitmap at all.
*/
- struct x86_hw_tss x86_tss;
+ unsigned int prev_max;
/*
* The extra 1 is there because the CPU will access an
@@ -363,21 +394,30 @@ struct tss_struct {
* bitmap. The extra byte must be all 1 bits, and must
* be within the limit.
*/
- unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
+ unsigned long bitmap[IO_BITMAP_LONGS + 1];
+
+ /*
+ * Special I/O bitmap to emulate IOPL(3). All bytes zero,
+ * except the additional byte at the end.
+ */
+ unsigned long mapall[IO_BITMAP_LONGS + 1];
+};
+
+struct tss_struct {
+ /*
+ * The fixed hardware portion. This must not cross a page boundary
+ * at risk of violating the SDM's advice and potentially triggering
+ * errata.
+ */
+ struct x86_hw_tss x86_tss;
+
+#ifdef CONFIG_X86_IOPL_IOPERM
+ struct x86_io_bitmap io_bitmap;
+#endif
} __aligned(PAGE_SIZE);
DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
-/*
- * sizeof(unsigned long) coming from an extra "long" at the end
- * of the iobitmap.
- *
- * -1? seg base+limit should be pointing to the address of the
- * last valid byte
- */
-#define __KERNEL_TSS_LIMIT \
- (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
-
/* Per CPU interrupt stacks */
struct irq_stack {
char stack[IRQ_STACK_SIZE];
@@ -488,10 +528,14 @@ struct thread_struct {
struct vm86 *vm86;
#endif
/* IO permissions: */
- unsigned long *io_bitmap_ptr;
- unsigned long iopl;
- /* Max allowed port in the bitmap, in bytes: */
- unsigned io_bitmap_max;
+ struct io_bitmap *io_bitmap;
+
+ /*
+ * IOPL. Priviledge level dependent I/O permission which is
+ * emulated via the I/O bitmap to prevent user space from disabling
+ * interrupts.
+ */
+ unsigned long iopl_emul;
mm_segment_t addr_limit;
@@ -523,25 +567,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
*/
#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
-/*
- * Set IOPL bits in EFLAGS from given mask
- */
-static inline void native_set_iopl_mask(unsigned mask)
-{
-#ifdef CONFIG_X86_32
- unsigned int reg;
-
- asm volatile ("pushfl;"
- "popl %0;"
- "andl %1, %0;"
- "orl %2, %0;"
- "pushl %0;"
- "popfl"
- : "=&r" (reg)
- : "i" (~X86_EFLAGS_IOPL), "r" (mask));
-#endif
-}
-
static inline void
native_load_sp0(unsigned long sp0)
{
@@ -581,7 +606,6 @@ static inline void load_sp0(unsigned long sp0)
native_load_sp0(sp0);
}
-#define set_iopl_mask native_set_iopl_mask
#endif /* CONFIG_PARAVIRT_XXL */
/* Free all resources held by a thread. */
@@ -849,7 +873,6 @@ static inline void spin_lock_prefetch(const void *x)
#define INIT_THREAD { \
.sp0 = TOP_OF_INIT_STACK, \
.sysenter_cs = __KERNEL_CS, \
- .io_bitmap_ptr = NULL, \
.addr_limit = KERNEL_DS, \
}
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 332eb3525867..5057a8ed100b 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -361,5 +361,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
extern int do_set_thread_area(struct task_struct *p, int idx,
struct user_desc __user *info, int can_allocate);
+#ifdef CONFIG_X86_64
+# define do_set_thread_area_64(p, s, t) do_arch_prctl_64(p, s, t)
+#else
+# define do_set_thread_area_64(p, s, t) (0)
+#endif
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_PTRACE_H */
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 18a4b6890fa8..0e059b73437b 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -103,7 +103,17 @@ static inline void update_task_stack(struct task_struct *task)
if (static_cpu_has(X86_FEATURE_XENPV))
load_sp0(task_top_of_stack(task));
#endif
+}
+static inline void kthread_frame_init(struct inactive_task_frame *frame,
+ unsigned long fun, unsigned long arg)
+{
+ frame->bx = fun;
+#ifdef CONFIG_X86_32
+ frame->di = arg;
+#else
+ frame->r12 = arg;
+#endif
}
#endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index f9453536f9bb..d779366ce3f8 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -143,8 +143,8 @@ struct thread_info {
_TIF_NOHZ)
/* flags to check in __switch_to() */
-#define _TIF_WORK_CTXSW_BASE \
- (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \
+#define _TIF_WORK_CTXSW_BASE \
+ (_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \
_TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)
/*
@@ -156,8 +156,14 @@ struct thread_info {
# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE)
#endif
-#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
-#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
+#ifdef CONFIG_X86_IOPL_IOPERM
+# define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY | \
+ _TIF_IO_BITMAP)
+#else
+# define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY)
+#endif
+
+#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
#define STACK_WARN (THREAD_SIZE/8)
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 42e1245af0d8..ff4b52e37e60 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -62,6 +62,4 @@ void xen_arch_register_cpu(int num);
void xen_arch_unregister_cpu(int num);
#endif
-extern void xen_set_iopl_mask(unsigned mask);
-
#endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 3103f2bfed85..baa2fed8deb6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -53,10 +53,7 @@
#include <asm/microcode_intel.h>
#include <asm/intel-family.h>
#include <asm/cpu_device_id.h>
-
-#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/uv/uv.h>
-#endif
#include "cpu.h"
@@ -1781,7 +1778,7 @@ static void wait_for_master_cpu(int cpu)
}
#ifdef CONFIG_X86_64
-static void setup_getcpu(int cpu)
+static inline void setup_getcpu(int cpu)
{
unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu));
struct desc_struct d = { };
@@ -1801,7 +1798,59 @@ static void setup_getcpu(int cpu)
write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_CPUNODE, &d, DESCTYPE_S);
}
+
+static inline void ucode_cpu_init(int cpu)
+{
+ if (cpu)
+ load_ucode_ap();
+}
+
+static inline void tss_setup_ist(struct tss_struct *tss)
+{
+ /* Set up the per-CPU TSS IST stacks */
+ tss->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF);
+ tss->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI);
+ tss->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB);
+ tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
+}
+
+static inline void gdt_setup_doublefault_tss(int cpu) { }
+
+#else /* CONFIG_X86_64 */
+
+static inline void setup_getcpu(int cpu) { }
+
+static inline void ucode_cpu_init(int cpu)
+{
+ show_ucode_info_early();
+}
+
+static inline void tss_setup_ist(struct tss_struct *tss) { }
+
+static inline void gdt_setup_doublefault_tss(int cpu)
+{
+#ifdef CONFIG_DOUBLEFAULT
+ /* Set up the doublefault TSS pointer in the GDT */
+ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
+#endif
+}
+#endif /* !CONFIG_X86_64 */
+
+static inline void tss_setup_io_bitmap(struct tss_struct *tss)
+{
+ tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
+
+#ifdef CONFIG_X86_IOPL_IOPERM
+ tss->io_bitmap.prev_max = 0;
+ tss->io_bitmap.prev_sequence = 0;
+ memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap));
+ /*
+ * Invalidate the extra array entry past the end of the all
+ * permission bitmap as required by the hardware.
+ */
+ tss->io_bitmap.mapall[IO_BITMAP_LONGS] = ~0UL;
#endif
+}
/*
* cpu_init() initializes state that is per-CPU. Some data is already
@@ -1809,21 +1858,15 @@ static void setup_getcpu(int cpu)
* and IDT. We reload them nevertheless, this function acts as a
* 'CPU state barrier', nothing should get across.
*/
-#ifdef CONFIG_X86_64
-
void cpu_init(void)
{
+ struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+ struct task_struct *cur = current;
int cpu = raw_smp_processor_id();
- struct task_struct *me;
- struct tss_struct *t;
- int i;
wait_for_master_cpu(cpu);
- if (cpu)
- load_ucode_ap();
-
- t = &per_cpu(cpu_tss_rw, cpu);
+ ucode_cpu_init(cpu);
#ifdef CONFIG_NUMA
if (this_cpu_read(numa_node) == 0 &&
@@ -1832,63 +1875,47 @@ void cpu_init(void)
#endif
setup_getcpu(cpu);
- me = current;
-
pr_debug("Initializing CPU#%d\n", cpu);
- cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+ if (IS_ENABLED(CONFIG_X86_64) || cpu_feature_enabled(X86_FEATURE_VME) ||
+ boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE))
+ cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
/*
* Initialize the per-CPU GDT with the boot GDT,
* and set up the GDT descriptor:
*/
-
switch_to_new_gdt(cpu);
- loadsegment(fs, 0);
-
load_current_idt();
- memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
- syscall_init();
-
- wrmsrl(MSR_FS_BASE, 0);
- wrmsrl(MSR_KERNEL_GS_BASE, 0);
- barrier();
+ if (IS_ENABLED(CONFIG_X86_64)) {
+ loadsegment(fs, 0);
+ memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
+ syscall_init();
- x86_configure_nx();
- x2apic_setup();
+ wrmsrl(MSR_FS_BASE, 0);
+ wrmsrl(MSR_KERNEL_GS_BASE, 0);
+ barrier();
- /*
- * set up and load the per-CPU TSS
- */
- if (!t->x86_tss.ist[0]) {
- t->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF);
- t->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI);
- t->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB);
- t->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
+ x2apic_setup();
}
- t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
-
- /*
- * <= is required because the CPU will access up to
- * 8 bits beyond the end of the IO permission bitmap.
- */
- for (i = 0; i <= IO_BITMAP_LONGS; i++)
- t->io_bitmap[i] = ~0UL;
-
mmgrab(&init_mm);
- me->active_mm = &init_mm;
- BUG_ON(me->mm);
+ cur->active_mm = &init_mm;
+ BUG_ON(cur->mm);
initialize_tlbstate_and_flush();
- enter_lazy_tlb(&init_mm, me);
+ enter_lazy_tlb(&init_mm, cur);
- /*
- * Initialize the TSS. sp0 points to the entry trampoline stack
- * regardless of what task is running.
- */
+ /* Initialize the TSS. */
+ tss_setup_ist(tss);
+ tss_setup_io_bitmap(tss);
set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
+
load_TR_desc();
+ /*
+ * sp0 points to the entry trampoline stack regardless of what task
+ * is running.
+ */
load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
load_mm_ldt(&init_mm);
@@ -1896,6 +1923,8 @@ void cpu_init(void)
clear_all_debug_regs();
dbg_restore_debug_regs();
+ gdt_setup_doublefault_tss(cpu);
+
fpu__init_cpu();
if (is_uv_system())
@@ -1904,63 +1933,6 @@ void cpu_init(void)
load_fixmap_gdt(cpu);
}
-#else
-
-void cpu_init(void)
-{
- int cpu = smp_processor_id();
- struct task_struct *curr = current;
- struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
-
- wait_for_master_cpu(cpu);
-
- show_ucode_info_early();
-
- pr_info("Initializing CPU#%d\n", cpu);
-
- if (cpu_feature_enabled(X86_FEATURE_VME) ||
- boot_cpu_has(X86_FEATURE_TSC) ||
- boot_cpu_has(X86_FEATURE_DE))
- cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
-
- load_current_idt();
- switch_to_new_gdt(cpu);
-
- /*
- * Set up and load the per-CPU TSS and LDT
- */
- mmgrab(&init_mm);
- curr->active_mm = &init_mm;
- BUG_ON(curr->mm);
- initialize_tlbstate_and_flush();
- enter_lazy_tlb(&init_mm, curr);
-
- /*
- * Initialize the TSS. sp0 points to the entry trampoline stack
- * regardless of what task is running.
- */
- set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
- load_TR_desc();
- load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
-
- load_mm_ldt(&init_mm);
-
- t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
-
-#ifdef CONFIG_DOUBLEFAULT
- /* Set up doublefault TSS pointer in the GDT */
- __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
-#endif
-
- clear_all_debug_regs();
- dbg_restore_debug_regs();
-
- fpu__init_cpu();
-
- load_fixmap_gdt(cpu);
-}
-#endif
-
/*
* The microcode loader calls this upon late microcode load to recheck features,
* only when microcode has been updated. Caller holds microcode_mutex and CPU
diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c
index d5c9b13bafdf..0d6c657593f8 100644
--- a/arch/x86/kernel/doublefault.c
+++ b/arch/x86/kernel/doublefault.c
@@ -54,7 +54,7 @@ struct x86_hw_tss doublefault_tss __cacheline_aligned = {
.sp0 = STACK_START,
.ss0 = __KERNEL_DS,
.ldt = 0,
- .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
+ .io_bitmap_base = IO_BITMAP_OFFSET_INVALID,
.ip = (unsigned long) doublefault_fn,
/* 0x2 bit is always set */
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 61a89d3c0382..8abeee0dd7bf 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -3,32 +3,69 @@
* This contains the io-permission bitmap code - written by obz, with changes
* by Linus. 32/64 bits code unification by Miguel Botón.
*/
-
-#include <linux/sched.h>
-#include <linux/sched/task_stack.h>
-#include <linux/kernel.h>
#include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/ioport.h>
#include <linux/security.h>
-#include <linux/smp.h>
-#include <linux/stddef.h>
-#include <linux/slab.h>
-#include <linux/thread_info.h>
#include <linux/syscalls.h>
#include <linux/bitmap.h>
-#include <asm/syscalls.h>
+#include <linux/ioport.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <asm/io_bitmap.h>
#include <asm/desc.h>
+#ifdef CONFIG_X86_IOPL_IOPERM
+
+static atomic64_t io_bitmap_sequence;
+
+void io_bitmap_share(struct task_struct *tsk)
+{
+ /* Can be NULL when current->thread.iopl_emul == 3 */
+ if (current->thread.io_bitmap) {
+ /*
+ * Take a refcount on current's bitmap. It can be used by
+ * both tasks as long as none of them changes the bitmap.
+ */
+ refcount_inc(&current->thread.io_bitmap->refcnt);
+ tsk->thread.io_bitmap = current->thread.io_bitmap;
+ }
+ set_tsk_thread_flag(tsk, TIF_IO_BITMAP);
+}
+
+static void task_update_io_bitmap(void)
+{
+ struct thread_struct *t = &current->thread;
+
+ if (t->iopl_emul == 3 || t->io_bitmap) {
+ /* TSS update is handled on exit to user space */
+ set_thread_flag(TIF_IO_BITMAP);
+ } else {
+ clear_thread_flag(TIF_IO_BITMAP);
+ /* Invalidate TSS */
+ preempt_disable();
+ tss_update_io_bitmap();
+ preempt_enable();
+ }
+}
+
+void io_bitmap_exit(void)
+{
+ struct io_bitmap *iobm = current->thread.io_bitmap;
+
+ current->thread.io_bitmap = NULL;
+ task_update_io_bitmap();
+ if (iobm && refcount_dec_and_test(&iobm->refcnt))
+ kfree(iobm);
+}
+
/*
- * this changes the io permissions bitmap in the current task.
+ * This changes the io permissions bitmap in the current task.
*/
long ksys_ioperm(unsigned long from, unsigned long num, int turn_on)
{
struct thread_struct *t = &current->thread;
- struct tss_struct *tss;
- unsigned int i, max_long, bytes, bytes_updated;
+ unsigned int i, max_long;
+ struct io_bitmap *iobm;
if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
return -EINVAL;
@@ -41,59 +78,72 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on)
* IO bitmap up. ioperm() is much less timing critical than clone(),
* this is why we delay this operation until now:
*/
- if (!t->io_bitmap_ptr) {
- unsigned long *bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
-
- if (!bitmap)
+ iobm = t->io_bitmap;
+ if (!iobm) {
+ /* No point to allocate a bitmap just to clear permissions */
+ if (!turn_on)
+ return 0;
+ iobm = kmalloc(sizeof(*iobm), GFP_KERNEL);
+ if (!iobm)
return -ENOMEM;
- memset(bitmap, 0xff, IO_BITMAP_BYTES);
- t->io_bitmap_ptr = bitmap;
- set_thread_flag(TIF_IO_BITMAP);
+ memset(iobm->bitmap, 0xff, sizeof(iobm->bitmap));
+ refcount_set(&iobm->refcnt, 1);
+ }
- /*
- * Now that we have an IO bitmap, we need our TSS limit to be
- * correct. It's fine if we are preempted after doing this:
- * with TIF_IO_BITMAP set, context switches will keep our TSS
- * limit correct.
- */
- preempt_disable();
- refresh_tss_limit();
- preempt_enable();
+ /*
+ * If the bitmap is not shared, then nothing can take a refcount as
+ * current can obviously not fork at the same time. If it's shared
+ * duplicate it and drop the refcount on the original one.
+ */
+ if (refcount_read(&iobm->refcnt) > 1) {
+ iobm = kmemdup(iobm, sizeof(*iobm), GFP_KERNEL);
+ if (!iobm)
+ return -ENOMEM;
+ refcount_set(&iobm->refcnt, 1);
+ io_bitmap_exit();
}
/*
- * do it in the per-thread copy and in the TSS ...
- *
- * Disable preemption via get_cpu() - we must not switch away
- * because the ->io_bitmap_max value must match the bitmap
- * contents:
+ * Store the bitmap pointer (might be the same if the task already
+ * head one). Must be done here so freeing the bitmap when all
+ * permissions are dropped has the pointer set up.
*/
- tss = &per_cpu(cpu_tss_rw, get_cpu());
+ t->io_bitmap = iobm;
+ /* Mark it active for context switching and exit to user mode */
+ set_thread_flag(TIF_IO_BITMAP);
+ /*
+ * Update the tasks bitmap. The update of the TSS bitmap happens on
+ * exit to user mode. So this needs no protection.
+ */
if (turn_on)
- bitmap_clear(t->io_bitmap_ptr, from, num);
+ bitmap_clear(iobm->bitmap, from, num);
else
- bitmap_set(t->io_bitmap_ptr, from, num);
+ bitmap_set(iobm->bitmap, from, num);
/*
* Search for a (possibly new) maximum. This is simple and stupid,
* to keep it obviously correct:
*/
- max_long = 0;
- for (i = 0; i < IO_BITMAP_LONGS; i++)
- if (t->io_bitmap_ptr[i] != ~0UL)
+ max_long = UINT_MAX;
+ for (i = 0; i < IO_BITMAP_LONGS; i++) {
+ if (iobm->bitmap[i] != ~0UL)
max_long = i;
+ }
+ /* All permissions dropped? */
+ if (max_long == UINT_MAX) {
+ io_bitmap_exit();
+ return 0;
+ }
- bytes = (max_long + 1) * sizeof(unsigned long);
- bytes_updated = max(bytes, t->io_bitmap_max);
-
- t->io_bitmap_max = bytes;
-
- /* Update the TSS: */